[RESEND PATCH v2 2/2] ftrace: do CPU checking after preemption disabled
With CONFIG_DEBUG_PREEMPT we observed reports like: BUG: using smp_processor_id() in preemptible caller is perf_ftrace_function_call+0x6f/0x2e0 CPU: 1 PID: 680 Comm: a.out Not tainted Call Trace: dump_stack_lvl+0x8d/0xcf check_preemption_disabled+0x104/0x110 ? optimize_nops.isra.7+0x230/0x230 ? text_poke_bp_batch+0x9f/0x310 perf_ftrace_function_call+0x6f/0x2e0 ... __text_poke+0x5/0x620 text_poke_bp_batch+0x9f/0x310 This tells us that the CPU could be changed after the task is preempted, and the checking on the CPU before preemption will be invalid. Since now ftrace_test_recursion_trylock() will help to disable the preemption, this patch just does the checking after trylock() to address the issue. CC: Steven Rostedt Reported-by: Abaci Signed-off-by: Michael Wang --- kernel/trace/trace_event_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 6aed10e..fba8cb7 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -441,13 +441,13 @@ void perf_trace_buf_update(void *record, u16 type) if (!rcu_is_watching()) return; - if ((unsigned long)ops->private != smp_processor_id()) - return; - bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; + if ((unsigned long)ops->private != smp_processor_id()) + goto out; + event = container_of(ops, struct perf_event, ftrace_ops); /* -- 1.8.3.1
[RESEND PATCH v2 1/2] ftrace: disable preemption between ftrace_test_recursion_trylock/unlock()
As the documentation explained, ftrace_test_recursion_trylock() and ftrace_test_recursion_unlock() were supposed to disable and enable preemption properly, however currently this work is done outside of the function, which could be missed by mistake. This patch will make sure the preemption is disabled when trylock() succeeds, and the unlock() will enable the preemption if previously enabled. CC: Steven Rostedt CC: Miroslav Benes Reported-by: Abaci Suggested-by: Peter Zijlstra Signed-off-by: Michael Wang --- arch/csky/kernel/probes/ftrace.c | 2 -- arch/parisc/kernel/ftrace.c | 2 -- arch/powerpc/kernel/kprobes-ftrace.c | 2 -- arch/riscv/kernel/probes/ftrace.c| 2 -- arch/x86/kernel/kprobes/ftrace.c | 2 -- include/linux/trace_recursion.h | 22 +- kernel/livepatch/patch.c | 6 -- kernel/trace/trace_functions.c | 5 - 8 files changed, 21 insertions(+), 22 deletions(-) diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index b388228..834cffc 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -17,7 +17,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE)); @@ -57,7 +56,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 0a1e75a..3543496 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -216,7 +216,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -245,7 +244,6 
@@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 7154d58..072ebe7 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index aab85a8..7142ec4 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 596de2f..dd2ec14 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ 
-59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index a9f9c57..101e1fb 100644 ---
[RESEND PATCH v2 0/2] fix & prevent the missing preemption disabling
The testing shows that perf_ftrace_function_call() is using smp_processor_id() with preemption enabled, so all the checking on CPU could be wrong after preemption. As Peter pointed out, the section between the ftrace_test_recursion_trylock/unlock() pair requires preemption to be disabled as 'Documentation/trace/ftrace-uses.rst' explained, but currently the work is done outside of the helpers. Patch 1/2 will make sure preemption is disabled after trylock() succeeds, patch 2/2 will do the smp_processor_id() checking after trylock to address the issue. v1: https://lore.kernel.org/all/8c7de46d-9869-aa5e-2bb9-5dbc2eda3...@linux.alibaba.com/ Michael Wang (2): ftrace: disable preemption between ftrace_test_recursion_trylock/unlock() ftrace: do CPU checking after preemption disabled arch/csky/kernel/probes/ftrace.c | 2 -- arch/parisc/kernel/ftrace.c | 2 -- arch/powerpc/kernel/kprobes-ftrace.c | 2 -- arch/riscv/kernel/probes/ftrace.c| 2 -- arch/x86/kernel/kprobes/ftrace.c | 2 -- include/linux/trace_recursion.h | 22 +- kernel/livepatch/patch.c | 6 -- kernel/trace/trace_event_perf.c | 6 +++--- kernel/trace/trace_functions.c | 5 - 9 files changed, 24 insertions(+), 25 deletions(-) -- 1.8.3.1
Re: [PATCH v2 0/2] fix & prevent the missing preemption disabling
On 2021/10/13 上午11:26, Steven Rostedt wrote: > Please start a new thread when sending new versions. v2 should not be a > reply to v1. If you want to reference v1, just add it to the cover > letter with a link tag: > > Link: > https://lore.kernel.org/all/8c7de46d-9869-aa5e-2bb9-5dbc2eda3...@linux.alibaba.com/ Ok, I'll resend it with link then. Regards, Michael Wang > > -- Steve > > > On Wed, 13 Oct 2021 11:16:56 +0800 > 王贇 wrote: > >> The testing show that perf_ftrace_function_call() are using >> smp_processor_id() >> with preemption enabled, all the checking on CPU could be wrong after >> preemption. >> >> As Peter point out, the section between >> ftrace_test_recursion_trylock/unlock() >> pair require the preemption to be disabled as >> 'Documentation/trace/ftrace-uses.rst' >> explained, but currently the work is done outside of the helpers. >> >> Patch 1/2 will make sure preemption disabled after trylock() succeed, >> patch 2/2 will do smp_processor_id() checking after trylock to address the >> issue. >> >> Michael Wang (2): >> ftrace: disable preemption between ftrace_test_recursion_trylock/unlock() >> ftrace: do CPU checking after preemption disabled >> >> arch/csky/kernel/probes/ftrace.c | 2 -- >> arch/parisc/kernel/ftrace.c | 2 -- >> arch/powerpc/kernel/kprobes-ftrace.c | 2 -- >> arch/riscv/kernel/probes/ftrace.c| 2 -- >> arch/x86/kernel/kprobes/ftrace.c | 2 -- >> include/linux/trace_recursion.h | 22 +- >> kernel/livepatch/patch.c | 6 -- >> kernel/trace/trace_event_perf.c | 6 +++--- >> kernel/trace/trace_functions.c | 5 - >> 9 files changed, 24 insertions(+), 25 deletions(-) >>
Re: [PATCH v2 0/2] fix & prevent the missing preemption disabling
Please start a new thread when sending new versions. v2 should not be a reply to v1. If you want to reference v1, just add it to the cover letter with a link tag: Link: https://lore.kernel.org/all/8c7de46d-9869-aa5e-2bb9-5dbc2eda3...@linux.alibaba.com/ -- Steve On Wed, 13 Oct 2021 11:16:56 +0800 王贇 wrote: > The testing show that perf_ftrace_function_call() are using smp_processor_id() > with preemption enabled, all the checking on CPU could be wrong after > preemption. > > As Peter point out, the section between ftrace_test_recursion_trylock/unlock() > pair require the preemption to be disabled as > 'Documentation/trace/ftrace-uses.rst' > explained, but currently the work is done outside of the helpers. > > Patch 1/2 will make sure preemption disabled after trylock() succeed, > patch 2/2 will do smp_processor_id() checking after trylock to address the > issue. > > Michael Wang (2): > ftrace: disable preemption between ftrace_test_recursion_trylock/unlock() > ftrace: do CPU checking after preemption disabled > > arch/csky/kernel/probes/ftrace.c | 2 -- > arch/parisc/kernel/ftrace.c | 2 -- > arch/powerpc/kernel/kprobes-ftrace.c | 2 -- > arch/riscv/kernel/probes/ftrace.c| 2 -- > arch/x86/kernel/kprobes/ftrace.c | 2 -- > include/linux/trace_recursion.h | 22 +- > kernel/livepatch/patch.c | 6 -- > kernel/trace/trace_event_perf.c | 6 +++--- > kernel/trace/trace_functions.c | 5 - > 9 files changed, 24 insertions(+), 25 deletions(-) >
[PATCH v2 2/2] ftrace: do CPU checking after preemption disabled
With CONFIG_DEBUG_PREEMPT we observed reports like: BUG: using smp_processor_id() in preemptible caller is perf_ftrace_function_call+0x6f/0x2e0 CPU: 1 PID: 680 Comm: a.out Not tainted Call Trace: dump_stack_lvl+0x8d/0xcf check_preemption_disabled+0x104/0x110 ? optimize_nops.isra.7+0x230/0x230 ? text_poke_bp_batch+0x9f/0x310 perf_ftrace_function_call+0x6f/0x2e0 ... __text_poke+0x5/0x620 text_poke_bp_batch+0x9f/0x310 This tells us that the CPU could be changed after the task is preempted, and the checking on the CPU before preemption will be invalid. Since now ftrace_test_recursion_trylock() will help to disable the preemption, this patch just does the checking after trylock() to address the issue. CC: Steven Rostedt Reported-by: Abaci Signed-off-by: Michael Wang --- kernel/trace/trace_event_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 6aed10e..fba8cb7 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -441,13 +441,13 @@ void perf_trace_buf_update(void *record, u16 type) if (!rcu_is_watching()) return; - if ((unsigned long)ops->private != smp_processor_id()) - return; - bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; + if ((unsigned long)ops->private != smp_processor_id()) + goto out; + event = container_of(ops, struct perf_event, ftrace_ops); /* -- 1.8.3.1
[PATCH v2 1/2] ftrace: disable preemption between ftrace_test_recursion_trylock/unlock()
As the documentation explained, ftrace_test_recursion_trylock() and ftrace_test_recursion_unlock() were supposed to disable and enable preemption properly, however currently this work is done outside of the function, which could be missed by mistake. This patch will make sure the preemption is disabled when trylock() succeeds, and the unlock() will enable the preemption if previously enabled. CC: Steven Rostedt CC: Miroslav Benes Reported-by: Abaci Suggested-by: Peter Zijlstra Signed-off-by: Michael Wang --- arch/csky/kernel/probes/ftrace.c | 2 -- arch/parisc/kernel/ftrace.c | 2 -- arch/powerpc/kernel/kprobes-ftrace.c | 2 -- arch/riscv/kernel/probes/ftrace.c| 2 -- arch/x86/kernel/kprobes/ftrace.c | 2 -- include/linux/trace_recursion.h | 22 +- kernel/livepatch/patch.c | 6 -- kernel/trace/trace_functions.c | 5 - 8 files changed, 21 insertions(+), 22 deletions(-) diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index b388228..834cffc 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -17,7 +17,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE)); @@ -57,7 +56,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 0a1e75a..3543496 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -216,7 +216,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -245,7 +244,6 
@@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 7154d58..072ebe7 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index aab85a8..7142ec4 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 596de2f..dd2ec14 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ 
-59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index a9f9c57..101e1fb 100644 ---
[PATCH v2 0/2] fix & prevent the missing preemption disabling
The testing shows that perf_ftrace_function_call() is using smp_processor_id() with preemption enabled, so all the checking on CPU could be wrong after preemption. As Peter pointed out, the section between the ftrace_test_recursion_trylock/unlock() pair requires preemption to be disabled as 'Documentation/trace/ftrace-uses.rst' explained, but currently the work is done outside of the helpers. Patch 1/2 will make sure preemption is disabled after trylock() succeeds, patch 2/2 will do the smp_processor_id() checking after trylock to address the issue. Michael Wang (2): ftrace: disable preemption between ftrace_test_recursion_trylock/unlock() ftrace: do CPU checking after preemption disabled arch/csky/kernel/probes/ftrace.c | 2 -- arch/parisc/kernel/ftrace.c | 2 -- arch/powerpc/kernel/kprobes-ftrace.c | 2 -- arch/riscv/kernel/probes/ftrace.c| 2 -- arch/x86/kernel/kprobes/ftrace.c | 2 -- include/linux/trace_recursion.h | 22 +- kernel/livepatch/patch.c | 6 -- kernel/trace/trace_event_perf.c | 6 +++--- kernel/trace/trace_functions.c | 5 - 9 files changed, 24 insertions(+), 25 deletions(-) -- 1.8.3.1
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On 2021/10/13 上午10:30, Steven Rostedt wrote: > On Wed, 13 Oct 2021 10:04:52 +0800 > 王贇 wrote: > >> I see, while the user can still check smp_processor_id() after trylock >> return bit 0... > > But preemption would have already been disabled. That's because a bit 0 > means that a recursion check has already been made by a previous > caller and this one is nested, thus preemption is already disabled. > If bit is 0, then preemption had better be disabled as well. Thanks for the explain, now I get your point :-) Let's make bit 0 an exemption then. Regards, Michael Wang > > -- Steve >
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On 2021/10/13 上午10:27, Steven Rostedt wrote: > On Wed, 13 Oct 2021 09:50:17 +0800 > 王贇 wrote: > - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } >>> >>> I don't like this change much. We have preempt_disable there not because >>> of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was >>> added later. Yes, it would work with the change, but it would also hide >>> things which should not be hidden in my opinion. >> >> Not very sure about the backgroup stories, but just found this in >> 'Documentation/trace/ftrace-uses.rst': >> >> Note, on success, >> ftrace_test_recursion_trylock() will disable preemption, and the >> ftrace_test_recursion_unlock() will enable it again (if it was previously >> enabled). > > Right that part is to be fixed by what you are adding here. > > The point that Miroslav is complaining about is that the preemption > disabling is special in this case, and not just from the recursion > point of view, which is why the comment is still required. My bad... the title do confusing people, will rewrite it. Regards, Michael Wang > > -- Steve > > >> >> Seems like this lock pair was supposed to take care the preemtion itself?
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On Wed, 13 Oct 2021 10:04:52 +0800 王贇 wrote: > I see, while the user can still check smp_processor_id() after trylock > return bit 0... But preemption would have already been disabled. That's because a bit 0 means that a recursion check has already been made by a previous caller and this one is nested, thus preemption is already disabled. If bit is 0, then preemption had better be disabled as well. -- Steve
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On Wed, 13 Oct 2021 09:50:17 +0800 王贇 wrote: > >> - preempt_enable_notrace(); > >>ftrace_test_recursion_unlock(bit); > >> } > > > > I don't like this change much. We have preempt_disable there not because > > of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was > > added later. Yes, it would work with the change, but it would also hide > > things which should not be hidden in my opinion. > > Not very sure about the backgroup stories, but just found this in > 'Documentation/trace/ftrace-uses.rst': > > Note, on success, > ftrace_test_recursion_trylock() will disable preemption, and the > ftrace_test_recursion_unlock() will enable it again (if it was previously > enabled). Right that part is to be fixed by what you are adding here. The point that Miroslav is complaining about is that the preemption disabling is special in this case, and not just from the recursion point of view, which is why the comment is still required. -- Steve > > Seems like this lock pair was supposed to take care the preemtion itself?
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On 2021/10/12 下午8:43, Steven Rostedt wrote: > On Tue, 12 Oct 2021 13:40:08 +0800 > 王贇 wrote: > >> --- a/include/linux/trace_recursion.h >> +++ b/include/linux/trace_recursion.h >> @@ -214,7 +214,14 @@ static __always_inline void trace_clear_recursion(int >> bit) >> static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, >> unsigned long >> parent_ip) >> { >> -return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, >> TRACE_FTRACE_MAX); >> +int bit; >> + >> +preempt_disable_notrace(); > > The recursion test does not require preemption disabled, it uses the task > struct, not per_cpu variables, so you should not disable it before the test. > > bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, > TRACE_FTRACE_MAX); > if (bit >= 0) > preempt_disable_notrace(); > > And if the bit is zero, it means a recursion check was already done by > another caller (ftrace handler does the check, followed by calling perf), > and you really don't even need to disable preemption in that case. > > if (bit > 0) > preempt_disable_notrace(); > > And on the unlock, have: > > static __always_inline void ftrace_test_recursion_unlock(int bit) > { > if (bit) > preempt_enable_notrace(); > trace_clear_recursion(bit); > } > > But maybe that's over optimizing ;-) I see, while the user can still check smp_processor_id() after trylock return bit 0... I guess Peter's point at very beginning is to prevent such cases, since kernel for production will not have preemption debug on, and such issue won't get report but could cause trouble which really hard to trace down , way to eliminate such issue once for all sounds attractive, isn't it? Regards, Michael Wang > > -- Steve > > >> +bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, >> TRACE_FTRACE_MAX); >> +if (bit < 0) >> +preempt_enable_notrace(); >> + >> +return bit; >> }
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On 2021/10/12 下午8:29, Steven Rostedt wrote: > On Tue, 12 Oct 2021 14:24:43 +0200 (CEST) > Miroslav Benes wrote: > >>> +++ b/kernel/livepatch/patch.c >>> @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, >>> bit = ftrace_test_recursion_trylock(ip, parent_ip); >>> if (WARN_ON_ONCE(bit < 0)) >>> return; >>> - /* >>> -* A variant of synchronize_rcu() is used to allow patching functions >>> -* where RCU is not watching, see klp_synchronize_transition(). >>> -*/ >>> - preempt_disable_notrace(); >>> >>> func = list_first_or_null_rcu(>func_stack, struct klp_func, >>> stack_node); >>> @@ -120,7 +115,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, >>> klp_arch_set_pc(fregs, (unsigned long)func->new_func); >>> >>> unlock: >>> - preempt_enable_notrace(); >>> ftrace_test_recursion_unlock(bit); >>> } >> >> I don't like this change much. We have preempt_disable there not because >> of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was >> added later. Yes, it would work with the change, but it would also hide >> things which should not be hidden in my opinion. > > Agreed, but I believe the change is fine, but requires a nice comment to > explain what you said above. > > Thus, before the "ftrace_test_recursion_trylock()" we need: > > /* >* The ftrace_test_recursion_trylock() will disable preemption, >* which is required for the variant of synchronize_rcu() that is >* used to allow patching functions where RCU is not watching. >* See klp_synchronize_transition() for more details. >*/ Will be in v2 too :-) Regards, Michael Wang > > -- Steve >
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On 2021/10/12 下午8:24, Miroslav Benes wrote: [snip] >> >> func = list_first_or_null_rcu(>func_stack, struct klp_func, >>stack_node); >> @@ -120,7 +115,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, >> klp_arch_set_pc(fregs, (unsigned long)func->new_func); >> >> unlock: >> -preempt_enable_notrace(); >> ftrace_test_recursion_unlock(bit); >> } > > I don't like this change much. We have preempt_disable there not because > of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was > added later. Yes, it would work with the change, but it would also hide > things which should not be hidden in my opinion. Not very sure about the backgroup stories, but just found this in 'Documentation/trace/ftrace-uses.rst': Note, on success, ftrace_test_recursion_trylock() will disable preemption, and the ftrace_test_recursion_unlock() will enable it again (if it was previously enabled). Seems like this lock pair was supposed to take care the preemtion itself? Regards, Michael Wang > > Miroslav >
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On 2021/10/12 下午8:17, Steven Rostedt wrote: > On Tue, 12 Oct 2021 13:40:08 +0800 > 王贇 wrote: > >> @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, >> bit = ftrace_test_recursion_trylock(ip, parent_ip); >> if (WARN_ON_ONCE(bit < 0)) >> return; >> -/* >> - * A variant of synchronize_rcu() is used to allow patching functions >> - * where RCU is not watching, see klp_synchronize_transition(). >> - */ > > I have to take a deeper look at this patch set, but do not remove this > comment, as it explains the protection here, that is not obvious with the > changes you made. Will keep that in v2. Regards, Michael Wang > > -- Steve > > >> -preempt_disable_notrace(); >> >> func = list_first_or_null_rcu(>func_stack, struct klp_func, >>stack_node);
Re: [PATCH 2/2] ftrace: prevent preemption in perf_ftrace_function_call()
On 2021/10/12 下午7:20, Peter Zijlstra wrote: > On Tue, Oct 12, 2021 at 01:40:31PM +0800, 王贇 wrote: > >> diff --git a/kernel/trace/trace_event_perf.c >> b/kernel/trace/trace_event_perf.c >> index 6aed10e..33c2f76 100644 >> --- a/kernel/trace/trace_event_perf.c >> +++ b/kernel/trace/trace_event_perf.c >> @@ -441,12 +441,19 @@ void perf_trace_buf_update(void *record, u16 type) >> if (!rcu_is_watching()) >> return; >> >> +/* >> + * Prevent CPU changing from now on. rcu must >> + * be in watching if the task was migrated and >> + * scheduled. >> + */ >> +preempt_disable_notrace(); >> + >> if ((unsigned long)ops->private != smp_processor_id()) >> -return; >> +goto out; >> >> bit = ftrace_test_recursion_trylock(ip, parent_ip); >> if (bit < 0) >> -return; >> +goto out; >> >> event = container_of(ops, struct perf_event, ftrace_ops); >> > > This seems rather daft, wouldn't it be easier to just put that check > under the recursion thing? In case if the condition matched, extra lock/unlock will be introduced, but I guess that's acceptable since this seems unlikely to happen :-P Will move the check in v2. Regards, Michael Wang >
[PATCH AUTOSEL 4.4 2/2] powerpc/security: Add a helper to query stf_barrier type
From: "Naveen N. Rao" [ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ] Add a helper to return the stf_barrier type for the current processor. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/security_features.h | 5 + arch/powerpc/kernel/security.c | 5 + 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 3b45a64e491e..a673416da388 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(unsigned long feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 45778c83038f..ac9a2498efe7 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -249,6 +249,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { -- 2.33.0
[PATCH AUTOSEL 4.9 4/4] powerpc/security: Add a helper to query stf_barrier type
From: "Naveen N. Rao" [ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ] Add a helper to return the stf_barrier type for the current processor. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/security_features.h | 5 + arch/powerpc/kernel/security.c | 5 + 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 3b45a64e491e..a673416da388 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(unsigned long feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index ff85fc800183..df42f020e703 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -249,6 +249,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { -- 2.33.0
[PATCH AUTOSEL 4.14 5/5] powerpc/security: Add a helper to query stf_barrier type
From: "Naveen N. Rao" [ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ] Add a helper to return the stf_barrier type for the current processor. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/security_features.h | 5 + arch/powerpc/kernel/security.c | 5 + 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 3b45a64e491e..a673416da388 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(unsigned long feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index b3f540c9f410..75a5277ef7b8 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -248,6 +248,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { -- 2.33.0
[PATCH AUTOSEL 4.19 5/5] powerpc/security: Add a helper to query stf_barrier type
From: "Naveen N. Rao" [ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ] Add a helper to return the stf_barrier type for the current processor. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/security_features.h | 5 + arch/powerpc/kernel/security.c | 5 + 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 3b45a64e491e..a673416da388 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(unsigned long feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 6a3dde9587cc..48985a1fd34d 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -248,6 +248,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { -- 2.33.0
[PATCH AUTOSEL 5.4 6/6] powerpc/security: Add a helper to query stf_barrier type
From: "Naveen N. Rao" [ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ] Add a helper to return the stf_barrier type for the current processor. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/security_features.h | 5 + arch/powerpc/kernel/security.c | 5 + 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index e9e3f85134e5..316a9c8d1929 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 1740a66cea84..ff022e725693 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -256,6 +256,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { -- 2.33.0
[PATCH AUTOSEL 5.10 10/11] powerpc/security: Add a helper to query stf_barrier type
From: "Naveen N. Rao" [ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ] Add a helper to return the stf_barrier type for the current processor. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/security_features.h | 5 + arch/powerpc/kernel/security.c | 5 + 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index b774a4477d5f..e380acc6e413 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e4e1a94ccf6a..3f510c911b10 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -261,6 +261,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { -- 2.33.0
[PATCH AUTOSEL 5.14 16/17] powerpc/bpf: Emit stf barrier instruction sequences for BPF_NOSPEC
From: "Naveen N. Rao" [ Upstream commit b7540d62509453263604a155bf2d5f0ed450cba2 ] Emit similar instruction sequences to commit a048a07d7f4535 ("powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit") when encountering BPF_NOSPEC. Mitigations are enabled depending on what the firmware advertises. In particular, we do not gate these mitigations based on current settings, just like in x86. Due to this, we don't need to take any action if mitigations are enabled or disabled at runtime. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/956570cbc191cd41f8274bed48ee757a86dac62a.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/net/bpf_jit64.h | 8 ++--- arch/powerpc/net/bpf_jit_comp64.c | 55 --- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 7b713edfa7e2..b63b35e45e55 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -16,18 +16,18 @@ * with our redzone usage. 
* * [ prev sp ] <- - * [ nv gpr save area] 6*8 | + * [ nv gpr save area] 5*8 | * [tail_call_cnt ] 8 | - * [local_tmp_var ] 8 | + * [local_tmp_var ] 16| * fp (r31) -->[ ebpf stack space] upto 512 | * [ frame header ] 32/112| * sp (r1) --->[stack pointer ] -- */ /* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6*8) +#define BPF_PPC_STACK_SAVE (5*8) /* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 16 +#define BPF_PPC_STACK_LOCALS 24 /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index f06c62089b14..1a567c46730a 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "bpf_jit64.h" @@ -35,9 +36,9 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ prev sp ] <- * [ ... ] | * sp (r1) --->[stack pointer ] -- - * [ nv gpr save area] 6*8 + * [ nv gpr save area] 5*8 * [tail_call_cnt ] 8 - * [local_tmp_var ] 8 + * [local_tmp_var ] 16 * [ unused red zone ] 208 bytes protected */ static int bpf_jit_stack_local(struct codegen_context *ctx) @@ -45,12 +46,12 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) if (bpf_has_stack_frame(ctx)) return STACK_FRAME_MIN_SIZE + ctx->stack_size; else - return -(BPF_PPC_STACK_SAVE + 16); + return -(BPF_PPC_STACK_SAVE + 24); } static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) { - return bpf_jit_stack_local(ctx) + 8; + return bpf_jit_stack_local(ctx) + 16; } static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) @@ -272,10 +273,33 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o return 0; } +/* + * We spill into the redzone always, even if the bpf program has its own stackframe. 
+ * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local() + */ +void bpf_stf_barrier(void); + +asm ( +" .global bpf_stf_barrier ;" +" bpf_stf_barrier:;" +" std 21,-64(1) ;" +" std 22,-56(1) ;" +" sync;" +" ld 21,-64(1) ;" +" ld 22,-56(1) ;" +" ori 31,31,0 ;" +" .rept 14;" +" b 1f ;" +" 1: ;" +" .endr ;" +" blr ;" +); + /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, bool extra_pass) { + enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; int i, ret; @@ -633,6 +657,29 @@ int
[PATCH AUTOSEL 5.14 15/17] powerpc/security: Add a helper to query stf_barrier type
From: "Naveen N. Rao" [ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ] Add a helper to return the stf_barrier type for the current processor. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/security_features.h | 5 + arch/powerpc/kernel/security.c | 5 + 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 792eefaf230b..27574f218b37 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index cc51fa52e783..e723ff77cc9b 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -263,6 +263,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { -- 2.33.0
[PATCH AUTOSEL 5.14 14/17] powerpc/bpf: Validate branch ranges
From: "Naveen N. Rao" [ Upstream commit 3832ba4e283d7052b783dab8311df7e3590fed93 ] Add checks to ensure that we never emit branch instructions with truncated branch offsets. Suggested-by: Michael Ellerman Signed-off-by: Naveen N. Rao Tested-by: Johan Almbladh Reviewed-by: Christophe Leroy Acked-by: Song Liu Acked-by: Johan Almbladh Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/71d33a6b7603ec1013c9734dd8bdd4ff5e929142.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/net/bpf_jit.h| 26 -- arch/powerpc/net/bpf_jit_comp.c | 6 +- arch/powerpc/net/bpf_jit_comp32.c | 8 ++-- arch/powerpc/net/bpf_jit_comp64.c | 8 ++-- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 935ea95b6635..7e9b978b768e 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -24,16 +24,30 @@ #define EMIT(instr)PLANT_INSTR(image, ctx->idx, instr) /* Long jump; (unconditional 'branch') */ -#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH |\ -(((dest) - (ctx->idx * 4)) & 0x03fc)) +#define PPC_JMP(dest)\ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_branch_range(offset)) { \ + pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH | (offset & 0x03fc));\ + } while (0) + /* blr; (unconditional 'branch' with link) to absolute address */ #define PPC_BL_ABS(dest) EMIT(PPC_INST_BL |\ (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fc)) /* "cond" here covers BO:BI fields. 
*/ -#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ -(((cond) & 0x3ff) << 16) | \ -(((dest) - (ctx->idx * 4)) & \ - 0xfffc)) +#define PPC_BCC_SHORT(cond, dest)\ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_cond_branch_range(offset)) {\ + pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ + } while (0) + /* Sign-extended 32-bit immediate load */ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 53aefee3fe70..fcbf7a917c56 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -210,7 +210,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; bpf_jit_build_prologue(code_base, ); - bpf_jit_build_body(fp, code_base, , addrs, extra_pass); + if (bpf_jit_build_body(fp, code_base, , addrs, extra_pass)) { + bpf_jit_binary_free(bpf_hdr); + fp = org_fp; + goto out_addrs; + } bpf_jit_build_epilogue(code_base, ); if (bpf_jit_enable > 1) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index beb12cbc8c29..a74d52204f8d 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -200,7 +200,7 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun } } -static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* * By now, the eBPF program has already setup parameters in r3-r6 @@ -261,7 +261,9 @@ static void bpf_jit_emit_tail_call(u32
[PATCH AUTOSEL 5.14 13/17] powerpc/lib: Add helper to check if offset is within conditional branch range
From: "Naveen N. Rao" [ Upstream commit 4549c3ea3160fa8b3f37dfe2f957657bb265eda9 ] Add a helper to check if a given offset is within the branch range for a powerpc conditional branch instruction, and update some sites to use the new helper. Signed-off-by: Naveen N. Rao Reviewed-by: Christophe Leroy Acked-by: Song Liu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/442b69a34ced32ca346a0d9a855f3f6cfdbbbd41.1633464148.git.naveen.n@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/code-patching.h | 1 + arch/powerpc/lib/code-patching.c | 7 ++- arch/powerpc/net/bpf_jit.h | 7 +-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index a95f63788c6b..4ba834599c4d 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -23,6 +23,7 @@ #define BRANCH_ABSOLUTE0x2 bool is_offset_in_branch_range(long offset); +bool is_offset_in_cond_branch_range(long offset); int create_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags); int create_cond_branch(struct ppc_inst *instr, const u32 *addr, diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index f9a3019e37b4..c5ed98823835 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -228,6 +228,11 @@ bool is_offset_in_branch_range(long offset) return (offset >= -0x200 && offset <= 0x1fc && !(offset & 0x3)); } +bool is_offset_in_cond_branch_range(long offset) +{ + return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3); +} + /* * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() @@ -280,7 +285,7 @@ int create_cond_branch(struct ppc_inst *instr, const u32 *addr, offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || 
offset > 0x7FFF || offset & 0x3) + if (!is_offset_in_cond_branch_range(offset)) return 1; /* Mask out the flags and target, so they don't step on each other. */ diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 99fad093f43e..935ea95b6635 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -78,11 +78,6 @@ #define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif -static inline bool is_nearbranch(int offset) -{ - return (offset < 32768) && (offset >= -32768); -} - /* * The fly in the ointment of code size changing from pass to pass is * avoided by padding the short branch case with a NOP. If code size differs @@ -91,7 +86,7 @@ static inline bool is_nearbranch(int offset) * state. */ #define PPC_BCC(cond, dest)do { \ - if (is_nearbranch((dest) - (ctx->idx * 4))) { \ + if (is_offset_in_cond_branch_range((long)(dest) - (ctx->idx * 4))) {\ PPC_BCC_SHORT(cond, dest);\ EMIT(PPC_RAW_NOP()); \ } else { \ -- 2.33.0
Re: [PATCH] powerpc: don't select KFENCE on platform PPC_FSL_BOOK3E
Christophe Leroy writes: > Le 12/10/2021 à 08:24, Michael Ellerman a écrit : >> Liu Shixin writes: >>> kindly ping. >> >> I was under the impression you were trying to debug why it wasn't >> working with Christophe. > > The investigation was a bit dormant to be honest since Liu confirmed > that neither KFENCE nor DEBUG_PAGEALLOC works. No worries. Sorry it fell to you to do the investigation. > I now looked at the effort to make it work, and it is not trivial. > At the time being, all linear space is mapped with pinned TLBs and > everything is setup for space 0, with space 1 being used temporarily > when doing heavy changes to space 0. > > We can't use standard pages for linear space on space 0 because we need > memory mapped at all time for exceptions (on booke exception run with > MMU on in space 0). > > In order to use standard pages, we'd need to reorganise the kernel to > have it run mostly in space 1 (for data at least) where we would map > almost everything with standard pages, and keep pinned TLB to map linear > space on space 0 for TLB miss exceptions. Then we'd do more or less like > book3s/32 and switch back into space 1 into other exceptions prolog. > > That could be good to do it as we could maybe have more code in common > with non booke 32 bits, but it is not a trivial job. > > So I suggest that for now, we just make KFENCE and DEBUG_PAGEALLOC > unselectable for booke/32 (e500 and 44x). Yep seems reasonable. cheers
Re: [PATCH v6 00/11] PCI: Drop duplicated tracking of a pci_dev's bound driver
On Mon, Oct 04, 2021 at 02:59:24PM +0200, Uwe Kleine-König wrote: > Hello, > > this is v6 of the quest to drop the "driver" member from struct pci_dev > which tracks the same data (apart from a constant offset) as dev.driver. I like this a lot and applied it to pci/driver for v5.16, thanks! I split some of the bigger patches apart so they only touched one driver or subsystem at a time. I also updated to_pci_driver() so it returns NULL when given NULL, which makes some of the validations quite a bit simpler, especially in the PM code in pci-driver.c. Full interdiff from this v6 series: diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index deaaef6efe34..36e84d904260 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -80,17 +80,15 @@ static struct resource video_rom_resource = { */ static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device) { + struct pci_driver *drv = to_pci_driver(pdev->dev.driver); const struct pci_device_id *id; if (pdev->vendor == vendor && pdev->device == device) return true; - if (pdev->dev.driver) { - struct pci_driver *drv = to_pci_driver(pdev->dev.driver); - for (id = drv->id_table; id && id->vendor; id++) - if (id->vendor == vendor && id->device == device) - break; - } + for (id = drv ? 
drv->id_table : NULL; id && id->vendor; id++) + if (id->vendor == vendor && id->device == device) + break; return id && id->vendor; } diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index d997c9c3ebb5..7eb3706cf42d 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -20,38 +20,38 @@ static void pci_error_handlers(struct cxl_afu *afu, pci_channel_state_t state) { struct pci_dev *afu_dev; + struct pci_driver *afu_drv; + struct pci_error_handlers *err_handler; if (afu->phb == NULL) return; list_for_each_entry(afu_dev, >phb->bus->devices, bus_list) { - struct pci_driver *afu_drv; - - if (!afu_dev->dev.driver) - continue; - afu_drv = to_pci_driver(afu_dev->dev.driver); + if (!afu_drv) + continue; + err_handler = afu_drv->err_handler; switch (bus_error_event) { case CXL_ERROR_DETECTED_EVENT: afu_dev->error_state = state; - if (afu_drv->err_handler && - afu_drv->err_handler->error_detected) - afu_drv->err_handler->error_detected(afu_dev, state); - break; + if (err_handler && + err_handler->error_detected) + err_handler->error_detected(afu_dev, state); + break; case CXL_SLOT_RESET_EVENT: afu_dev->error_state = state; - if (afu_drv->err_handler && - afu_drv->err_handler->slot_reset) - afu_drv->err_handler->slot_reset(afu_dev); - break; + if (err_handler && + err_handler->slot_reset) + err_handler->slot_reset(afu_dev); + break; case CXL_RESUME_EVENT: - if (afu_drv->err_handler && - afu_drv->err_handler->resume) - afu_drv->err_handler->resume(afu_dev); - break; + if (err_handler && + err_handler->resume) + err_handler->resume(afu_dev); + break; } } } diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 7e7545d01e27..08bd81854101 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1795,6 +1795,8 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, pci_channel_state_t state) { struct pci_dev *afu_dev; + struct pci_driver *afu_drv; + struct pci_error_handlers *err_handler; pci_ers_result_t 
result = PCI_ERS_RESULT_NEED_RESET; pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; @@ -1805,16 +1807,16 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, return result; list_for_each_entry(afu_dev, >phb->bus->devices, bus_list) { - struct pci_driver *afu_drv; - if (!afu_dev->dev.driver) - continue; - afu_drv = to_pci_driver(afu_dev->dev.driver); +
Re: [PATCH v2] KVM: PPC: Defer vtime accounting 'til after IRQ handling
Laurent Vivier writes: > Commit 112665286d08 moved guest_exit() in the interrupt protected > area to avoid wrong context warning (or worse), but the tick counter > cannot be updated and the guest time is accounted to the system time. > > To fix the problem port to POWER the x86 fix > 160457140187 ("Defer vtime accounting 'til after IRQ handling"): > > "Defer the call to account guest time until after servicing any IRQ(s) > that happened in the guest or immediately after VM-Exit. Tick-based > accounting of vCPU time relies on PF_VCPU being set when the tick IRQ > handler runs, and IRQs are blocked throughout the main sequence of > vcpu_enter_guest(), including the call into vendor code to actually > enter and exit the guest." > > Fixes: 112665286d08 ("KVM: PPC: Book3S HV: Context tracking exit guest > context before enabling irqs") > Cc: npig...@gmail.com > Cc: # 5.12 > Signed-off-by: Laurent Vivier > --- > > Notes: > v2: remove reference to commit 61bd0f66ff92 > cc stable 5.12 > add the same comment in the code as for x86 > > arch/powerpc/kvm/book3s_hv.c | 24 > 1 file changed, 20 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 2acb1c96cfaf..a694d1a8f6ce 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c ... > @@ -4506,13 +4514,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 > time_limit, > > srcu_read_unlock(>srcu, srcu_idx); > > + context_tracking_guest_exit(); > + > set_irq_happened(trap); > > kvmppc_set_host_core(pcpu); > > - guest_exit_irqoff(); > - > local_irq_enable(); > + /* > + * Wait until after servicing IRQs to account guest time so that any > + * ticks that occurred while running the guest are properly accounted > + * to the guest. Waiting until IRQs are enabled degrades the accuracy > + * of accounting via context tracking, but the loss of accuracy is > + * acceptable for all known use cases. 
> + */ > + vtime_account_guest_exit(); This pops a warning for me, running guest(s) on Power8: [ 270.745303][T16661] [ cut here ] [ 270.745374][T16661] WARNING: CPU: 72 PID: 16661 at arch/powerpc/kernel/time.c:311 vtime_account_kernel+0xe0/0xf0 [ 270.745397][T16661] Modules linked in: nf_conntrack_netlink xfrm_user xfrm_algo xt_addrtype xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink ip6table_filter ip6_tables iptable_filter tun overlay fuse kvm_hv kvm binfmt_misc squashfs mlx4_ib dm_multipath scsi_dh_rdac ib_uverbs scsi_dh_alua mlx4_en ib_core sr_mod cdrom lpfc bnx2x sg mlx4_core mdio crc_t10dif crct10dif_generic scsi_transport_fc vmx_crypto gf128mul leds_powernv crct10dif_vpmsum powernv_rng led_class crct10dif_common powernv_op_panel rng_core crc32c_vpmsum sunrpc ip_tables x_tables autofs4 [ 270.745565][T16661] CPU: 72 PID: 16661 Comm: qemu-system-ppc Not tainted 5.15.0-rc5-01885-g5e96f0599cff #1 [ 270.745578][T16661] NIP: c0027c20 LR: c0080b6e9ca8 CTR: c0027b40 [ 270.745588][T16661] REGS: c0081043f4f0 TRAP: 0700 Not tainted (5.15.0-rc5-01885-g5e96f0599cff) [ 270.745599][T16661] MSR: 9282b033 CR: 2244 XER: 2000 [ 270.745635][T16661] CFAR: c0027b7c IRQMASK: 0 [ 270.745635][T16661] GPR00: c0080b6e9ca8 c0081043f790 c248f900 c00da820 [ 270.745635][T16661] GPR04: c0080b93b488 0006 000f4240 c01fc000 [ 270.745635][T16661] GPR08: 000ffa6f 8002 c0080b6ffba8 [ 270.745635][T16661] GPR12: c0027b40 c00d9e00 0001 [ 270.745635][T16661] GPR16: c254c0b0 c00941e84414 [ 270.745635][T16661] GPR20: 0001 0048 c0080b710f0c 0001 [ 270.745635][T16661] GPR24: c00941e90aa8 c24c6d60 0001 [ 270.745635][T16661] GPR28: c00803222470 c0080b93aa00 0008 c00d9e00 [ 270.745747][T16661] NIP [c0027c20] vtime_account_kernel+0xe0/0xf0 [ 270.745756][T16661] LR [c0080b6e9ca8] kvmppc_run_core+0xda0/0x16c0 [kvm_hv] [ 270.745773][T16661] Call Trace: [ 270.745779][T16661] [c0081043f790] [c0081043f7d0] 
0xc0081043f7d0 (unreliable) [ 270.745793][T16661] [c0081043f7d0] [c0080b6e9ca8] kvmppc_run_core+0xda0/0x16c0 [kvm_hv] [ 270.745808][T16661] [c0081043f950] [c0080b6eee28] kvmppc_vcpu_run_hv+0x570/0xce0 [kvm_hv] [ 270.745823][T16661] [c0081043fa10] [c0080b5d2afc] kvmppc_vcpu_run+0x34/0x48 [kvm] [ 270.745847][T16661] [c0081043fa30] [c0080b5ce728] kvm_arch_vcpu_ioctl_run+0x340/0x450 [kvm]
RE: [PATCH] tpm: ibmvtpm: Make use of dma_alloc_coherent()
From: Jarkko Sakkinen > Sent: 12 October 2021 18:41 > > On Tue, 2021-10-12 at 12:43 -0300, Jason Gunthorpe wrote: > > On Tue, Oct 12, 2021 at 06:29:58PM +0300, Jarkko Sakkinen wrote: > > > On Mon, 2021-10-11 at 00:01 +0800, Cai Huoqing wrote: > > > > Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/ > > > ~ > > > Replace > > > > > > > dma_unmap_single() with dma_alloc_coherent/dma_free_coherent() > > > > helps to reduce code size, and simplify the code, and coherent > > > > DMA will not clear the cache every time. > > > > > > > > Signed-off-by: Cai Huoqing > > > > > > If this does not do functionally anything useful, there's no > > > reason to apply this. > > > > At least in this case it looks like the ibmvtpm is not using the DMA > > API properly. There is no sync after each data transfer. Replacing > > this wrong usage with the coherent API is reasonable. > > Thank you. As long as this is documented to the commit message, > I'm cool with the change itself. > > E.g. something like this would be perfectly fine replacement for the > current commit message: > > "The current usage pattern for the DMA API is inappropriate, as > data transfers are not synced. Replace the existing DMA code > with the coherent DMA API." Why not also say that the DMA accesses snoop the cache? (I think that was mentioned earlier in the thread.) David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
Re: [PATCH] tpm: ibmvtpm: Make use of dma_alloc_coherent()
On Tue, 2021-10-12 at 12:43 -0300, Jason Gunthorpe wrote: > On Tue, Oct 12, 2021 at 06:29:58PM +0300, Jarkko Sakkinen wrote: > > On Mon, 2021-10-11 at 00:01 +0800, Cai Huoqing wrote: > > > Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/ > > ~ > > Replace > > > > > dma_unmap_single() with dma_alloc_coherent/dma_free_coherent() > > > helps to reduce code size, and simplify the code, and coherent > > > DMA will not clear the cache every time. > > > > > > Signed-off-by: Cai Huoqing > > > > If this does not do functionally anything useful, there's no > > reason to apply this. > > At least in this case it looks like the ibmvtpm is not using the DMA > API properly. There is no sync after each data transfer. Replacing > this wrong usage with the coherent API is reasonable. Thank you. As long as this is documented to the commit message, I'm cool with the change itself. E.g. something like this would be perfectly fine replacement for the current commit message: "The current usage pattern for the DMA API is inappropriate, as data transfers are not synced. Replace the existing DMA code with the coherent DMA API." /Jarkko
Re: [PATCH] powerpc: don't select KFENCE on platform PPC_FSL_BOOK3E
Le 12/10/2021 à 08:24, Michael Ellerman a écrit : Liu Shixin writes: kindly ping. I was under the impression you were trying to debug why it wasn't working with Christophe. The investigation was a bit dormant to be honest since Liu confirmed that neither KFENCE nor DEBUG_PAGEALLOC works. I now looked at the effort to make it work, and it is not trivial. At the time being, all linear space is mapped with pinned TLBs and everything is setup for space 0, with space 1 being used temporarily when doing heavy changes to space 0. We can't use standard pages for linear space on space 0 because we need memory mapped at all time for exceptions (on booke exception run with MMU on in space 0). In order to use standard pages, we'd need to reorganise the kernel to have it run mostly in space 1 (for data at least) where we would map almost everything with standard pages, and keep pinned TLB to map linear space on space 0 for TLB miss exceptions. Then we'd do more or less like book3s/32 and switch back into space 1 into other exceptions prolog. That could be good to do it as we could maybe have more code in common with non booke 32 bits, but it is not a trivial job. So I suggest that for now, we just make KFENCE and DEBUG_PAGEALLOC unselectable for booke/32 (e500 and 44x). Christophe cheers
Therefore, if KFENCE is enabled, the system will hit the following panic: BUG: Kernel NULL pointer dereference on read at 0x Faulting instruction address: 0xc01de598 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=4K SMP NR_CPUS=4 MPC8544 DS Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc3+ #298 NIP: c01de598 LR: c08ae9c4 CTR: REGS: c0b4bea0 TRAP: 0300 Not tainted (5.12.0-rc3+) MSR: 00021000 CR: 24000228 XER: 2000 DEAR: ESR: GPR00: c08ae9c4 c0b4bf60 c0ad64e0 ef72 00021000 0200 GPR08: c0ad5000 0004 008fbb30 GPR16: c000 GPR24: c08ca004 c08ca004 c0b6a0e0 c0b6 c0b58f00 c085 c08ca000 ef72 NIP [c01de598] kfence_protect+0x44/0x6c LR [c08ae9c4] kfence_init+0xfc/0x2a4 Call Trace: [c0b4bf60] [efffe160] 0xefffe160 (unreliable) [c0b4bf70] [c08ae9c4] kfence_init+0xfc/0x2a4 [c0b4bfb0] [c0894d3c] start_kernel+0x3bc/0x574 [c0b4bff0] [c470] set_ivor+0x14c/0x188 Instruction dump: 7c0802a6 8109d594 546a653a 90010014 54630026 3920 7d48502e 2c0a 41820010 554a0026 5469b53a 7d295214 <8149> 38831000 554a003c 9149 random: get_random_bytes called from print_oops_end_marker+0x40/0x78 with crng_init=0 ---[ end trace ]--- Signed-off-by: Liu Shixin --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d46db0bfb998..cffd57bcb5e4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -185,7 +185,7 @@ config PPC select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB - select HAVE_ARCH_KFENCE if PPC32 + select HAVE_ARCH_KFENCE if PPC32 && !PPC_FSL_BOOK3E select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_NVRAM_OPS
Re: [PATCH] tpm: ibmvtpm: Make use of dma_alloc_coherent()
On Tue, Oct 12, 2021 at 06:29:58PM +0300, Jarkko Sakkinen wrote: > On Mon, 2021-10-11 at 00:01 +0800, Cai Huoqing wrote: > > Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/ > ~ > Replace > > > dma_unmap_single() with dma_alloc_coherent/dma_free_coherent() > > helps to reduce code size, and simplify the code, and coherent > > DMA will not clear the cache every time. > > > > Signed-off-by: Cai Huoqing > > If this does not do functionally anything useful, there's no > reason to apply this. At least in this case it looks like the ibmvtpm is not using the DMA API properly. There is no sync after each data transfer. Replacing this wrong usage with the coherent API is reasonable. Jason
[RFC PATCH] powerpc: dts: Remove MPC5xxx platforms
The mpc5xxx platforms have had dts warnings for some time which no one seems to care to fix, so let's just remove the dts files. According to Arnd: "Specifically, MPC5200B has a 15 year lifetime, which ends in 11 months from now. The original bplan/Genesi Efika 5K2 was quite popular at the time it came out, and there are probably still some of those hanging around, but they came with Open Firmware rather than relying on the dts files that ship with the kernel. Grant Likely was the original maintainer for MPC52xx until 2011, Anatolij Gustschin is still listed as maintainer since then but hasn't been active in it for a while either. Anatolij can probably best judge which of these boards are still going to be used with future kernels, but I suspect once you start removing bits from 52xx, the newer but less common 512x platform can go away as well." Cc: Anatolij Gustschin Cc: Arnd Bergmann Cc: Stephen Rothwell Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Rob Herring --- Sending this out as a feeler to see if anyone cares. If anyone does, please fix the warnings. 
arch/powerpc/boot/Makefile | 5 - arch/powerpc/boot/dts/a3m071.dts | 138 --- arch/powerpc/boot/dts/a4m072.dts | 147 arch/powerpc/boot/dts/ac14xx.dts | 395 arch/powerpc/boot/dts/cm5200.dts | 85 - arch/powerpc/boot/dts/lite5200b.dts | 157 arch/powerpc/boot/dts/media5200.dts | 142 arch/powerpc/boot/dts/motionpro.dts | 132 --- arch/powerpc/boot/dts/mpc5121.dtsi | 526 --- arch/powerpc/boot/dts/mpc5121ads.dts | 174 - arch/powerpc/boot/dts/mpc5125twr.dts | 293 --- arch/powerpc/boot/dts/mpc5200b.dtsi | 288 --- arch/powerpc/boot/dts/mucmc52.dts| 222 --- arch/powerpc/boot/dts/o2d.dts| 43 --- arch/powerpc/boot/dts/o2d.dtsi | 118 -- arch/powerpc/boot/dts/o2d300.dts | 48 --- arch/powerpc/boot/dts/o2dnt2.dts | 44 --- arch/powerpc/boot/dts/o2i.dts| 29 -- arch/powerpc/boot/dts/o2mnt.dts | 29 -- arch/powerpc/boot/dts/o3dnt.dts | 44 --- arch/powerpc/boot/dts/pcm030.dts | 106 -- arch/powerpc/boot/dts/pcm032.dts | 183 -- arch/powerpc/boot/dts/pdm360ng.dts | 195 -- arch/powerpc/boot/dts/uc101.dts | 152 24 files changed, 3695 deletions(-) delete mode 100644 arch/powerpc/boot/dts/a3m071.dts delete mode 100644 arch/powerpc/boot/dts/a4m072.dts delete mode 100644 arch/powerpc/boot/dts/ac14xx.dts delete mode 100644 arch/powerpc/boot/dts/cm5200.dts delete mode 100644 arch/powerpc/boot/dts/lite5200b.dts delete mode 100644 arch/powerpc/boot/dts/media5200.dts delete mode 100644 arch/powerpc/boot/dts/motionpro.dts delete mode 100644 arch/powerpc/boot/dts/mpc5121.dtsi delete mode 100644 arch/powerpc/boot/dts/mpc5121ads.dts delete mode 100644 arch/powerpc/boot/dts/mpc5125twr.dts delete mode 100644 arch/powerpc/boot/dts/mpc5200b.dtsi delete mode 100644 arch/powerpc/boot/dts/mucmc52.dts delete mode 100644 arch/powerpc/boot/dts/o2d.dts delete mode 100644 arch/powerpc/boot/dts/o2d.dtsi delete mode 100644 arch/powerpc/boot/dts/o2d300.dts delete mode 100644 arch/powerpc/boot/dts/o2dnt2.dts delete mode 100644 arch/powerpc/boot/dts/o2i.dts delete mode 100644 arch/powerpc/boot/dts/o2mnt.dts delete mode 
100644 arch/powerpc/boot/dts/o3dnt.dts delete mode 100644 arch/powerpc/boot/dts/pcm030.dts delete mode 100644 arch/powerpc/boot/dts/pcm032.dts delete mode 100644 arch/powerpc/boot/dts/pdm360ng.dts delete mode 100644 arch/powerpc/boot/dts/uc101.dts diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6900d0ac2421..15ee0c2c6a3e 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -308,11 +308,6 @@ image-$(CONFIG_PPC_EP88XC) += dtbImage.ep88xc image-$(CONFIG_PPC_ADDER875) += cuImage.adder875-uboot \ dtbImage.adder875-redboot -# Board ports in arch/powerpc/platform/52xx/Kconfig -image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200 -image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200b -image-$(CONFIG_PPC_MEDIA5200) += cuImage.media5200 - # Board ports in arch/powerpc/platform/82xx/Kconfig image-$(CONFIG_MPC8272_ADS)+= cuImage.mpc8272ads image-$(CONFIG_PQ2FADS)+= cuImage.pq2fads diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts deleted file mode 100644 index 034cfd8aa95b.. --- a/arch/powerpc/boot/dts/a3m071.dts +++ /dev/null @@ -1,138 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * a3m071 board Device Tree Source - * - * Copyright 2012 Stefan Roese - * - * Copyright (C) 2011 DENX Software Engineering GmbH - * Heiko Schocher - * - * Copyright (C) 2007 Semihalf - * Marian Balakowicz - */ -
Re: [PATCH] tpm: ibmvtpm: Make use of dma_alloc_coherent()
On Mon, 2021-10-11 at 00:01 +0800, Cai Huoqing wrote: > Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/ ~ Replace > dma_unmap_single() with dma_alloc_coherent/dma_free_coherent() > helps to reduce code size, and simplify the code, and coherent > DMA will not clear the cache every time. > > Signed-off-by: Cai Huoqing If this does not do functionally anything useful, there's no reason to apply this. It is also missing information on why the substitution is possible. Field tested code is better than clean code, i.e. we do not risk having possible new regressions just for a bit nicer layout... /Jarkko
Re: linux-next: build warnings in Linus' tree
On Mon, Oct 11, 2021 at 10:42 PM Rob Herring wrote: > On Sun, Oct 10, 2021 at 4:27 PM Stephen Rothwell > wrote: > FYI, u-boot removed mpc5xxx support in 2017, so maybe there's > similarly not a need to keep them in the kernel? It does appear NXP > will still sell you the parts though the last BSP was 2009. Specifically, MPC5200B has a 15 year lifetime, which ends in 11 months from now. The original bplan/Genesi Efika 5K2 was quite popular at the time it came out, and there are probably still some of those hanging around, but they came with Open Firmware rather than relying on the dts files that ship with the kernel. Grant Likely was the original maintainer for MPC52xx until 2011, Anatolij Gustschin is still listed as maintainer since then but hasn't been active in it for a while either. Anatolij can probably best judge which of these boards are still going to be used with future kernels, but I suspect once you start removing bits from 52xx, the newer but less common 512x platform can go away as well. Arnd
[RESEND PATCH v4 1/8] bpf powerpc: Remove unused SEEN_STACK
From: Ravi Bangoria SEEN_STACK is unused on PowerPC. Remove it. Also, have SEEN_TAILCALL use 0x4000. Signed-off-by: Ravi Bangoria Reviewed-by: Christophe Leroy --- * No changes in v4. arch/powerpc/net/bpf_jit.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 7e9b978b768e..89bd744c2bff 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -125,8 +125,7 @@ #define COND_LE(CR0_GT | COND_CMP_FALSE) #define SEEN_FUNC 0x2000 /* might call external helpers */ -#define SEEN_STACK 0x4000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x8000 /* uses tail calls */ +#define SEEN_TAILCALL 0x4000 /* uses tail calls */ #define SEEN_VREG_MASK 0x1ff8 /* Volatile registers r3-r12 */ #define SEEN_NVREG_MASK0x0003 /* Non volatile registers r14-r31 */ -- 2.31.1
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On Tue, 12 Oct 2021 13:40:08 +0800 王贇 wrote: > --- a/include/linux/trace_recursion.h > +++ b/include/linux/trace_recursion.h > @@ -214,7 +214,14 @@ static __always_inline void trace_clear_recursion(int > bit) > static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, >unsigned long > parent_ip) > { > - return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, > TRACE_FTRACE_MAX); > + int bit; > + > + preempt_disable_notrace(); The recursion test does not require preemption disabled, it uses the task struct, not per_cpu variables, so you should not disable it before the test. bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX); if (bit >= 0) preempt_disable_notrace(); And if the bit is zero, it means a recursion check was already done by another caller (ftrace handler does the check, followed by calling perf), and you really don't even need to disable preemption in that case. if (bit > 0) preempt_disable_notrace(); And on the unlock, have: static __always_inline void ftrace_test_recursion_unlock(int bit) { if (bit) preempt_enable_notrace(); trace_clear_recursion(bit); } But maybe that's over optimizing ;-) -- Steve > + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, > TRACE_FTRACE_MAX); > + if (bit < 0) > + preempt_enable_notrace(); > + > + return bit; > }
[RESEND PATCH v4 0/8] bpf powerpc: Add BPF_PROBE_MEM support in powerpc JIT compiler
Patch #1 & #2 are simple cleanup patches. Patch #3 refactors JIT compiler code with the aim to simplify adding BPF_PROBE_MEM support. Patch #4 introduces PPC_RAW_BRANCH() macro instead of open coding branch instruction. Patch #5 & #7 add BPF_PROBE_MEM support for PPC64 & PPC32 JIT compilers respectively. Patch #6 & #8 handle bad userspace pointers for PPC64 & PPC32 cases respectively. Resending v4 after rebasing the series on top of bpf fix patches posted by Naveen: - https://patchwork.ozlabs.org/project/linuxppc-dev/cover/cover.1633464148.git.naveen.n@linux.vnet.ibm.com/ ("[v2,00/10] powerpc/bpf: Various fixes") Also, added Reviewed-by tag from Christophe for patches #3, #5, #6, #7 & #8. Hari Bathini (4): bpf powerpc: refactor JIT compiler code powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro bpf ppc32: Add BPF_PROBE_MEM support for JIT bpf ppc32: Access only if addr is kernel address Ravi Bangoria (4): bpf powerpc: Remove unused SEEN_STACK bpf powerpc: Remove extra_pass from bpf_jit_build_body() bpf ppc64: Add BPF_PROBE_MEM support for JIT bpf ppc64: Access only if addr is kernel address arch/powerpc/include/asm/ppc-opcode.h | 2 + arch/powerpc/net/bpf_jit.h| 17 - arch/powerpc/net/bpf_jit_comp.c | 68 +++-- arch/powerpc/net/bpf_jit_comp32.c | 101 ++ arch/powerpc/net/bpf_jit_comp64.c | 72 ++ 5 files changed, 219 insertions(+), 41 deletions(-) -- 2.31.1
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
> diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h > index a9f9c57..805f9c4 100644 > --- a/include/linux/trace_recursion.h > +++ b/include/linux/trace_recursion.h > @@ -214,7 +214,14 @@ static __always_inline void trace_clear_recursion(int > bit) > static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, >unsigned long > parent_ip) > { > - return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, > TRACE_FTRACE_MAX); > + int bit; > + > + preempt_disable_notrace(); > + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, > TRACE_FTRACE_MAX); > + if (bit < 0) > + preempt_enable_notrace(); > + > + return bit; > } > > /** > @@ -226,6 +233,7 @@ static __always_inline int > ftrace_test_recursion_trylock(unsigned long ip, > static __always_inline void ftrace_test_recursion_unlock(int bit) > { > trace_clear_recursion(bit); > + preempt_enable_notrace(); > } > > #endif /* CONFIG_TRACING */ > diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c > index e8029ae..6e66ccd 100644 > --- a/kernel/livepatch/patch.c > +++ b/kernel/livepatch/patch.c > @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, > bit = ftrace_test_recursion_trylock(ip, parent_ip); > if (WARN_ON_ONCE(bit < 0)) > return; > - /* > - * A variant of synchronize_rcu() is used to allow patching functions > - * where RCU is not watching, see klp_synchronize_transition(). > - */ > - preempt_disable_notrace(); > > func = list_first_or_null_rcu(>func_stack, struct klp_func, > stack_node); > @@ -120,7 +115,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, > klp_arch_set_pc(fregs, (unsigned long)func->new_func); > > unlock: > - preempt_enable_notrace(); > ftrace_test_recursion_unlock(bit); > } I don't like this change much. We have preempt_disable there not because of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was added later. 
Yes, it would work with the change, but it would also hide things which should not be hidden in my opinion. Miroslav
[RESEND PATCH v4 8/8] bpf ppc32: Access only if addr is kernel address
With KUAP enabled, any kernel code which wants to access userspace needs to be surrounded by disable-enable KUAP. But that is not happening for BPF_PROBE_MEM load instruction. Though PPC32 does not support read protection, considering the fact that PTR_TO_BTF_ID (which uses BPF_PROBE_MEM mode) could either be a valid kernel pointer or NULL but should never be a pointer to userspace address, execute BPF_PROBE_MEM load only if addr is kernel address, otherwise set dst_reg=0 and move on. This will catch NULL, valid or invalid userspace pointers. Only bad kernel pointer will be handled by BPF exception table. [Alexei suggested for x86] Suggested-by: Alexei Starovoitov Signed-off-by: Hari Bathini Reviewed-by: Christophe Leroy --- Changes in v4: * Adjusted the emit code to avoid using temporary reg. arch/powerpc/net/bpf_jit_comp32.c | 34 +++ 1 file changed, 34 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 5dc45e393d1d..d3a52cd42f53 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -820,6 +820,40 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* +* As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid +* kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM +* load only if addr is kernel address (see is_kernel_addr()), otherwise +* set dst_reg=0 and move on. +*/ + if (BPF_MODE(code) == BPF_PROBE_MEM) { + PPC_LI32(_R0, TASK_SIZE - off); + EMIT(PPC_RAW_CMPLW(src_reg, _R0)); + PPC_BCC(COND_GT, (ctx->idx + 5) * 4); + EMIT(PPC_RAW_LI(dst_reg, 0)); + /* +* For BPF_DW case, "li reg_h,0" would be needed when +* !fp->aux->verifier_zext. Emit NOP otherwise. +* +* Note that "li reg_h,0" is emitted for BPF_B/H/W case, +* if necessary. 
So, jump there insted of emitting an +* additional "li reg_h,0" instruction. +*/ + if (size == BPF_DW && !fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else + EMIT(PPC_RAW_NOP()); + /* +* Need to jump two instructions instead of one for BPF_DW case +* as there are two load instructions for dst_reg_h & dst_reg +* respectively. +*/ + if (size == BPF_DW) + PPC_JMP((ctx->idx + 3) * 4); + else + PPC_JMP((ctx->idx + 2) * 4); + } + switch (size) { case BPF_B: EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); -- 2.31.1
[RESEND PATCH v4 7/8] bpf ppc32: Add BPF_PROBE_MEM support for JIT
BPF load instruction with BPF_PROBE_MEM mode can cause a fault inside kernel. Append exception table for such instructions within BPF program. Unlike other archs which uses extable 'fixup' field to pass dest_reg and nip, BPF exception table on PowerPC follows the generic PowerPC exception table design, where it populates both fixup and extable sections within BPF program. fixup section contains 3 instructions, first 2 instructions clear dest_reg (lower & higher 32-bit registers) and last instruction jumps to next instruction in the BPF code. extable 'insn' field contains relative offset of the instruction and 'fixup' field contains relative offset of the fixup entry. Example layout of BPF program with extable present: +--+ | | | | 0x4020 -->| lwz r28,4(r4) | | | | | 0x40ac -->| lwz r3,0(r24) | | lwz r4,4(r24) | | | | | |--| 0x4278 -->| li r28,0| \ | li r27,0| | fixup entry | b 0x4024 | / 0x4284 -->| li r4,0 | | li r3,0 | | b 0x40b4 | |--| 0x4290 -->| insn=0xfd90 | \ extable entry | fixup=0xffe4 | / 0x4298 -->| insn=0xfe14 | | fixup=0xffe8 | +--+ (Addresses shown here are chosen random, not real) Signed-off-by: Hari Bathini Reviewed-by: Christophe Leroy --- Changes in v4: * Dropped explicit fallthrough statement for empty switch cases. 
arch/powerpc/net/bpf_jit.h| 4 arch/powerpc/net/bpf_jit_comp.c | 2 ++ arch/powerpc/net/bpf_jit_comp32.c | 30 ++ 3 files changed, 36 insertions(+) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 444c9debce91..b20a2a83a6e7 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -153,7 +153,11 @@ struct codegen_context { unsigned int exentry_idx; }; +#ifdef CONFIG_PPC32 +#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ +#else #define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ +#endif static inline void bpf_flush_icache(void *start, void *end) { diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index f02457c6b54f..1a0041997050 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -297,6 +297,8 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code (ctx->exentry_idx * BPF_FIXUP_LEN * 4); fixup[0] = PPC_RAW_LI(dst_reg, 0); + if (IS_ENABLED(CONFIG_PPC32)) + fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */ fixup[BPF_FIXUP_LEN - 1] = PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)[BPF_FIXUP_LEN - 1]); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 54e7cef3e1f2..5dc45e393d1d 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -813,9 +813,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: switch (size) { case BPF_B: EMIT(PPC_RAW_LBZ(dst_reg, 
src_reg, off)); @@ -834,6 +838,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (size != BPF_DW && !fp->aux->verifier_zext) EMIT(PPC_RAW_LI(dst_reg_h, 0)); + + if (BPF_MODE(code) == BPF_PROBE_MEM) { + int insn_idx = ctx->idx - 1; + int jmp_off = 4; + + /* +* In case of BPF_DW, two lwz instructions are emitted, one +* for higher 32-bit and another for lower 32-bit. So, set +* ex->insn to the first of the two and jump over both +* instructions in fixup. +* +
[RESEND PATCH v4 4/8] powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro
Define and use PPC_RAW_BRANCH() macro instead of open coding it. This macro is used while adding BPF_PROBE_MEM support. Signed-off-by: Hari Bathini Reviewed-by: Christophe Leroy --- * No changes in v4. arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/net/bpf_jit.h| 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index baea657bc868..f50213e2a3e0 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -566,6 +566,8 @@ #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) #define PPC_RAW_EIEIO()(0x7c0006ac) +#define PPC_RAW_BRANCH(addr) (PPC_INST_BRANCH | ((addr) & 0x03fc)) + /* Deal with instructions that older assemblers aren't aware of */ #definePPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #definePPC_CP_ABORTstringify_in_c(.long PPC_RAW_CP_ABORT) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 7145b651fc2a..6a945f6211f4 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -31,7 +31,7 @@ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ - EMIT(PPC_INST_BRANCH | (offset & 0x03fc));\ + EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) /* blr; (unconditional 'branch' with link) to absolute address */ -- 2.31.1
[RESEND PATCH v4 6/8] bpf ppc64: Access only if addr is kernel address
From: Ravi Bangoria On PPC64 with KUAP enabled, any kernel code which wants to access userspace needs to be surrounded by disable-enable KUAP. But that is not happening for BPF_PROBE_MEM load instruction. So, when BPF program tries to access invalid userspace address, page-fault handler considers it as bad KUAP fault: Kernel attempted to read user page (d000) - exploit attempt? (uid: 0) Considering the fact that PTR_TO_BTF_ID (which uses BPF_PROBE_MEM mode) could either be a valid kernel pointer or NULL but should never be a pointer to userspace address, execute BPF_PROBE_MEM load only if addr is kernel address, otherwise set dst_reg=0 and move on. This will catch NULL, valid or invalid userspace pointers. Only bad kernel pointer will be handled by BPF exception table. [Alexei suggested for x86] Suggested-by: Alexei Starovoitov Signed-off-by: Ravi Bangoria Signed-off-by: Hari Bathini Reviewed-by: Christophe Leroy --- Changes in v4: * Used IS_ENABLED() instead of #ifdef. * Dropped the else case that is not applicable for PPC64. arch/powerpc/net/bpf_jit_comp64.c | 26 ++ 1 file changed, 26 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index ede8cb3e453f..472d4a551945 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -789,6 +789,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* +* As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid +* kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM +* load only if addr is kernel address (see is_kernel_addr()), otherwise +* set dst_reg=0 and move on. 
+*/ + if (BPF_MODE(code) == BPF_PROBE_MEM) { + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) + PPC_LI64(b2p[TMP_REG_2], 0x8000ul); + else /* BOOK3S_64 */ + PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); + EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); + PPC_BCC(COND_GT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_LI(dst_reg, 0)); + /* +* Check if 'off' is word aligned because PPC_BPF_LL() +* (BPF_DW case) generates two instructions if 'off' is not +* word-aligned and one instruction otherwise. +*/ + if (BPF_SIZE(code) == BPF_DW && (off & 3)) + PPC_JMP((ctx->idx + 3) * 4); + else + PPC_JMP((ctx->idx + 2) * 4); + } + switch (size) { case BPF_B: EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); -- 2.31.1
[RESEND PATCH v4 5/8] bpf ppc64: Add BPF_PROBE_MEM support for JIT
From: Ravi Bangoria BPF load instruction with BPF_PROBE_MEM mode can cause a fault inside kernel. Append exception table for such instructions within BPF program. Unlike other archs which uses extable 'fixup' field to pass dest_reg and nip, BPF exception table on PowerPC follows the generic PowerPC exception table design, where it populates both fixup and extable sections within BPF program. fixup section contains two instructions, first instruction clears dest_reg and 2nd jumps to next instruction in the BPF code. extable 'insn' field contains relative offset of the instruction and 'fixup' field contains relative offset of the fixup entry. Example layout of BPF program with extable present: +--+ | | | | 0x4020 -->| ld r27,4(r3) | | | | | 0x40ac -->| lwz r3,0(r4)| | | | | |--| 0x4280 -->| li r27,0| \ fixup entry | b 0x4024 | / 0x4288 -->| li r3,0 | | b 0x40b0 | |--| 0x4290 -->| insn=0xfd90 | \ extable entry | fixup=0xffec | / 0x4298 -->| insn=0xfe14 | | fixup=0xffec | +--+ (Addresses shown here are chosen random, not real) Signed-off-by: Ravi Bangoria Signed-off-by: Hari Bathini Reviewed-by: Christophe Leroy --- Changes in v4: * Dropped explicit fallthrough statement for empty switch cases. 
arch/powerpc/net/bpf_jit.h| 8 +++- arch/powerpc/net/bpf_jit_comp.c | 66 --- arch/powerpc/net/bpf_jit_comp32.c | 2 +- arch/powerpc/net/bpf_jit_comp64.c | 13 +- 4 files changed, 80 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 6a945f6211f4..444c9debce91 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -150,8 +150,11 @@ struct codegen_context { unsigned int idx; unsigned int stack_size; int b2p[ARRAY_SIZE(b2p)]; + unsigned int exentry_idx; }; +#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ + static inline void bpf_flush_icache(void *start, void *end) { smp_wmb(); /* smp write barrier */ @@ -175,11 +178,14 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs); + u32 *addrs, int pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, + int insn_idx, int jmp_off, int dst_reg); + #endif #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index f7972b2c21f6..f02457c6b54f 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *tmp_fp; bool bpf_blinded = false; bool extra_pass = false; + u32 extable_len; + u32 fixup_len; if (!fp->jit_requested) return org_fp; @@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) image = jit_data->image; bpf_hdr = jit_data->header; proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; extra_pass = true; 
goto skip_init_ctx; } @@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, , addrs)) { + if (bpf_jit_build_body(fp, 0, , addrs, 0)) { /* We hit something illegal or unsupported. */ fp = org_fp; goto out_addrs; @@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ if (cgctx.seen & SEEN_TAILCALL) { cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, , addrs)) { + if (bpf_jit_build_body(fp, 0, , addrs, 0)) { fp = org_fp; goto out_addrs; } @@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) bpf_jit_build_prologue(0, ); bpf_jit_build_epilogue(0, ); + fixup_len =
[RESEND PATCH v4 3/8] bpf powerpc: refactor JIT compiler code
Refactor powerpc LDX JITing code to simplify adding BPF_PROBE_MEM support. Signed-off-by: Hari Bathini Reviewed-by: Christophe Leroy --- Changes in v4: * Dropped the default case in the switch statement for bpf size. * Dropped explicit fallthrough statement for empty switch cases. arch/powerpc/net/bpf_jit_comp32.c | 33 ++- arch/powerpc/net/bpf_jit_comp64.c | 31 + 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 903f945601c0..8b2ac1c27f1f 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -284,6 +284,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); u32 src_reg_h = src_reg - 1; u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -812,23 +813,27 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ - EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + 
case BPF_DW: + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + } + + if (size != BPF_DW && !fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); break; /* diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index b25bf9b11b9d..ad852f15ca61 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -311,6 +311,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 code = insn[i].code; u32 dst_reg = b2p[insn[i].dst_reg]; u32 src_reg = b2p[insn[i].src_reg]; + u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -778,25 +779,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * */ /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - if (insn_is_zext([i + 1])) - addrs[++i] = ctx->idx * 4; - break; /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - if (insn_is_zext([i + 1])) - addrs[++i] = ctx->idx * 4; - break; /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - if (insn_is_zext([i + 1])) - addrs[++i] = ctx->idx * 4; - break; /*
[RESEND PATCH v4 2/8] bpf powerpc: Remove extra_pass from bpf_jit_build_body()
From: Ravi Bangoria In case of extra_pass, usual JIT passes are always skipped. So, extra_pass is always false while calling bpf_jit_build_body() and can be removed. Signed-off-by: Ravi Bangoria --- * No changes in v4. arch/powerpc/net/bpf_jit.h| 2 +- arch/powerpc/net/bpf_jit_comp.c | 6 +++--- arch/powerpc/net/bpf_jit_comp32.c | 4 ++-- arch/powerpc/net/bpf_jit_comp64.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 89bd744c2bff..7145b651fc2a 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -175,7 +175,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass); + u32 *addrs); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index fcbf7a917c56..f7972b2c21f6 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -149,7 +149,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, , addrs, false)) { + if (bpf_jit_build_body(fp, 0, , addrs)) { /* We hit something illegal or unsupported. 
*/ fp = org_fp; goto out_addrs; @@ -162,7 +162,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ if (cgctx.seen & SEEN_TAILCALL) { cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, , addrs, false)) { + if (bpf_jit_build_body(fp, 0, , addrs)) { fp = org_fp; goto out_addrs; } @@ -210,7 +210,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; bpf_jit_build_prologue(code_base, ); - if (bpf_jit_build_body(fp, code_base, , addrs, extra_pass)) { + if (bpf_jit_build_body(fp, code_base, , addrs)) { bpf_jit_binary_free(bpf_hdr); fp = org_fp; goto out_addrs; diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 0da31d41d413..903f945601c0 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -268,7 +268,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass) + u32 *addrs) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -862,7 +862,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, [i], extra_pass, + ret = bpf_jit_get_func_addr(fp, [i], false, _addr, _addr_fixed); if (ret < 0) return ret; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 8b5157ccfeba..b25bf9b11b9d 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -297,7 +297,7 @@ asm ( /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass) + u32 *addrs) { enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const 
struct bpf_insn *insn = fp->insnsi; @@ -831,7 +831,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + ret = bpf_jit_get_func_addr(fp, &insn[i], false, &func_addr, &func_addr_fixed); if (ret < 0) return ret; -- 2.31.1
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On Tue, 12 Oct 2021 14:24:43 +0200 (CEST) Miroslav Benes wrote: > > +++ b/kernel/livepatch/patch.c > > @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, > > bit = ftrace_test_recursion_trylock(ip, parent_ip); > > if (WARN_ON_ONCE(bit < 0)) > > return; > > - /* > > -* A variant of synchronize_rcu() is used to allow patching functions > > -* where RCU is not watching, see klp_synchronize_transition(). > > -*/ > > - preempt_disable_notrace(); > > > > func = list_first_or_null_rcu(>func_stack, struct klp_func, > > stack_node); > > @@ -120,7 +115,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, > > klp_arch_set_pc(fregs, (unsigned long)func->new_func); > > > > unlock: > > - preempt_enable_notrace(); > > ftrace_test_recursion_unlock(bit); > > } > > I don't like this change much. We have preempt_disable there not because > of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was > added later. Yes, it would work with the change, but it would also hide > things which should not be hidden in my opinion. Agreed, but I believe the change is fine, but requires a nice comment to explain what you said above. Thus, before the "ftrace_test_recursion_trylock()" we need: /* * The ftrace_test_recursion_trylock() will disable preemption, * which is required for the variant of synchronize_rcu() that is * used to allow patching functions where RCU is not watching. * See klp_synchronize_transition() for more details. */ -- Steve
Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion
On Tue, 12 Oct 2021 13:40:08 +0800 王贇 wrote: > @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, > bit = ftrace_test_recursion_trylock(ip, parent_ip); > if (WARN_ON_ONCE(bit < 0)) > return; > - /* > - * A variant of synchronize_rcu() is used to allow patching functions > - * where RCU is not watching, see klp_synchronize_transition(). > - */ I have to take a deeper look at this patch set, but do not remove this comment, as it explains the protection here, that is not obvious with the changes you made. -- Steve > - preempt_disable_notrace(); > > func = list_first_or_null_rcu(>func_stack, struct klp_func, > stack_node);
Re: [PATCH 2/2] ftrace: prevent preemption in perf_ftrace_function_call()
On Tue, Oct 12, 2021 at 01:40:31PM +0800, 王贇 wrote: > diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c > index 6aed10e..33c2f76 100644 > --- a/kernel/trace/trace_event_perf.c > +++ b/kernel/trace/trace_event_perf.c > @@ -441,12 +441,19 @@ void perf_trace_buf_update(void *record, u16 type) > if (!rcu_is_watching()) > return; > > + /* > + * Prevent CPU changing from now on. rcu must > + * be in watching if the task was migrated and > + * scheduled. > + */ > + preempt_disable_notrace(); > + > if ((unsigned long)ops->private != smp_processor_id()) > - return; > + goto out; > > bit = ftrace_test_recursion_trylock(ip, parent_ip); > if (bit < 0) > - return; > + goto out; > > event = container_of(ops, struct perf_event, ftrace_ops); > This seems rather daft, wouldn't it be easier to just put that check under the recursion thing?
[PATCH] powerpc: Set max_mapnr correctly
max_mapnr is used by virt_addr_valid() to check if a linear address is valid. It must only include lowmem PFNs, like other architectures. Problem detected on a system with 1G mem (Only 768M are mapped), with CONFIG_DEBUG_VIRTUAL and CONFIG_TEST_DEBUG_VIRTUAL, it didn't report virt_to_phys(VMALLOC_START), VMALLOC_START being 0xf1000000. Signed-off-by: Christophe Leroy --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c3c4e31462ec..889f36b55df9 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -256,7 +256,7 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - set_max_mapnr(max_pfn); + set_max_mapnr(max_low_pfn); kasan_late_init(); -- 2.31.1
Re: [PATCH 5.4 00/52] 5.4.153-rc2 review
On Tue, Oct 12, 2021 at 01:04:54PM +0530, Naresh Kamboju wrote: > On Tue, 12 Oct 2021 at 12:16, Greg Kroah-Hartman > wrote: > > > > This is the start of the stable review cycle for the 5.4.153 release. > > There are 52 patches in this series, all will be posted as a response > > to this one. If anyone has any issues with these being applied, please > > let me know. > > > > Responses should be made by Thu, 14 Oct 2021 06:44:25 +. > > Anything received after that time might be too late. > > > > The whole patch series can be found in one patch at: > > > > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.153-rc2.gz > > or in the git tree and branch at: > > > > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git > > linux-5.4.y > > and the diffstat can be found below. > > > > thanks, > > > > greg k-h > > stable rc 5.4.153-rc2 Powerpc build failed. > > In file included from arch/powerpc/net/bpf_jit64.h:11, > from arch/powerpc/net/bpf_jit_comp64.c:19: > arch/powerpc/net/bpf_jit_comp64.c: In function 'bpf_jit_build_body': > arch/powerpc/net/bpf_jit.h:32:9: error: expected expression before 'do' >32 | do { if (d) { (d)[idx] = instr; } idx++; } while (0) > | ^~ > arch/powerpc/net/bpf_jit.h:33:33: note: in expansion of macro 'PLANT_INSTR' >33 | #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) > | ^~~ > arch/powerpc/net/bpf_jit_comp64.c:415:41: note: in expansion of macro 'EMIT' > 415 | EMIT(PPC_LI(dst_reg, 0)); > | ^~~~ > arch/powerpc/net/bpf_jit.h:33:33: note: in expansion of macro 'PLANT_INSTR' >33 | #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) > | ^~~ > arch/powerpc/net/bpf_jit.h:41:33: note: in expansion of macro 'EMIT' >41 | #define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | > ___PPC_RT(d) | \ > | ^~~~ > arch/powerpc/net/bpf_jit.h:44:33: note: in expansion of macro 'PPC_ADDI' >44 | #define PPC_LI(r, i)PPC_ADDI(r, 0, i) > | ^~~~ > arch/powerpc/net/bpf_jit_comp64.c:415:46: note: in expansion of macro 'PPC_LI' > 415 | 
EMIT(PPC_LI(dst_reg, 0)); > | ^~ > make[3]: *** [scripts/Makefile.build:262: > arch/powerpc/net/bpf_jit_comp64.o] Error 1 > make[3]: Target '__build' not remade because of errors. > > Reported-by: Linux Kernel Functional Testing Ok, I'm just going to go delete this patch from the queue now... Thanks for the quick report. greg k-h
Re: [PATCH v1 01/10] powerpc: Move 'struct ppc64_opd_entry' back into asm/elf.h
On Tue, Oct 12, 2021 at 9:10 AM Michael Ellerman wrote: > Christophe Leroy writes: > > 'struct ppc64_opd_entry' doesn't belong to uapi/asm/elf.h > > But it was by mistake added outside of __KERNEL__ section, > > therefore commit c3617f72036c ("UAPI: (Scripted) Disintegrate > > arch/powerpc/include/asm") moved it to uapi/asm/elf.h > > ... it's been visible to userspace since the first commit moved it, ~13 > years ago in 2008, v2.6.27. > > > Move it back into asm/elf.h, this brings it back in line with > > IA64 and PARISC architectures. > > Removing it from the uapi header risks breaking userspace, I doubt > anything uses it, but who knows. > > Given how long it's been there I think it's a bit risky to remove it :/ I would not be too worried about it. While we should absolutely never break existing binaries, changing the visibility of internal structures in header files only breaks compiling applications that do rely on these entries, and they really should not be using this in the first place. Arnd
Re: [PATCH 5.4 00/52] 5.4.153-rc2 review
On Tue, 12 Oct 2021 at 12:16, Greg Kroah-Hartman wrote: > > This is the start of the stable review cycle for the 5.4.153 release. > There are 52 patches in this series, all will be posted as a response > to this one. If anyone has any issues with these being applied, please > let me know. > > Responses should be made by Thu, 14 Oct 2021 06:44:25 +. > Anything received after that time might be too late. > > The whole patch series can be found in one patch at: > > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.153-rc2.gz > or in the git tree and branch at: > > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git > linux-5.4.y > and the diffstat can be found below. > > thanks, > > greg k-h stable rc 5.4.153-rc2 Powerpc build failed. In file included from arch/powerpc/net/bpf_jit64.h:11, from arch/powerpc/net/bpf_jit_comp64.c:19: arch/powerpc/net/bpf_jit_comp64.c: In function 'bpf_jit_build_body': arch/powerpc/net/bpf_jit.h:32:9: error: expected expression before 'do' 32 | do { if (d) { (d)[idx] = instr; } idx++; } while (0) | ^~ arch/powerpc/net/bpf_jit.h:33:33: note: in expansion of macro 'PLANT_INSTR' 33 | #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) | ^~~ arch/powerpc/net/bpf_jit_comp64.c:415:41: note: in expansion of macro 'EMIT' 415 | EMIT(PPC_LI(dst_reg, 0)); | ^~~~ arch/powerpc/net/bpf_jit.h:33:33: note: in expansion of macro 'PLANT_INSTR' 33 | #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) | ^~~ arch/powerpc/net/bpf_jit.h:41:33: note: in expansion of macro 'EMIT' 41 | #define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \ | ^~~~ arch/powerpc/net/bpf_jit.h:44:33: note: in expansion of macro 'PPC_ADDI' 44 | #define PPC_LI(r, i)PPC_ADDI(r, 0, i) | ^~~~ arch/powerpc/net/bpf_jit_comp64.c:415:46: note: in expansion of macro 'PPC_LI' 415 | EMIT(PPC_LI(dst_reg, 0)); | ^~ make[3]: *** [scripts/Makefile.build:262: arch/powerpc/net/bpf_jit_comp64.o] Error 1 make[3]: Target '__build' not remade because 
of errors. Reported-by: Linux Kernel Functional Testing -- Linaro LKFT https://lkft.linaro.org
Re: [PATCH v1 01/10] powerpc: Move 'struct ppc64_opd_entry' back into asm/elf.h
Christophe Leroy writes: > 'struct ppc64_opd_entry' doesn't belong to uapi/asm/elf.h Agree, but ... > It was initially in module_64.c and commit 2d291e902791 ("Fix compile > failure with non modular builds") moved it into asm/elf.h > > But it was by mistake added outside of __KERNEL__ section, > therefore commit c3617f72036c ("UAPI: (Scripted) Disintegrate > arch/powerpc/include/asm") moved it to uapi/asm/elf.h ... it's been visible to userspace since the first commit moved it, ~13 years ago in 2008, v2.6.27. > Move it back into asm/elf.h, this brings it back in line with > IA64 and PARISC architectures. Removing it from the uapi header risks breaking userspace, I doubt anything uses it, but who knows. Given how long it's been there I think it's a bit risky to remove it :/ > diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h > index b8425e3cfd81..64b523848cd7 100644 > --- a/arch/powerpc/include/asm/elf.h > +++ b/arch/powerpc/include/asm/elf.h > @@ -176,4 +176,11 @@ do { > \ > /* Relocate the kernel image to @final_address */ > void relocate(unsigned long final_address); > > +/* There's actually a third entry here, but it's unused */ > +struct ppc64_opd_entry > +{ > + unsigned long funcaddr; > + unsigned long r2; > +}; > + > #endif /* _ASM_POWERPC_ELF_H */ > diff --git a/arch/powerpc/include/uapi/asm/elf.h > b/arch/powerpc/include/uapi/asm/elf.h > index 860c59291bfc..308857123a08 100644 > --- a/arch/powerpc/include/uapi/asm/elf.h > +++ b/arch/powerpc/include/uapi/asm/elf.h > @@ -289,12 +289,4 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; > /* Keep this the last entry. 
*/ > #define R_PPC64_NUM 253 > > -/* There's actually a third entry here, but it's unused */ > -struct ppc64_opd_entry > -{ > - unsigned long funcaddr; > - unsigned long r2; > -}; Rather than removing it we can make it uapi only with: #ifndef __KERNEL__ /* There's actually a third entry here, but it's unused */ struct ppc64_opd_entry { unsigned long funcaddr; unsigned long r2; }; #endif And then we can do whatever we want with the kernel internal version. cheers
Re: [PATCH v1 04/10] asm-generic: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR to define associated stubs
* Christophe Leroy : > > > Le 12/10/2021 à 08:02, Helge Deller a écrit : > > On 10/11/21 17:25, Christophe Leroy wrote: > > > Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of > > > 'dereference_function_descriptor' > > > to know whether arch has function descriptors. > > > > > > Signed-off-by: Christophe Leroy > > > --- > > > arch/ia64/include/asm/sections.h| 4 ++-- > > > arch/parisc/include/asm/sections.h | 6 -- > > > arch/powerpc/include/asm/sections.h | 6 -- > > > include/asm-generic/sections.h | 3 ++- > > > 4 files changed, 12 insertions(+), 7 deletions(-) > > > > > > diff --git a/arch/ia64/include/asm/sections.h > > > b/arch/ia64/include/asm/sections.h > > > index 35f24e52149a..80f5868afb06 100644 > > > --- a/arch/ia64/include/asm/sections.h > > > +++ b/arch/ia64/include/asm/sections.h > > > @@ -7,6 +7,8 @@ > > >* David Mosberger-Tang > > >*/ > > > > > > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > > > + > > > #include > > > #include > > > #include > > > @@ -27,8 +29,6 @@ extern char > > > __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b > > > extern char __start_unwind[], __end_unwind[]; > > > extern char __start_ivt_text[], __end_ivt_text[]; > > > > > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > > > - > > > #undef dereference_function_descriptor > > > static inline void *dereference_function_descriptor(void *ptr) > > > { > > > diff --git a/arch/parisc/include/asm/sections.h > > > b/arch/parisc/include/asm/sections.h > > > index bb52aea0cb21..2e781ee19b66 100644 > > > --- a/arch/parisc/include/asm/sections.h > > > +++ b/arch/parisc/include/asm/sections.h > > > @@ -2,6 +2,10 @@ > > > #ifndef _PARISC_SECTIONS_H > > > #define _PARISC_SECTIONS_H > > > > > > +#ifdef CONFIG_64BIT > > > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > > > +#endif > > > + > > > /* nothing to see, move along */ > > > #include > > > > > > @@ -9,8 +13,6 @@ extern char __alt_instructions[], > > > __alt_instructions_end[]; > > > > > > #ifdef 
CONFIG_64BIT > > > > > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > > > - > > > #undef dereference_function_descriptor > > > void *dereference_function_descriptor(void *); > > > > > > diff --git a/arch/powerpc/include/asm/sections.h > > > b/arch/powerpc/include/asm/sections.h > > > index 32e7035863ac..b7f1ba04e756 100644 > > > --- a/arch/powerpc/include/asm/sections.h > > > +++ b/arch/powerpc/include/asm/sections.h > > > @@ -8,6 +8,10 @@ > > > > > > #define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed > > > > > > +#ifdef PPC64_ELF_ABI_v1 > > > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > > > +#endif > > > + > > > #include > > > > > > extern bool init_mem_is_free; > > > @@ -69,8 +73,6 @@ static inline int overlaps_kernel_text(unsigned long > > > start, unsigned long end) > > > > > > #ifdef PPC64_ELF_ABI_v1 > > > > > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > > > - > > > #undef dereference_function_descriptor > > > static inline void *dereference_function_descriptor(void *ptr) > > > { > > > diff --git a/include/asm-generic/sections.h > > > b/include/asm-generic/sections.h > > > index d16302d3eb59..1db5cfd69817 100644 > > > --- a/include/asm-generic/sections.h > > > +++ b/include/asm-generic/sections.h > > > @@ -59,7 +59,8 @@ extern char __noinstr_text_start[], > > > __noinstr_text_end[]; > > > extern __visible const void __nosave_begin, __nosave_end; > > > > > > /* Function descriptor handling (if any). Override in asm/sections.h */ > > > -#ifndef dereference_function_descriptor > > > +#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR > > > +#else > > > > why not > > #ifndef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR > > instead of #if/#else ? > > To avoid changing it again in patch 6, or getting an ugly #ifndef/#else at > the end. Ok. 
Building on parisc fails at multiple files like this: CC mm/filemap.o In file included from /home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/interrupt.h:21, from /home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/trace_recursion.h:5, from /home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/ftrace.h:10, from /home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/perf_event.h:49, from /home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/trace_events.h:10, from /home/cvs/parisc/git-kernel/linus-linux-2.6/include/trace/syscall.h:7, from /home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/syscalls.h:87, from /home/cvs/parisc/git-kernel/linus-linux-2.6/init/initramfs.c:11: /home/cvs/parisc/git-kernel/linus-linux-2.6/arch/parisc/include/asm/sections.h:7:9: error: unknown type name ‘Elf64_Fdesc’ 7 | typedef Elf64_Fdesc funct_descr_t; | ^~~ So, you still need e.g. this patch: diff --git
Re: [PATCH] powerpc: don't select KFENCE on platform PPC_FSL_BOOK3E
Liu Shixin writes: > kindly ping. I was under the impression you were trying to debug why it wasn't working with Christophe. cheers > On 2021/9/24 14:39, Liu Shixin wrote: >> On platform PPC_FSL_BOOK3E, all lowmem is managed by tlbcam. That means >> we didn't really map the kfence pool with page granularity. Therefore, >> if KFENCE is enabled, the system will hit the following panic: >> >> BUG: Kernel NULL pointer dereference on read at 0x >> Faulting instruction address: 0xc01de598 >> Oops: Kernel access of bad area, sig: 11 [#1] >> BE PAGE_SIZE=4K SMP NR_CPUS=4 MPC8544 DS >> Dumping ftrace buffer: >>(ftrace buffer empty) >> Modules linked in: >> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc3+ #298 >> NIP: c01de598 LR: c08ae9c4 CTR: >> REGS: c0b4bea0 TRAP: 0300 Not tainted (5.12.0-rc3+) >> MSR: 00021000 CR: 24000228 XER: 2000 >> DEAR: ESR: >> GPR00: c08ae9c4 c0b4bf60 c0ad64e0 ef72 00021000 >> 0200 >> GPR08: c0ad5000 0004 008fbb30 >> >> GPR16: c000 >> >> GPR24: c08ca004 c08ca004 c0b6a0e0 c0b6 c0b58f00 c085 c08ca000 >> ef72 >> NIP [c01de598] kfence_protect+0x44/0x6c >> LR [c08ae9c4] kfence_init+0xfc/0x2a4 >> Call Trace: >> [c0b4bf60] [efffe160] 0xefffe160 (unreliable) >> [c0b4bf70] [c08ae9c4] kfence_init+0xfc/0x2a4 >> [c0b4bfb0] [c0894d3c] start_kernel+0x3bc/0x574 >> [c0b4bff0] [c470] set_ivor+0x14c/0x188 >> Instruction dump: >> 7c0802a6 8109d594 546a653a 90010014 54630026 3920 7d48502e 2c0a >> 41820010 554a0026 5469b53a 7d295214 <8149> 38831000 554a003c 9149 >> random: get_random_bytes called from print_oops_end_marker+0x40/0x78 >> with crng_init=0 >> ---[ end trace ]--- >> >> Signed-off-by: Liu Shixin >> --- >> arch/powerpc/Kconfig | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig >> index d46db0bfb998..cffd57bcb5e4 100644 >> --- a/arch/powerpc/Kconfig >> +++ b/arch/powerpc/Kconfig >> @@ -185,7 +185,7 @@ config PPC >> select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 >> select 
HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 >> select HAVE_ARCH_KGDB >> -select HAVE_ARCH_KFENCE if PPC32 >> +select HAVE_ARCH_KFENCE if PPC32 && !PPC_FSL_BOOK3E >> select HAVE_ARCH_MMAP_RND_BITS >> select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT >> select HAVE_ARCH_NVRAM_OPS
Re: [PATCH] scsi: ibmvscsi: Use dma_alloc_coherent() instead of get_zeroed_page/dma_map_single()
Hi Cai, Thank you for the patch! Yet something to improve: [auto build test ERROR on powerpc/next] [also build test ERROR on v5.15-rc5 next-20211011] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Cai-Huoqing/scsi-ibmvscsi-Use-dma_alloc_coherent-instead-of-get_zeroed_page-dma_map_single/20211011-000515 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-allyesconfig (attached as .config) compiler: powerpc64-linux-gcc (GCC) 11.2.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/a1533699f9b84980097fc59d047b5292c1abab1b git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Cai-Huoqing/scsi-ibmvscsi-Use-dma_alloc_coherent-instead-of-get_zeroed_page-dma_map_single/20211011-000515 git checkout a1533699f9b84980097fc59d047b5292c1abab1b # save the attached .config to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): drivers/scsi/ibmvscsi/ibmvscsi.c: In function 'ibmvscsi_init_crq_queue': >> drivers/scsi/ibmvscsi/ibmvscsi.c:334:21: error: 'struct crq_queue' has no >> member named 'msg'; did you mean 'msgs'? 334 | if (!queue->msg) | ^~~ | msgs drivers/scsi/ibmvscsi/ibmvscsi.c:388:60: error: 'struct crq_queue' has no member named 'msg'; did you mean 'msgs'? 
388 | dma_free_coherent(hostdata->dev, PAGE_SIZE, queue->msg, queue->msg_token); |^~~ |msgs vim +334 drivers/scsi/ibmvscsi/ibmvscsi.c 312 313 /** 314 * ibmvscsi_init_crq_queue() - Initializes and registers CRQ with hypervisor 315 * @queue: crq_queue to initialize and register 316 * @hostdata: ibmvscsi_host_data of host 317 * @max_requests: maximum requests (unused) 318 * 319 * Allocates a page for messages, maps it for dma, and registers 320 * the crq with the hypervisor. 321 * Returns zero on success. 322 */ 323 static int ibmvscsi_init_crq_queue(struct crq_queue *queue, 324 struct ibmvscsi_host_data *hostdata, 325 int max_requests) 326 { 327 int rc; 328 int retrc; 329 struct vio_dev *vdev = to_vio_dev(hostdata->dev); 330 331 queue->size = PAGE_SIZE / sizeof(*queue->msgs); 332 queue->msgs = dma_alloc_coherent(hostdata->dev, PAGE_SIZE, 333 >msg_token, GFP_KERNEL); > 334 if (!queue->msg) 335 goto malloc_failed; 336 337 gather_partition_info(); 338 set_adapter_info(hostdata); 339 340 retrc = rc = plpar_hcall_norets(H_REG_CRQ, 341 vdev->unit_address, 342 queue->msg_token, PAGE_SIZE); 343 if (rc == H_RESOURCE) 344 /* maybe kexecing and resource is busy. try a reset */ 345 rc = ibmvscsi_reset_crq_queue(queue, 346hostdata); 347 348 if (rc == H_CLOSED) { 349 /* Adapter is good, but other end is not ready */ 350 dev_warn(hostdata->dev, "Partner adapter not ready\n"); 351 retrc = 0; 352 } else if (rc != 0) { 353 dev_warn(hostdata->dev, "Error %d opening adapter\n", rc); 354 goto reg_crq_failed; 355 } 356 357 queue->cur = 0; 358 spin_lock_init(>lock); 359 360 tasklet_init(>srp_task, (void *)ibmvscsi_task, 361 (unsigned long)hostdata); 362 363 if (request_irq(vdev->irq, 364 ibmvscsi_handle_event, 365 0, "ibmvscsi", (void *)hostdata) != 0) { 366 dev_err(hostdata->dev, "couldn't register irq 0x%x\n", 367 vdev->irq); 368 goto req_irq_failed; 369 } 370 371 rc = vio_enable_interrupts(vdev); 372 if (rc != 0) { 373
Re: [PATCH v1 04/10] asm-generic: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR to define associated stubs
Le 12/10/2021 à 08:02, Helge Deller a écrit : On 10/11/21 17:25, Christophe Leroy wrote: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of 'dereference_function_descriptor' to know whether arch has function descriptors. Signed-off-by: Christophe Leroy --- arch/ia64/include/asm/sections.h| 4 ++-- arch/parisc/include/asm/sections.h | 6 -- arch/powerpc/include/asm/sections.h | 6 -- include/asm-generic/sections.h | 3 ++- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 35f24e52149a..80f5868afb06 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -7,6 +7,8 @@ *David Mosberger-Tang */ +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 + #include #include #include @@ -27,8 +29,6 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b extern char __start_unwind[], __end_unwind[]; extern char __start_ivt_text[], __end_ivt_text[]; -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index bb52aea0cb21..2e781ee19b66 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -2,6 +2,10 @@ #ifndef _PARISC_SECTIONS_H #define _PARISC_SECTIONS_H +#ifdef CONFIG_64BIT +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 +#endif + /* nothing to see, move along */ #include @@ -9,8 +13,6 @@ extern char __alt_instructions[], __alt_instructions_end[]; #ifdef CONFIG_64BIT -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor void *dereference_function_descriptor(void *); diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 32e7035863ac..b7f1ba04e756 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -8,6 +8,10 @@ 
#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed +#ifdef PPC64_ELF_ABI_v1 +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 +#endif + #include extern bool init_mem_is_free; @@ -69,8 +73,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) #ifdef PPC64_ELF_ABI_v1 -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d16302d3eb59..1db5cfd69817 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -59,7 +59,8 @@ extern char __noinstr_text_start[], __noinstr_text_end[]; extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ -#ifndef dereference_function_descriptor +#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR +#else why not #ifndef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of #if/#else ? To avoid changing it again in patch 6, or getting an ugly #ifndef/#else at the end. #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) #endif
Re: [PATCH v1 04/10] asm-generic: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR to define associated stubs
On 10/11/21 17:25, Christophe Leroy wrote: > Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of > 'dereference_function_descriptor' > to know whether arch has function descriptors. > > Signed-off-by: Christophe Leroy > --- > arch/ia64/include/asm/sections.h| 4 ++-- > arch/parisc/include/asm/sections.h | 6 -- > arch/powerpc/include/asm/sections.h | 6 -- > include/asm-generic/sections.h | 3 ++- > 4 files changed, 12 insertions(+), 7 deletions(-) > > diff --git a/arch/ia64/include/asm/sections.h > b/arch/ia64/include/asm/sections.h > index 35f24e52149a..80f5868afb06 100644 > --- a/arch/ia64/include/asm/sections.h > +++ b/arch/ia64/include/asm/sections.h > @@ -7,6 +7,8 @@ > * David Mosberger-Tang > */ > > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > + > #include > #include > #include > @@ -27,8 +29,6 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], > __end_gate_brl_fsys_b > extern char __start_unwind[], __end_unwind[]; > extern char __start_ivt_text[], __end_ivt_text[]; > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > - > #undef dereference_function_descriptor > static inline void *dereference_function_descriptor(void *ptr) > { > diff --git a/arch/parisc/include/asm/sections.h > b/arch/parisc/include/asm/sections.h > index bb52aea0cb21..2e781ee19b66 100644 > --- a/arch/parisc/include/asm/sections.h > +++ b/arch/parisc/include/asm/sections.h > @@ -2,6 +2,10 @@ > #ifndef _PARISC_SECTIONS_H > #define _PARISC_SECTIONS_H > > +#ifdef CONFIG_64BIT > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > +#endif > + > /* nothing to see, move along */ > #include > > @@ -9,8 +13,6 @@ extern char __alt_instructions[], __alt_instructions_end[]; > > #ifdef CONFIG_64BIT > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > - > #undef dereference_function_descriptor > void *dereference_function_descriptor(void *); > > diff --git a/arch/powerpc/include/asm/sections.h > b/arch/powerpc/include/asm/sections.h > index 32e7035863ac..b7f1ba04e756 100644 > --- 
a/arch/powerpc/include/asm/sections.h > +++ b/arch/powerpc/include/asm/sections.h > @@ -8,6 +8,10 @@ > > #define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed > > +#ifdef PPC64_ELF_ABI_v1 > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > +#endif > + > #include > > extern bool init_mem_is_free; > @@ -69,8 +73,6 @@ static inline int overlaps_kernel_text(unsigned long start, > unsigned long end) > > #ifdef PPC64_ELF_ABI_v1 > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 > - > #undef dereference_function_descriptor > static inline void *dereference_function_descriptor(void *ptr) > { > diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h > index d16302d3eb59..1db5cfd69817 100644 > --- a/include/asm-generic/sections.h > +++ b/include/asm-generic/sections.h > @@ -59,7 +59,8 @@ extern char __noinstr_text_start[], __noinstr_text_end[]; > extern __visible const void __nosave_begin, __nosave_end; > > /* Function descriptor handling (if any). Override in asm/sections.h */ > -#ifndef dereference_function_descriptor > +#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR > +#else why not #ifndef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of #if/#else ? > #define dereference_function_descriptor(p) ((void *)(p)) > #define dereference_kernel_function_descriptor(p) ((void *)(p)) > #endif >
[PATCH 0/2] ftrace: make sure preemption disabled on recursion testing
The testing shows that perf_ftrace_function_call() is using smp_processor_id() with preemption enabled; all the checking on CPU could be wrong after preemption. PATCH 1/2 will fix that. Besides, as Peter pointed out, the testing of recursion within the section between the ftrace_test_recursion_trylock()/_unlock() pair also needs the preemption disabled as the documentation explained; PATCH 2/2 will make sure of that. Michael Wang (2): ftrace: disable preemption on the testing of recursion ftrace: prevent preemption in perf_ftrace_function_call() arch/csky/kernel/probes/ftrace.c | 2 -- arch/parisc/kernel/ftrace.c | 2 -- arch/powerpc/kernel/kprobes-ftrace.c | 2 -- arch/riscv/kernel/probes/ftrace.c| 2 -- arch/x86/kernel/kprobes/ftrace.c | 2 -- include/linux/trace_recursion.h | 10 +- kernel/livepatch/patch.c | 6 -- kernel/trace/trace_event_perf.c | 17 + kernel/trace/trace_functions.c | 5 - 9 files changed, 22 insertions(+), 26 deletions(-) -- 1.8.3.1
Re: [PATCH 0/2] ftrace: make sure preemption disabled on recursion testing
On 2021/10/12 下午1:39, 王贇 wrote: > The testing show that perf_ftrace_function_call() are using > smp_processor_id() with preemption enabled, all the checking > on CPU could be wrong after preemption, PATCH 1/2 will fix > that. 2/2 actually. > > Besides, as Peter point out, the testing of recursion within > the section between ftrace_test_recursion_trylock()/_unlock() > pair also need the preemption disabled as the documentation > explained, PATCH 2/2 will make sure on that. 1/2 actually... Regards, Michael Wang > > Michael Wang (2): > ftrace: disable preemption on the testing of recursion > ftrace: prevent preemption in perf_ftrace_function_call() > > arch/csky/kernel/probes/ftrace.c | 2 -- > arch/parisc/kernel/ftrace.c | 2 -- > arch/powerpc/kernel/kprobes-ftrace.c | 2 -- > arch/riscv/kernel/probes/ftrace.c| 2 -- > arch/x86/kernel/kprobes/ftrace.c | 2 -- > include/linux/trace_recursion.h | 10 +- > kernel/livepatch/patch.c | 6 -- > kernel/trace/trace_event_perf.c | 17 + > kernel/trace/trace_functions.c | 5 - > 9 files changed, 22 insertions(+), 26 deletions(-) >
[PATCH 2/2] ftrace: prevent preemption in perf_ftrace_function_call()
With CONFIG_DEBUG_PREEMPT we observed reports like: BUG: using smp_processor_id() in preemptible caller is perf_ftrace_function_call+0x6f/0x2e0 CPU: 1 PID: 680 Comm: a.out Not tainted Call Trace: dump_stack_lvl+0x8d/0xcf check_preemption_disabled+0x104/0x110 ? optimize_nops.isra.7+0x230/0x230 ? text_poke_bp_batch+0x9f/0x310 perf_ftrace_function_call+0x6f/0x2e0 ... __text_poke+0x5/0x620 text_poke_bp_batch+0x9f/0x310 This tells us the CPU could be changed after the task is preempted, and the checking on CPU before preemption will be invalid. This patch just turns off preemption in perf_ftrace_function_call() to prevent the CPU from changing. CC: Steven Rostedt Reported-by: Abaci Signed-off-by: Michael Wang --- kernel/trace/trace_event_perf.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 6aed10e..33c2f76 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -441,12 +441,19 @@ void perf_trace_buf_update(void *record, u16 type) if (!rcu_is_watching()) return; + /* +* Prevent CPU changing from now on. rcu must +* be in watching if the task was migrated and +* scheduled. +*/ + preempt_disable_notrace(); + if ((unsigned long)ops->private != smp_processor_id()) - return; + goto out; bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) - return; + goto out; event = container_of(ops, struct perf_event, ftrace_ops); @@ -468,16 +475,18 @@ void perf_trace_buf_update(void *record, u16 type) entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, ); if (!entry) - goto out; + goto unlock; entry->ip = ip; entry->parent_ip = parent_ip; perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, 1, , , NULL); -out: +unlock: ftrace_test_recursion_unlock(bit); #undef ENTRY_SIZE +out: + preempt_enable_notrace(); } static int perf_ftrace_function_register(struct perf_event *event) -- 1.8.3.1
[PATCH 1/2] ftrace: disable preemption on the testing of recursion
As the documentation explained, ftrace_test_recursion_trylock() and ftrace_test_recursion_unlock() were supposed to disable and enable preemption properly; however, currently this work is done outside of the functions, which could be missed by mistake. This patch will make sure preemption is disabled when trylock() succeeds, and the unlock() will enable preemption when the testing of recursion is finished. Reported-by: Abaci Suggested-by: Peter Zijlstra Signed-off-by: Michael Wang --- arch/csky/kernel/probes/ftrace.c | 2 -- arch/parisc/kernel/ftrace.c | 2 -- arch/powerpc/kernel/kprobes-ftrace.c | 2 -- arch/riscv/kernel/probes/ftrace.c| 2 -- arch/x86/kernel/kprobes/ftrace.c | 2 -- include/linux/trace_recursion.h | 10 +- kernel/livepatch/patch.c | 6 -- kernel/trace/trace_functions.c | 5 - 8 files changed, 9 insertions(+), 22 deletions(-) diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index ef2bb9b..dff7921 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -24,7 +24,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE)); @@ -64,7 +63,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 0a1e75a..3543496 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -216,7 +216,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -245,7 +244,6 @@ void 
kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 7154d58..072ebe7 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index aab85a8..7142ec4 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 596de2f..dd2ec14 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -59,7 
+58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index a9f9c57..805f9c4 100644 --- a/include/linux/trace_recursion.h +++
Re: [PATCH] powerpc/s64: Clarify that radix lacks DEBUG_PAGEALLOC
Le 12/10/2021 à 03:13, Joel Stanley a écrit : The page_alloc.c code will call into __kernel_map_pages when DEBUG_PAGEALLOC is configured and enabled. As the implementation assumes hash, this should crash spectacularly if not for a bit of luck in __kernel_map_pages. In this function linear_map_hash_count is always zero, the for loop exits without doing any damage. There are no other platforms that determine if they support debug_pagealloc at runtime. Instead of adding code to mm/page_alloc.c to do that, this change turns the map/unmap into a noop when in radix mode and prints a warning once. Signed-off-by: Joel Stanley --- I noticed this when I was looking at adding kfence support a while back. I've put that work aside and jpn has since gotten further than me, but I think this is a fix worth considering. arch/powerpc/include/asm/book3s/64/hash.h | 2 ++ arch/powerpc/mm/book3s64/hash_utils.c | 2 +- arch/powerpc/mm/book3s64/pgtable.c| 12 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index d959b0195ad9..674fe0e890dc 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -255,6 +255,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); int hash__remove_section_mapping(unsigned long start, unsigned long end); +void hash__kernel_map_pages(struct page *page, int numpages, int enable); + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index c145776d3ae5..cfd45245d009 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1988,7 +1988,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) mmu_kernel_ssize, 0); } -void __kernel_map_pages(struct page *page, int numpages, int 
enable) +void hash__kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long flags, vaddr, lmi; int i; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 9e16c7b1a6c5..0aefc272cd03 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -526,3 +526,15 @@ static int __init pgtable_debugfs_setup(void) return 0; } arch_initcall(pgtable_debugfs_setup); + +#ifdef CONFIG_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (radix_enabled()) { + pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n"); + return; + } + + hash__kernel_map_pages(page, numpages, enable); +} I think it would be better to do it similarly to most other functions (like map_kernel_page() for instance): In arch/powerpc/include/asm/book3s/64/pgtable.h do: static inline void __kernel_map_pages(struct page *page, int numpages, int enable) { if (radix_enabled()) radix__kernel_map_pages(...); else hash__kernel_map_pages(...); } Then in arch/powerpc/include/asm/book3s/64/radix.h do (or in /arch/powerpc/mm/book3s64/radix_pgtable.c if you prefer?): static inline void radix__kernel_map_pages(struct page *page, int numpages, int enable) { pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n"); } +#endif