[RESEND PATCH v2 2/2] ftrace: do CPU checking after preemption disabled

2021-10-12 Thread 王贇
With CONFIG_DEBUG_PREEMPT we observed reports like:

  BUG: using smp_processor_id() in preemptible
  caller is perf_ftrace_function_call+0x6f/0x2e0
  CPU: 1 PID: 680 Comm: a.out Not tainted
  Call Trace:
   
   dump_stack_lvl+0x8d/0xcf
   check_preemption_disabled+0x104/0x110
   ? optimize_nops.isra.7+0x230/0x230
   ? text_poke_bp_batch+0x9f/0x310
   perf_ftrace_function_call+0x6f/0x2e0
   ...
   __text_poke+0x5/0x620
   text_poke_bp_batch+0x9f/0x310

This tells us that the CPU may change after the task has been
preempted, so a CPU check done before preemption is disabled can no
longer be trusted.

Since ftrace_test_recursion_trylock() now disables preemption, this
patch simply moves the CPU check after trylock() to address the
issue.

CC: Steven Rostedt 
Reported-by: Abaci 
Signed-off-by: Michael Wang 
---
 kernel/trace/trace_event_perf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6aed10e..fba8cb7 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -441,13 +441,13 @@ void perf_trace_buf_update(void *record, u16 type)
if (!rcu_is_watching())
return;

-   if ((unsigned long)ops->private != smp_processor_id())
-   return;
-
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;

+   if ((unsigned long)ops->private != smp_processor_id())
+   goto out;
+
event = container_of(ops, struct perf_event, ftrace_ops);

/*
-- 
1.8.3.1



[RESEND PATCH v2 1/2] ftrace: disable preemption between ftrace_test_recursion_trylock/unlock()

2021-10-12 Thread 王贇
As the documentation explains, ftrace_test_recursion_trylock()
and ftrace_test_recursion_unlock() are supposed to disable and
enable preemption, but currently that work is left to the callers,
where it can easily be missed.

This patch makes sure preemption is disabled whenever trylock()
succeeds, and unlock() will re-enable preemption (if it was
previously enabled).

CC: Steven Rostedt 
CC: Miroslav Benes 
Reported-by: Abaci 
Suggested-by: Peter Zijlstra 
Signed-off-by: Michael Wang 
---
 arch/csky/kernel/probes/ftrace.c |  2 --
 arch/parisc/kernel/ftrace.c  |  2 --
 arch/powerpc/kernel/kprobes-ftrace.c |  2 --
 arch/riscv/kernel/probes/ftrace.c|  2 --
 arch/x86/kernel/kprobes/ftrace.c |  2 --
 include/linux/trace_recursion.h  | 22 +-
 kernel/livepatch/patch.c |  6 --
 kernel/trace/trace_functions.c   |  5 -
 8 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index b388228..834cffc 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -17,7 +17,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (!p) {
p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
@@ -57,7 +56,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 0a1e75a..3543496 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -216,7 +216,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -245,7 +244,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
}
__this_cpu_write(current_kprobe, NULL);
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c 
b/arch/powerpc/kernel/kprobes-ftrace.c
index 7154d58..072ebe7 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)nip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/riscv/kernel/probes/ftrace.c 
b/arch/riscv/kernel/probes/ftrace.c
index aab85a8..7142ec4 100644
--- a/arch/riscv/kernel/probes/ftrace.c
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
return;

-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 596de2f..dd2ec14 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
return;

-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index a9f9c57..101e1fb 100644
--- 
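[ The include/linux/trace_recursion.h hunk is truncated in this archive.
  Based on the discussion later in the thread, the intended change is
  roughly the sketch below (illustrative only, not the verbatim patch;
  the thread also discusses skipping the preempt calls when the returned
  bit is 0, since a nested caller has already disabled preemption): ]

static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
							  unsigned long parent_ip)
{
	int bit;

	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START,
					   TRACE_FTRACE_MAX);
	/* On success, keep preemption off until the matching unlock. */
	if (bit >= 0)
		preempt_disable_notrace();

	return bit;
}

static __always_inline void ftrace_test_recursion_unlock(int bit)
{
	preempt_enable_notrace();
	trace_clear_recursion(bit);
}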

[RESEND PATCH v2 0/2] fix & prevent the missing preemption disabling

2021-10-12 Thread 王贇
Testing shows that perf_ftrace_function_call() uses smp_processor_id()
with preemption enabled; any CPU check done that way can be wrong once
the task is preempted.

As Peter pointed out, the section between the
ftrace_test_recursion_trylock/unlock() pair requires preemption to be
disabled, as 'Documentation/trace/ftrace-uses.rst' explains, but
currently that work is done outside of the helpers.

Patch 1/2 makes sure preemption is disabled after trylock() succeeds;
patch 2/2 moves the smp_processor_id() check after trylock() to
address the issue.

v1: 
https://lore.kernel.org/all/8c7de46d-9869-aa5e-2bb9-5dbc2eda3...@linux.alibaba.com/

Michael Wang (2):
  ftrace: disable preemption between ftrace_test_recursion_trylock/unlock()
  ftrace: do CPU checking after preemption disabled

 arch/csky/kernel/probes/ftrace.c |  2 --
 arch/parisc/kernel/ftrace.c  |  2 --
 arch/powerpc/kernel/kprobes-ftrace.c |  2 --
 arch/riscv/kernel/probes/ftrace.c|  2 --
 arch/x86/kernel/kprobes/ftrace.c |  2 --
 include/linux/trace_recursion.h  | 22 +-
 kernel/livepatch/patch.c |  6 --
 kernel/trace/trace_event_perf.c  |  6 +++---
 kernel/trace/trace_functions.c   |  5 -
 9 files changed, 24 insertions(+), 25 deletions(-)

-- 
1.8.3.1



Re: [PATCH v2 0/2] fix & prevent the missing preemption disabling

2021-10-12 Thread 王贇



On 2021/10/13 11:26 AM, Steven Rostedt wrote:
> Please start a new thread when sending new versions. v2 should not be a
> reply to v1. If you want to reference v1, just add it to the cover
> letter with a link tag:
> 
> Link: 
> https://lore.kernel.org/all/8c7de46d-9869-aa5e-2bb9-5dbc2eda3...@linux.alibaba.com/

OK, I'll resend it with a link then.

Regards,
Michael Wang


> 
> -- Steve
> 
> 
> On Wed, 13 Oct 2021 11:16:56 +0800
> 王贇  wrote:
> 
>> The testing show that perf_ftrace_function_call() are using 
>> smp_processor_id()
>> with preemption enabled, all the checking on CPU could be wrong after 
>> preemption.
>>
>> As Peter point out, the section between 
>> ftrace_test_recursion_trylock/unlock()
>> pair require the preemption to be disabled as 
>> 'Documentation/trace/ftrace-uses.rst'
>> explained, but currently the work is done outside of the helpers.
>>
>> Patch 1/2 will make sure preemption disabled after trylock() succeed,
>> patch 2/2 will do smp_processor_id() checking after trylock to address the
>> issue.
>>
>> Michael Wang (2):
>>   ftrace: disable preemption between ftrace_test_recursion_trylock/unlock()
>>   ftrace: do CPU checking after preemption disabled
>>
>>  arch/csky/kernel/probes/ftrace.c |  2 --
>>  arch/parisc/kernel/ftrace.c  |  2 --
>>  arch/powerpc/kernel/kprobes-ftrace.c |  2 --
>>  arch/riscv/kernel/probes/ftrace.c|  2 --
>>  arch/x86/kernel/kprobes/ftrace.c |  2 --
>>  include/linux/trace_recursion.h  | 22 +-
>>  kernel/livepatch/patch.c |  6 --
>>  kernel/trace/trace_event_perf.c  |  6 +++---
>>  kernel/trace/trace_functions.c   |  5 -
>>  9 files changed, 24 insertions(+), 25 deletions(-)
>>


Re: [PATCH v2 0/2] fix & prevent the missing preemption disabling

2021-10-12 Thread Steven Rostedt
Please start a new thread when sending new versions. v2 should not be a
reply to v1. If you want to reference v1, just add it to the cover
letter with a link tag:

Link: 
https://lore.kernel.org/all/8c7de46d-9869-aa5e-2bb9-5dbc2eda3...@linux.alibaba.com/

-- Steve


On Wed, 13 Oct 2021 11:16:56 +0800
王贇  wrote:

> The testing show that perf_ftrace_function_call() are using smp_processor_id()
> with preemption enabled, all the checking on CPU could be wrong after 
> preemption.
> 
> As Peter point out, the section between ftrace_test_recursion_trylock/unlock()
> pair require the preemption to be disabled as 
> 'Documentation/trace/ftrace-uses.rst'
> explained, but currently the work is done outside of the helpers.
> 
> Patch 1/2 will make sure preemption disabled after trylock() succeed,
> patch 2/2 will do smp_processor_id() checking after trylock to address the
> issue.
> 
> Michael Wang (2):
>   ftrace: disable preemption between ftrace_test_recursion_trylock/unlock()
>   ftrace: do CPU checking after preemption disabled
> 
>  arch/csky/kernel/probes/ftrace.c |  2 --
>  arch/parisc/kernel/ftrace.c  |  2 --
>  arch/powerpc/kernel/kprobes-ftrace.c |  2 --
>  arch/riscv/kernel/probes/ftrace.c|  2 --
>  arch/x86/kernel/kprobes/ftrace.c |  2 --
>  include/linux/trace_recursion.h  | 22 +-
>  kernel/livepatch/patch.c |  6 --
>  kernel/trace/trace_event_perf.c  |  6 +++---
>  kernel/trace/trace_functions.c   |  5 -
>  9 files changed, 24 insertions(+), 25 deletions(-)
> 



[PATCH v2 2/2] ftrace: do CPU checking after preemption disabled

2021-10-12 Thread 王贇
With CONFIG_DEBUG_PREEMPT we observed reports like:

  BUG: using smp_processor_id() in preemptible
  caller is perf_ftrace_function_call+0x6f/0x2e0
  CPU: 1 PID: 680 Comm: a.out Not tainted
  Call Trace:
   
   dump_stack_lvl+0x8d/0xcf
   check_preemption_disabled+0x104/0x110
   ? optimize_nops.isra.7+0x230/0x230
   ? text_poke_bp_batch+0x9f/0x310
   perf_ftrace_function_call+0x6f/0x2e0
   ...
   __text_poke+0x5/0x620
   text_poke_bp_batch+0x9f/0x310

This tells us that the CPU may change after the task has been
preempted, so a CPU check done before preemption is disabled can no
longer be trusted.

Since ftrace_test_recursion_trylock() now disables preemption, this
patch simply moves the CPU check after trylock() to address the
issue.

CC: Steven Rostedt 
Reported-by: Abaci 
Signed-off-by: Michael Wang 
---
 kernel/trace/trace_event_perf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6aed10e..fba8cb7 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -441,13 +441,13 @@ void perf_trace_buf_update(void *record, u16 type)
if (!rcu_is_watching())
return;

-   if ((unsigned long)ops->private != smp_processor_id())
-   return;
-
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;

+   if ((unsigned long)ops->private != smp_processor_id())
+   goto out;
+
event = container_of(ops, struct perf_event, ftrace_ops);

/*
-- 
1.8.3.1



[PATCH v2 1/2] ftrace: disable preemption between ftrace_test_recursion_trylock/unlock()

2021-10-12 Thread 王贇
As the documentation explains, ftrace_test_recursion_trylock()
and ftrace_test_recursion_unlock() are supposed to disable and
enable preemption, but currently that work is left to the callers,
where it can easily be missed.

This patch makes sure preemption is disabled whenever trylock()
succeeds, and unlock() will re-enable preemption (if it was
previously enabled).

CC: Steven Rostedt 
CC: Miroslav Benes 
Reported-by: Abaci 
Suggested-by: Peter Zijlstra 
Signed-off-by: Michael Wang 
---
 arch/csky/kernel/probes/ftrace.c |  2 --
 arch/parisc/kernel/ftrace.c  |  2 --
 arch/powerpc/kernel/kprobes-ftrace.c |  2 --
 arch/riscv/kernel/probes/ftrace.c|  2 --
 arch/x86/kernel/kprobes/ftrace.c |  2 --
 include/linux/trace_recursion.h  | 22 +-
 kernel/livepatch/patch.c |  6 --
 kernel/trace/trace_functions.c   |  5 -
 8 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index b388228..834cffc 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -17,7 +17,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (!p) {
p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
@@ -57,7 +56,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 0a1e75a..3543496 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -216,7 +216,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -245,7 +244,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
}
__this_cpu_write(current_kprobe, NULL);
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c 
b/arch/powerpc/kernel/kprobes-ftrace.c
index 7154d58..072ebe7 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)nip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/riscv/kernel/probes/ftrace.c 
b/arch/riscv/kernel/probes/ftrace.c
index aab85a8..7142ec4 100644
--- a/arch/riscv/kernel/probes/ftrace.c
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
return;

-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 596de2f..dd2ec14 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
return;

-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index a9f9c57..101e1fb 100644
--- 

[PATCH v2 0/2] fix & prevent the missing preemption disabling

2021-10-12 Thread 王贇
Testing shows that perf_ftrace_function_call() uses smp_processor_id()
with preemption enabled; any CPU check done that way can be wrong once
the task is preempted.

As Peter pointed out, the section between the
ftrace_test_recursion_trylock/unlock() pair requires preemption to be
disabled, as 'Documentation/trace/ftrace-uses.rst' explains, but
currently that work is done outside of the helpers.

Patch 1/2 makes sure preemption is disabled after trylock() succeeds;
patch 2/2 moves the smp_processor_id() check after trylock() to
address the issue.

Michael Wang (2):
  ftrace: disable preemption between ftrace_test_recursion_trylock/unlock()
  ftrace: do CPU checking after preemption disabled

 arch/csky/kernel/probes/ftrace.c |  2 --
 arch/parisc/kernel/ftrace.c  |  2 --
 arch/powerpc/kernel/kprobes-ftrace.c |  2 --
 arch/riscv/kernel/probes/ftrace.c|  2 --
 arch/x86/kernel/kprobes/ftrace.c |  2 --
 include/linux/trace_recursion.h  | 22 +-
 kernel/livepatch/patch.c |  6 --
 kernel/trace/trace_event_perf.c  |  6 +++---
 kernel/trace/trace_functions.c   |  5 -
 9 files changed, 24 insertions(+), 25 deletions(-)

-- 
1.8.3.1




Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread 王贇



On 2021/10/13 10:30 AM, Steven Rostedt wrote:
> On Wed, 13 Oct 2021 10:04:52 +0800
> 王贇  wrote:
> 
>> I see, while the user can still check smp_processor_id() after trylock
>> return bit 0...
> 
> But preemption would have already been disabled. That's because a bit 0
> means that a recursion check has already been made by a previous
> caller and this one is nested, thus preemption is already disabled.
> If bit is 0, then preemption had better be disabled as well.

Thanks for the explanation, now I get your point :-)

Let's make bit 0 an exemption then.

Regards,
Michael Wang

> 
> -- Steve
> 


Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread 王贇



On 2021/10/13 10:27 AM, Steven Rostedt wrote:
> On Wed, 13 Oct 2021 09:50:17 +0800
> 王贇  wrote:
> 
 -  preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
  }  
>>>
>>> I don't like this change much. We have preempt_disable there not because 
>>> of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was 
>>> added later. Yes, it would work with the change, but it would also hide 
>>> things which should not be hidden in my opinion.  
>>
>> Not very sure about the backgroup stories, but just found this in
>> 'Documentation/trace/ftrace-uses.rst':
>>
>>   Note, on success,
>>   ftrace_test_recursion_trylock() will disable preemption, and the
>>   ftrace_test_recursion_unlock() will enable it again (if it was previously
>>   enabled).
> 
> Right that part is to be fixed by what you are adding here.
> 
> The point that Miroslav is complaining about is that the preemption
> disabling is special in this case, and not just from the recursion
> point of view, which is why the comment is still required.

My bad... the title is confusing people, I will rewrite it.

Regards,
Michael Wang

> 
> -- Steve
> 
> 
>>
>> Seems like this lock pair was supposed to take care the preemtion itself?


Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread Steven Rostedt
On Wed, 13 Oct 2021 10:04:52 +0800
王贇  wrote:

> I see, while the user can still check smp_processor_id() after trylock
> return bit 0...

But preemption would have already been disabled. That's because bit 0
means that a recursion check has already been made by a previous
caller and this one is nested; thus preemption is already disabled.
If the bit is 0, then preemption had better be disabled as well.

-- Steve


Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread Steven Rostedt
On Wed, 13 Oct 2021 09:50:17 +0800
王贇  wrote:

> >> -  preempt_enable_notrace();
> >>ftrace_test_recursion_unlock(bit);
> >>  }  
> > 
> > I don't like this change much. We have preempt_disable there not because 
> > of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was 
> > added later. Yes, it would work with the change, but it would also hide 
> > things which should not be hidden in my opinion.  
> 
> Not very sure about the backgroup stories, but just found this in
> 'Documentation/trace/ftrace-uses.rst':
> 
>   Note, on success,
>   ftrace_test_recursion_trylock() will disable preemption, and the
>   ftrace_test_recursion_unlock() will enable it again (if it was previously
>   enabled).

Right, that part is to be fixed by what you are adding here.

The point that Miroslav is complaining about is that the preemption
disabling is special in this case, and not just from the recursion
point of view, which is why the comment is still required.

-- Steve


> 
> Seems like this lock pair was supposed to take care the preemtion itself?


Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread 王贇



On 2021/10/12 8:43 PM, Steven Rostedt wrote:
> On Tue, 12 Oct 2021 13:40:08 +0800
> 王贇  wrote:
> 
>> --- a/include/linux/trace_recursion.h
>> +++ b/include/linux/trace_recursion.h
>> @@ -214,7 +214,14 @@ static __always_inline void trace_clear_recursion(int 
>> bit)
>>  static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
>>   unsigned long 
>> parent_ip)
>>  {
>> -return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, 
>> TRACE_FTRACE_MAX);
>> +int bit;
>> +
>> +preempt_disable_notrace();
> 
> The recursion test does not require preemption disabled, it uses the task
> struct, not per_cpu variables, so you should not disable it before the test.
> 
>   bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, 
> TRACE_FTRACE_MAX);
>   if (bit >= 0)
>   preempt_disable_notrace();
> 
> And if the bit is zero, it means a recursion check was already done by
> another caller (ftrace handler does the check, followed by calling perf),
> and you really don't even need to disable preemption in that case.
> 
>   if (bit > 0)
>   preempt_disable_notrace();
> 
> And on the unlock, have:
> 
>  static __always_inline void ftrace_test_recursion_unlock(int bit)
>  {
>   if (bit)
>   preempt_enable_notrace();
>   trace_clear_recursion(bit);
>  }
> 
> But maybe that's over optimizing ;-)

I see, but the user can still check smp_processor_id() after trylock
returns bit 0...

I guess Peter's point from the very beginning is to prevent such
cases: a production kernel will not have preemption debugging enabled,
so this kind of issue won't get reported but could cause trouble that
is really hard to track down. A way to eliminate this class of issue
once and for all sounds attractive, doesn't it?

Regards,
Michael Wang

> 
> -- Steve
> 
> 
>> +bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, 
>> TRACE_FTRACE_MAX);
>> +if (bit < 0)
>> +preempt_enable_notrace();
>> +
>> +return bit;
>>  }


Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread 王贇



On 2021/10/12 8:29 PM, Steven Rostedt wrote:
> On Tue, 12 Oct 2021 14:24:43 +0200 (CEST)
> Miroslav Benes  wrote:
> 
>>> +++ b/kernel/livepatch/patch.c
>>> @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
>>> bit = ftrace_test_recursion_trylock(ip, parent_ip);
>>> if (WARN_ON_ONCE(bit < 0))
>>> return;
>>> -   /*
>>> -* A variant of synchronize_rcu() is used to allow patching functions
>>> -* where RCU is not watching, see klp_synchronize_transition().
>>> -*/
>>> -   preempt_disable_notrace();
>>>
>>> func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
>>>   stack_node);
>>> @@ -120,7 +115,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
>>> klp_arch_set_pc(fregs, (unsigned long)func->new_func);
>>>
>>>  unlock:
>>> -   preempt_enable_notrace();
>>> ftrace_test_recursion_unlock(bit);
>>>  }  
>>
>> I don't like this change much. We have preempt_disable there not because 
>> of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was 
>> added later. Yes, it would work with the change, but it would also hide 
>> things which should not be hidden in my opinion.
> 
> Agreed, but I believe the change is fine, but requires a nice comment to
> explain what you said above.
> 
> Thus, before the "ftrace_test_recursion_trylock()" we need:
> 
>   /*
>* The ftrace_test_recursion_trylock() will disable preemption,
>* which is required for the variant of synchronize_rcu() that is
>* used to allow patching functions where RCU is not watching.
>* See klp_synchronize_transition() for more details.
>*/

Will be in v2 too :-)

Regards,
Michael Wang

> 
> -- Steve
> 
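[ Putting the suggestion above together with the diff, the top of
  klp_ftrace_handler() after this change would look roughly like the
  sketch below (illustrative only, not the verbatim v2 patch): ]

	/*
	 * The ftrace_test_recursion_trylock() will disable preemption,
	 * which is required for the variant of synchronize_rcu() that is
	 * used to allow patching functions where RCU is not watching.
	 * See klp_synchronize_transition() for more details.
	 */
	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (WARN_ON_ONCE(bit < 0))
		return;

	func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
				      stack_node);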


Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread 王贇



On 2021/10/12 8:24 PM, Miroslav Benes wrote:
[snip]
>>
>>  func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
>>stack_node);
>> @@ -120,7 +115,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
>>  klp_arch_set_pc(fregs, (unsigned long)func->new_func);
>>
>>  unlock:
>> -preempt_enable_notrace();
>>  ftrace_test_recursion_unlock(bit);
>>  }
> 
> I don't like this change much. We have preempt_disable there not because 
> of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was 
> added later. Yes, it would work with the change, but it would also hide 
> things which should not be hidden in my opinion.

Not very sure about the background story, but I just found this in
'Documentation/trace/ftrace-uses.rst':

  Note, on success,
  ftrace_test_recursion_trylock() will disable preemption, and the
  ftrace_test_recursion_unlock() will enable it again (if it was previously
  enabled).

Seems like this lock pair was supposed to take care of the preemption itself?

Regards,
Michael Wang

> 
> Miroslav
> 


Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread 王贇



On 2021/10/12 8:17 PM, Steven Rostedt wrote:
> On Tue, 12 Oct 2021 13:40:08 +0800
> 王贇  wrote:
> 
>> @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
>>  bit = ftrace_test_recursion_trylock(ip, parent_ip);
>>  if (WARN_ON_ONCE(bit < 0))
>>  return;
>> -/*
>> - * A variant of synchronize_rcu() is used to allow patching functions
>> - * where RCU is not watching, see klp_synchronize_transition().
>> - */
> 
> I have to take a deeper look at this patch set, but do not remove this
> comment, as it explains the protection here, that is not obvious with the
> changes you made.

Will keep that in v2.

Regards,
Michael Wang

> 
> -- Steve
> 
> 
>> -preempt_disable_notrace();
>>
>>  func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
>>stack_node);


Re: [PATCH 2/2] ftrace: prevent preemption in perf_ftrace_function_call()

2021-10-12 Thread 王贇



On 2021/10/12 7:20 PM, Peter Zijlstra wrote:
> On Tue, Oct 12, 2021 at 01:40:31PM +0800, 王贇 wrote:
> 
>> diff --git a/kernel/trace/trace_event_perf.c 
>> b/kernel/trace/trace_event_perf.c
>> index 6aed10e..33c2f76 100644
>> --- a/kernel/trace/trace_event_perf.c
>> +++ b/kernel/trace/trace_event_perf.c
>> @@ -441,12 +441,19 @@ void perf_trace_buf_update(void *record, u16 type)
>>  if (!rcu_is_watching())
>>  return;
>>
>> +/*
>> + * Prevent CPU changing from now on. rcu must
>> + * be in watching if the task was migrated and
>> + * scheduled.
>> + */
>> +preempt_disable_notrace();
>> +
>>  if ((unsigned long)ops->private != smp_processor_id())
>> -return;
>> +goto out;
>>
>>  bit = ftrace_test_recursion_trylock(ip, parent_ip);
>>  if (bit < 0)
>> -return;
>> +goto out;
>>
>>  event = container_of(ops, struct perf_event, ftrace_ops);
>>
> 
> This seems rather daft, wouldn't it be easier to just put that check
> under the recursion thing?

If the condition matches, an extra lock/unlock will be introduced,
but I guess that's acceptable since this seems unlikely to happen :-P

Will move the check in v2.

Regards,
Michael Wang

> 


[PATCH AUTOSEL 4.4 2/2] powerpc/security: Add a helper to query stf_barrier type

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ]

Add a helper to return the stf_barrier type for the current processor.

Signed-off-by: Naveen N. Rao 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/security_features.h | 5 +
 arch/powerpc/kernel/security.c   | 5 +
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h 
b/arch/powerpc/include/asm/security_features.h
index 3b45a64e491e..a673416da388 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(unsigned long 
feature)
return !!(powerpc_security_features & feature);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return 
STF_BARRIER_NONE; }
+#endif
 
 // Features indicating support for Spectre/Meltdown mitigations
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 45778c83038f..ac9a2498efe7 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -249,6 +249,11 @@ static int __init handle_no_stf_barrier(char *p)
 
 early_param("no_stf_barrier", handle_no_stf_barrier);
 
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+   return stf_enabled_flush_types;
+}
+
 /* This is the generic flag used by other architectures */
 static int __init handle_ssbd(char *p)
 {
-- 
2.33.0
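[ For context: in mainline this helper is consumed by the powerpc BPF JIT
  when handling BPF_NOSPEC (see the 5.14 backports further down).  A
  minimal usage sketch, where emit_stf_barrier_seq() is a hypothetical
  placeholder for emitting the mitigation sequence: ]

	enum stf_barrier_type stf_barrier = stf_barrier_type_get();

	/* Only emit a mitigation if the firmware advertised one. */
	if (stf_barrier != STF_BARRIER_NONE)
		emit_stf_barrier_seq(stf_barrier);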



[PATCH AUTOSEL 4.9 4/4] powerpc/security: Add a helper to query stf_barrier type

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ]

Add a helper to return the stf_barrier type for the current processor.

Signed-off-by: Naveen N. Rao 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/security_features.h | 5 +
 arch/powerpc/kernel/security.c   | 5 +
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h 
b/arch/powerpc/include/asm/security_features.h
index 3b45a64e491e..a673416da388 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(unsigned long 
feature)
return !!(powerpc_security_features & feature);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return 
STF_BARRIER_NONE; }
+#endif
 
 // Features indicating support for Spectre/Meltdown mitigations
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index ff85fc800183..df42f020e703 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -249,6 +249,11 @@ static int __init handle_no_stf_barrier(char *p)
 
 early_param("no_stf_barrier", handle_no_stf_barrier);
 
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+   return stf_enabled_flush_types;
+}
+
 /* This is the generic flag used by other architectures */
 static int __init handle_ssbd(char *p)
 {
-- 
2.33.0



[PATCH AUTOSEL 4.14 5/5] powerpc/security: Add a helper to query stf_barrier type

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ]

Add a helper to return the stf_barrier type for the current processor.

Signed-off-by: Naveen N. Rao 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/security_features.h | 5 +
 arch/powerpc/kernel/security.c   | 5 +
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h 
b/arch/powerpc/include/asm/security_features.h
index 3b45a64e491e..a673416da388 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(unsigned long 
feature)
return !!(powerpc_security_features & feature);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return 
STF_BARRIER_NONE; }
+#endif
 
 // Features indicating support for Spectre/Meltdown mitigations
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index b3f540c9f410..75a5277ef7b8 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -248,6 +248,11 @@ static int __init handle_no_stf_barrier(char *p)
 
 early_param("no_stf_barrier", handle_no_stf_barrier);
 
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+   return stf_enabled_flush_types;
+}
+
 /* This is the generic flag used by other architectures */
 static int __init handle_ssbd(char *p)
 {
-- 
2.33.0



[PATCH AUTOSEL 4.19 5/5] powerpc/security: Add a helper to query stf_barrier type

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ]

Add a helper to return the stf_barrier type for the current processor.

Signed-off-by: Naveen N. Rao 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/security_features.h | 5 +
 arch/powerpc/kernel/security.c   | 5 +
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h 
b/arch/powerpc/include/asm/security_features.h
index 3b45a64e491e..a673416da388 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(unsigned long 
feature)
return !!(powerpc_security_features & feature);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return 
STF_BARRIER_NONE; }
+#endif
 
 // Features indicating support for Spectre/Meltdown mitigations
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 6a3dde9587cc..48985a1fd34d 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -248,6 +248,11 @@ static int __init handle_no_stf_barrier(char *p)
 
 early_param("no_stf_barrier", handle_no_stf_barrier);
 
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+   return stf_enabled_flush_types;
+}
+
 /* This is the generic flag used by other architectures */
 static int __init handle_ssbd(char *p)
 {
-- 
2.33.0



[PATCH AUTOSEL 5.4 6/6] powerpc/security: Add a helper to query stf_barrier type

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ]

Add a helper to return the stf_barrier type for the current processor.

Signed-off-by: Naveen N. Rao 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/security_features.h | 5 +
 arch/powerpc/kernel/security.c   | 5 +
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h 
b/arch/powerpc/include/asm/security_features.h
index e9e3f85134e5..316a9c8d1929 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature)
return !!(powerpc_security_features & feature);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return 
STF_BARRIER_NONE; }
+#endif
 
 // Features indicating support for Spectre/Meltdown mitigations
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 1740a66cea84..ff022e725693 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -256,6 +256,11 @@ static int __init handle_no_stf_barrier(char *p)
 
 early_param("no_stf_barrier", handle_no_stf_barrier);
 
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+   return stf_enabled_flush_types;
+}
+
 /* This is the generic flag used by other architectures */
 static int __init handle_ssbd(char *p)
 {
-- 
2.33.0



[PATCH AUTOSEL 5.10 10/11] powerpc/security: Add a helper to query stf_barrier type

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ]

Add a helper to return the stf_barrier type for the current processor.

Signed-off-by: Naveen N. Rao 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/security_features.h | 5 +
 arch/powerpc/kernel/security.c   | 5 +
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h 
b/arch/powerpc/include/asm/security_features.h
index b774a4477d5f..e380acc6e413 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature)
return !!(powerpc_security_features & feature);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return 
STF_BARRIER_NONE; }
+#endif
 
 // Features indicating support for Spectre/Meltdown mitigations
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index e4e1a94ccf6a..3f510c911b10 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -261,6 +261,11 @@ static int __init handle_no_stf_barrier(char *p)
 
 early_param("no_stf_barrier", handle_no_stf_barrier);
 
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+   return stf_enabled_flush_types;
+}
+
 /* This is the generic flag used by other architectures */
 static int __init handle_ssbd(char *p)
 {
-- 
2.33.0



[PATCH AUTOSEL 5.14 16/17] powerpc/bpf: Emit stf barrier instruction sequences for BPF_NOSPEC

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit b7540d62509453263604a155bf2d5f0ed450cba2 ]

Emit similar instruction sequences to commit a048a07d7f4535
("powerpc/64s: Add support for a store forwarding barrier at kernel
entry/exit") when encountering BPF_NOSPEC.

Mitigations are enabled depending on what the firmware advertises. In
particular, we do not gate these mitigations based on current settings,
just like in x86. Due to this, we don't need to take any action if
mitigations are enabled or disabled at runtime.

Signed-off-by: Naveen N. Rao 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/956570cbc191cd41f8274bed48ee757a86dac62a.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/net/bpf_jit64.h  |  8 ++---
 arch/powerpc/net/bpf_jit_comp64.c | 55 ---
 2 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 7b713edfa7e2..b63b35e45e55 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -16,18 +16,18 @@
  * with our redzone usage.
  *
  * [   prev sp ] <-
- * [   nv gpr save area] 6*8   |
+ * [   nv gpr save area] 5*8   |
  * [tail_call_cnt  ] 8 |
- * [local_tmp_var  ] 8 |
+ * [local_tmp_var  ] 16|
  * fp (r31) -->[   ebpf stack space] upto 512  |
  * [ frame header  ] 32/112|
  * sp (r1) --->[stack pointer  ] --
  */
 
 /* for gpr non volatile registers BPG_REG_6 to 10 */
-#define BPF_PPC_STACK_SAVE (6*8)
+#define BPF_PPC_STACK_SAVE (5*8)
 /* for bpf JIT code internal usage */
-#define BPF_PPC_STACK_LOCALS   16
+#define BPF_PPC_STACK_LOCALS   24
 /* stack frame excluding BPF stack, ensure this is quadword aligned */
 #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \
 BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index f06c62089b14..1a567c46730a 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "bpf_jit64.h"
 
@@ -35,9 +36,9 @@ static inline bool bpf_has_stack_frame(struct codegen_context 
*ctx)
  * [   prev sp ] <-
  * [ ...   ]   |
  * sp (r1) --->[stack pointer  ] --
- * [   nv gpr save area] 6*8
+ * [   nv gpr save area] 5*8
  * [tail_call_cnt  ] 8
- * [local_tmp_var  ] 8
+ * [local_tmp_var  ] 16
  * [   unused red zone ] 208 bytes protected
  */
 static int bpf_jit_stack_local(struct codegen_context *ctx)
@@ -45,12 +46,12 @@ static int bpf_jit_stack_local(struct codegen_context *ctx)
if (bpf_has_stack_frame(ctx))
return STACK_FRAME_MIN_SIZE + ctx->stack_size;
else
-   return -(BPF_PPC_STACK_SAVE + 16);
+   return -(BPF_PPC_STACK_SAVE + 24);
 }
 
 static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
 {
-   return bpf_jit_stack_local(ctx) + 8;
+   return bpf_jit_stack_local(ctx) + 16;
 }
 
 static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
@@ -272,10 +273,33 @@ static int bpf_jit_emit_tail_call(u32 *image, struct 
codegen_context *ctx, u32 o
return 0;
 }
 
+/*
+ * We spill into the redzone always, even if the bpf program has its own 
stackframe.
+ * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local()
+ */
+void bpf_stf_barrier(void);
+
+asm (
+"  .global bpf_stf_barrier ;"
+"  bpf_stf_barrier:;"
+"  std 21,-64(1)   ;"
+"  std 22,-56(1)   ;"
+"  sync;"
+"  ld  21,-64(1)   ;"
+"  ld  22,-56(1)   ;"
+"  ori 31,31,0 ;"
+"  .rept 14;"
+"  b   1f  ;"
+"  1:  ;"
+"  .endr   ;"
+"  blr ;"
+);
+
 /* Assemble the body code between the prologue & epilogue */
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context 
*ctx,
   u32 *addrs, bool extra_pass)
 {
+   enum stf_barrier_type stf_barrier = stf_barrier_type_get();
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
int i, ret;
@@ -633,6 +657,29 @@ int 

[PATCH AUTOSEL 5.14 15/17] powerpc/security: Add a helper to query stf_barrier type

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 030905920f32e91a52794937f67434ac0b3ea41a ]

Add a helper to return the stf_barrier type for the current processor.

Signed-off-by: Naveen N. Rao 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/3bd5d7f96ea1547991ac2ce3137dc2b220bae285.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/security_features.h | 5 +
 arch/powerpc/kernel/security.c   | 5 +
 2 files changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/security_features.h 
b/arch/powerpc/include/asm/security_features.h
index 792eefaf230b..27574f218b37 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature)
return !!(powerpc_security_features & feature);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return 
STF_BARRIER_NONE; }
+#endif
 
 // Features indicating support for Spectre/Meltdown mitigations
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index cc51fa52e783..e723ff77cc9b 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -263,6 +263,11 @@ static int __init handle_no_stf_barrier(char *p)
 
 early_param("no_stf_barrier", handle_no_stf_barrier);
 
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+   return stf_enabled_flush_types;
+}
+
 /* This is the generic flag used by other architectures */
 static int __init handle_ssbd(char *p)
 {
-- 
2.33.0



[PATCH AUTOSEL 5.14 14/17] powerpc/bpf: Validate branch ranges

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 3832ba4e283d7052b783dab8311df7e3590fed93 ]

Add checks to ensure that we never emit branch instructions with
truncated branch offsets.

Suggested-by: Michael Ellerman 
Signed-off-by: Naveen N. Rao 
Tested-by: Johan Almbladh 
Reviewed-by: Christophe Leroy 
Acked-by: Song Liu 
Acked-by: Johan Almbladh 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/71d33a6b7603ec1013c9734dd8bdd4ff5e929142.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/net/bpf_jit.h| 26 --
 arch/powerpc/net/bpf_jit_comp.c   |  6 +-
 arch/powerpc/net/bpf_jit_comp32.c |  8 ++--
 arch/powerpc/net/bpf_jit_comp64.c |  8 ++--
 4 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 935ea95b6635..7e9b978b768e 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -24,16 +24,30 @@
 #define EMIT(instr)PLANT_INSTR(image, ctx->idx, instr)
 
 /* Long jump; (unconditional 'branch') */
-#define PPC_JMP(dest)  EMIT(PPC_INST_BRANCH |\
-(((dest) - (ctx->idx * 4)) & 0x03fc))
+#define PPC_JMP(dest)\
+   do {  \
+   long offset = (long)(dest) - (ctx->idx * 4);  \
+   if (!is_offset_in_branch_range(offset)) { \
+   pr_err_ratelimited("Branch offset 0x%lx (@%u) out of 
range\n", offset, ctx->idx);   \
+   return -ERANGE;   \
+   } \
+   EMIT(PPC_INST_BRANCH | (offset & 0x03fc));\
+   } while (0)
+
 /* blr; (unconditional 'branch' with link) to absolute address */
 #define PPC_BL_ABS(dest)   EMIT(PPC_INST_BL |\
 (((dest) - (unsigned long)(image + 
ctx->idx)) & 0x03fc))
 /* "cond" here covers BO:BI fields. */
-#define PPC_BCC_SHORT(cond, dest)  EMIT(PPC_INST_BRANCH_COND |   \
-(((cond) & 0x3ff) << 16) |   \
-(((dest) - (ctx->idx * 4)) & \
- 0xfffc))
+#define PPC_BCC_SHORT(cond, dest)\
+   do {  \
+   long offset = (long)(dest) - (ctx->idx * 4);  \
+   if (!is_offset_in_cond_branch_range(offset)) {\
+   pr_err_ratelimited("Conditional branch offset 0x%lx 
(@%u) out of range\n", offset, ctx->idx);   \
+   return -ERANGE;   \
+   } \
+   EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset 
& 0xfffc));  \
+   } while (0)
+
 /* Sign-extended 32-bit immediate load */
 #define PPC_LI32(d, i) do {  \
if ((int)(uintptr_t)(i) >= -32768 &&  \
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 53aefee3fe70..fcbf7a917c56 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -210,7 +210,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
-   bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
+   if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) {
+   bpf_jit_binary_free(bpf_hdr);
+   fp = org_fp;
+   goto out_addrs;
+   }
bpf_jit_build_epilogue(code_base, &cgctx);
 
if (bpf_jit_enable > 1)
diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
b/arch/powerpc/net/bpf_jit_comp32.c
index beb12cbc8c29..a74d52204f8d 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -200,7 +200,7 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct 
codegen_context *ctx, u64 fun
}
 }
 
-static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, 
u32 out)
+static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 
out)
 {
/*
 * By now, the eBPF program has already setup parameters in r3-r6
@@ -261,7 +261,9 @@ static void bpf_jit_emit_tail_call(u32 

[PATCH AUTOSEL 5.14 13/17] powerpc/lib: Add helper to check if offset is within conditional branch range

2021-10-12 Thread Sasha Levin
From: "Naveen N. Rao" 

[ Upstream commit 4549c3ea3160fa8b3f37dfe2f957657bb265eda9 ]

Add a helper to check if a given offset is within the branch range for a
powerpc conditional branch instruction, and update some sites to use the
new helper.

Signed-off-by: Naveen N. Rao 
Reviewed-by: Christophe Leroy 
Acked-by: Song Liu 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/442b69a34ced32ca346a0d9a855f3f6cfdbbbd41.1633464148.git.naveen.n@linux.vnet.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/code-patching.h | 1 +
 arch/powerpc/lib/code-patching.c | 7 ++-
 arch/powerpc/net/bpf_jit.h   | 7 +--
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index a95f63788c6b..4ba834599c4d 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -23,6 +23,7 @@
 #define BRANCH_ABSOLUTE0x2
 
 bool is_offset_in_branch_range(long offset);
+bool is_offset_in_cond_branch_range(long offset);
 int create_branch(struct ppc_inst *instr, const u32 *addr,
  unsigned long target, int flags);
 int create_cond_branch(struct ppc_inst *instr, const u32 *addr,
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index f9a3019e37b4..c5ed98823835 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -228,6 +228,11 @@ bool is_offset_in_branch_range(long offset)
return (offset >= -0x200 && offset <= 0x1fc && !(offset & 0x3));
 }
 
+bool is_offset_in_cond_branch_range(long offset)
+{
+   return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3);
+}
+
 /*
  * Helper to check if a given instruction is a conditional branch
  * Derived from the conditional checks in analyse_instr()
@@ -280,7 +285,7 @@ int create_cond_branch(struct ppc_inst *instr, const u32 
*addr,
offset = offset - (unsigned long)addr;
 
/* Check we can represent the target in the instruction format */
-   if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3)
+   if (!is_offset_in_cond_branch_range(offset))
return 1;
 
/* Mask out the flags and target, so they don't step on each other. */
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 99fad093f43e..935ea95b6635 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -78,11 +78,6 @@
 #define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0)
 #endif
 
-static inline bool is_nearbranch(int offset)
-{
-   return (offset < 32768) && (offset >= -32768);
-}
-
 /*
  * The fly in the ointment of code size changing from pass to pass is
  * avoided by padding the short branch case with a NOP. If code size 
differs
@@ -91,7 +86,7 @@ static inline bool is_nearbranch(int offset)
  * state.
  */
 #define PPC_BCC(cond, dest)do {  \
-   if (is_nearbranch((dest) - (ctx->idx * 4))) { \
+   if (is_offset_in_cond_branch_range((long)(dest) - (ctx->idx * 
4))) {\
PPC_BCC_SHORT(cond, dest);\
EMIT(PPC_RAW_NOP());  \
} else {  \
-- 
2.33.0



Re: [PATCH] powerpc: don't select KFENCE on platform PPC_FSL_BOOK3E

2021-10-12 Thread Michael Ellerman
Christophe Leroy  writes:
> On 12/10/2021 at 08:24, Michael Ellerman wrote:
>> Liu Shixin  writes:
>>> kindly ping.
>> 
>> I was under the impression you were trying to debug why it wasn't
>> working with Christophe.
>
> The investigation was a bit dormant to be honest since Liu confirmed 
> that neither KFENCE nor DEBUG_PAGEALLOC works.

No worries. Sorry it fell to you to do the investigation.

> I now looked at the effort to make it work, and it is not trivial.
> At the time being, all linear space is mapped with pinned TLBs and 
> everything is setup for space 0, with space 1 being used temporarily 
> when doing heavy changes to space 0.
>
> We can't use standard pages for linear space on space 0 because we need 
> memory mapped at all time for exceptions (on booke exception run with 
> MMU on in space 0).
>
> In order to use standard pages, we'd need to reorganise the kernel to 
> have it run mostly in space 1 (for data at least) where we would map 
> almost everything with standard pages, and keep pinned TLB to map linear 
> space on space 0 for TLB miss exceptions. Then we'd do more or less like 
> book3s/32 and switch back into space 1 into other exceptions prolog.
>
> That could be good to do it as we could maybe have more code in common 
> with non booke 32 bits, but it is not a trivial job.
>
> So I suggest that for now, we just make KFENCE and DEBUG_PAGEALLOC 
> unselectable for booke/32 (e500 and 44x).

Yep seems reasonable.

cheers


Re: [PATCH v6 00/11] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-10-12 Thread Bjorn Helgaas
On Mon, Oct 04, 2021 at 02:59:24PM +0200, Uwe Kleine-König wrote:
> Hello,
> 
> this is v6 of the quest to drop the "driver" member from struct pci_dev
> which tracks the same data (apart from a constant offset) as dev.driver.

I like this a lot and applied it to pci/driver for v5.16, thanks!

I split some of the bigger patches apart so they only touched one
driver or subsystem at a time.  I also updated to_pci_driver() so it
returns NULL when given NULL, which makes some of the validations
quite a bit simpler, especially in the PM code in pci-driver.c.
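
[ A minimal sketch of the to_pci_driver() tweak described above
  (illustrative only; the real helper lives in include/linux/pci.h): ]

static inline struct pci_driver *to_pci_driver(struct device_driver *drv)
{
	return drv ? container_of(drv, struct pci_driver, driver) : NULL;
}

That is what lets the hunks below (match_id(), the cxl error handlers,
and so on) drop the explicit "if (pdev->dev.driver)" style checks.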

Full interdiff from this v6 series:

diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index deaaef6efe34..36e84d904260 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -80,17 +80,15 @@ static struct resource video_rom_resource = {
  */
 static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned 
short device)
 {
+   struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
const struct pci_device_id *id;
 
if (pdev->vendor == vendor && pdev->device == device)
return true;
 
-   if (pdev->dev.driver) {
-   struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
-   for (id = drv->id_table; id && id->vendor; id++)
-   if (id->vendor == vendor && id->device == device)
-   break;
-   }
+   for (id = drv ? drv->id_table : NULL; id && id->vendor; id++)
+   if (id->vendor == vendor && id->device == device)
+   break;
 
return id && id->vendor;
 }
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index d997c9c3ebb5..7eb3706cf42d 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -20,38 +20,38 @@ static void pci_error_handlers(struct cxl_afu *afu,
pci_channel_state_t state)
 {
struct pci_dev *afu_dev;
+   struct pci_driver *afu_drv;
+   struct pci_error_handlers *err_handler;
 
if (afu->phb == NULL)
return;
 
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-   struct pci_driver *afu_drv;
-
-   if (!afu_dev->dev.driver)
-   continue;
-
afu_drv = to_pci_driver(afu_dev->dev.driver);
+   if (!afu_drv)
+   continue;
 
+   err_handler = afu_drv->err_handler;
switch (bus_error_event) {
case CXL_ERROR_DETECTED_EVENT:
afu_dev->error_state = state;
 
-   if (afu_drv->err_handler &&
-   afu_drv->err_handler->error_detected)
-   afu_drv->err_handler->error_detected(afu_dev, 
state);
-   break;
+   if (err_handler &&
+   err_handler->error_detected)
+   err_handler->error_detected(afu_dev, state);
+   break;
case CXL_SLOT_RESET_EVENT:
afu_dev->error_state = state;
 
-   if (afu_drv->err_handler &&
-   afu_drv->err_handler->slot_reset)
-   afu_drv->err_handler->slot_reset(afu_dev);
-   break;
+   if (err_handler &&
+   err_handler->slot_reset)
+   err_handler->slot_reset(afu_dev);
+   break;
case CXL_RESUME_EVENT:
-   if (afu_drv->err_handler &&
-   afu_drv->err_handler->resume)
-   afu_drv->err_handler->resume(afu_dev);
-   break;
+   if (err_handler &&
+   err_handler->resume)
+   err_handler->resume(afu_dev);
+   break;
}
}
 }
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 7e7545d01e27..08bd81854101 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1795,6 +1795,8 @@ static pci_ers_result_t cxl_vphb_error_detected(struct 
cxl_afu *afu,
pci_channel_state_t state)
 {
struct pci_dev *afu_dev;
+   struct pci_driver *afu_drv;
+   struct pci_error_handlers *err_handler;
pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
 
@@ -1805,16 +1807,16 @@ static pci_ers_result_t cxl_vphb_error_detected(struct 
cxl_afu *afu,
return result;
 
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-   struct pci_driver *afu_drv;
-   if (!afu_dev->dev.driver)
-   continue;
-
afu_drv = to_pci_driver(afu_dev->dev.driver);
+

Re: [PATCH v2] KVM: PPC: Defer vtime accounting 'til after IRQ handling

2021-10-12 Thread Michael Ellerman
Laurent Vivier  writes:
> Commit 112665286d08 moved guest_exit() in the interrupt protected
> area to avoid wrong context warning (or worse), but the tick counter
> cannot be updated and the guest time is accounted to the system time.
>
> To fix the problem port to POWER the x86 fix
> 160457140187 ("Defer vtime accounting 'til after IRQ handling"):
>
> "Defer the call to account guest time until after servicing any IRQ(s)
>  that happened in the guest or immediately after VM-Exit.  Tick-based
>  accounting of vCPU time relies on PF_VCPU being set when the tick IRQ
>  handler runs, and IRQs are blocked throughout the main sequence of
>  vcpu_enter_guest(), including the call into vendor code to actually
>  enter and exit the guest."
>
> Fixes: 112665286d08 ("KVM: PPC: Book3S HV: Context tracking exit guest 
> context before enabling irqs")
> Cc: npig...@gmail.com
> Cc:  # 5.12
> Signed-off-by: Laurent Vivier 
> ---
>
> Notes:
> v2: remove reference to commit 61bd0f66ff92
> cc stable 5.12
> add the same comment in the code as for x86
>
>  arch/powerpc/kvm/book3s_hv.c | 24 
>  1 file changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 2acb1c96cfaf..a694d1a8f6ce 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
...
> @@ -4506,13 +4514,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
> time_limit,
>  
>   srcu_read_unlock(&kvm->srcu, srcu_idx);
>  
> + context_tracking_guest_exit();
> +
>   set_irq_happened(trap);
>  
>   kvmppc_set_host_core(pcpu);
>  
> - guest_exit_irqoff();
> -
>   local_irq_enable();
> + /*
> +  * Wait until after servicing IRQs to account guest time so that any
> +  * ticks that occurred while running the guest are properly accounted
> +  * to the guest.  Waiting until IRQs are enabled degrades the accuracy
> +  * of accounting via context tracking, but the loss of accuracy is
> +  * acceptable for all known use cases.
> +  */
> + vtime_account_guest_exit();

This pops a warning for me, running guest(s) on Power8:
 
  [  270.745303][T16661] [ cut here ]
  [  270.745374][T16661] WARNING: CPU: 72 PID: 16661 at 
arch/powerpc/kernel/time.c:311 vtime_account_kernel+0xe0/0xf0
  [  270.745397][T16661] Modules linked in: nf_conntrack_netlink xfrm_user 
xfrm_algo xt_addrtype xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 
xt_tcpudp iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 
nf_defrag_ipv4 nfnetlink ip6table_filter ip6_tables iptable_filter tun overlay 
fuse kvm_hv kvm binfmt_misc squashfs mlx4_ib dm_multipath scsi_dh_rdac 
ib_uverbs scsi_dh_alua mlx4_en ib_core sr_mod cdrom lpfc bnx2x sg mlx4_core 
mdio crc_t10dif crct10dif_generic scsi_transport_fc vmx_crypto gf128mul 
leds_powernv crct10dif_vpmsum powernv_rng led_class crct10dif_common 
powernv_op_panel rng_core crc32c_vpmsum sunrpc ip_tables x_tables autofs4
  [  270.745565][T16661] CPU: 72 PID: 16661 Comm: qemu-system-ppc Not tainted 
5.15.0-rc5-01885-g5e96f0599cff #1
  [  270.745578][T16661] NIP:  c0027c20 LR: c0080b6e9ca8 CTR: 
c0027b40
  [  270.745588][T16661] REGS: c0081043f4f0 TRAP: 0700   Not tainted  
(5.15.0-rc5-01885-g5e96f0599cff)
  [  270.745599][T16661] MSR:  9282b033 
  CR: 2244  XER: 2000
  [  270.745635][T16661] CFAR: c0027b7c IRQMASK: 0
  [  270.745635][T16661] GPR00: c0080b6e9ca8 c0081043f790 
c248f900 c00da820
  [  270.745635][T16661] GPR04: c0080b93b488 0006 
000f4240 c01fc000
  [  270.745635][T16661] GPR08: 000ffa6f  
8002 c0080b6ffba8
  [  270.745635][T16661] GPR12: c0027b40 c00d9e00 
0001 
  [  270.745635][T16661] GPR16:  c254c0b0 
 c00941e84414
  [  270.745635][T16661] GPR20: 0001 0048 
c0080b710f0c 0001
  [  270.745635][T16661] GPR24: c00941e90aa8  
c24c6d60 0001
  [  270.745635][T16661] GPR28: c00803222470 c0080b93aa00 
0008 c00d9e00
  [  270.745747][T16661] NIP [c0027c20] vtime_account_kernel+0xe0/0xf0
  [  270.745756][T16661] LR [c0080b6e9ca8] kvmppc_run_core+0xda0/0x16c0 
[kvm_hv]
  [  270.745773][T16661] Call Trace:
  [  270.745779][T16661] [c0081043f790] [c0081043f7d0] 
0xc0081043f7d0 (unreliable)
  [  270.745793][T16661] [c0081043f7d0] [c0080b6e9ca8] 
kvmppc_run_core+0xda0/0x16c0 [kvm_hv]
  [  270.745808][T16661] [c0081043f950] [c0080b6eee28] 
kvmppc_vcpu_run_hv+0x570/0xce0 [kvm_hv]
  [  270.745823][T16661] [c0081043fa10] [c0080b5d2afc] 
kvmppc_vcpu_run+0x34/0x48 [kvm]
  [  270.745847][T16661] [c0081043fa30] [c0080b5ce728] 
kvm_arch_vcpu_ioctl_run+0x340/0x450 [kvm]
  

RE: [PATCH] tpm: ibmvtpm: Make use of dma_alloc_coherent()

2021-10-12 Thread David Laight
From: Jarkko Sakkinen
> Sent: 12 October 2021 18:41
> 
> On Tue, 2021-10-12 at 12:43 -0300, Jason Gunthorpe wrote:
> > On Tue, Oct 12, 2021 at 06:29:58PM +0300, Jarkko Sakkinen wrote:
> > > On Mon, 2021-10-11 at 00:01 +0800, Cai Huoqing wrote:
> > > > Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/
> > >   ~
> > >   Replace
> > >
> > > > dma_unmap_single() with dma_alloc_coherent/dma_free_coherent()
> > > > helps to reduce code size, and simplify the code, and coherent
> > > > DMA will not clear the cache every time.
> > > >
> > > > Signed-off-by: Cai Huoqing 
> > >
> > > If this does not do functionally anything useful, there's no
> > > reason to apply this.
> >
> > At least in this case it looks like the ibmvtpm is not using the DMA
> > API properly. There is no sync after each data transfer. Replacing
> > this wrong usage with the coherent API is reasonable.
> 
> Thank you. As long as this is documented in the commit message,
> I'm cool with the change itself.
> 
> E.g. something like this would be perfectly fine replacement for the
> current commit message:
> 
> "The current usage pattern for the DMA API is inappropriate, as
> data transfers are not synced. Replace the existing DMA code
> with the coherent DMA API."

Why not also say that the DMA accesses snoop the cache?
(I think that was mentioned earlier in the thread.)

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)


Re: [PATCH] tpm: ibmvtpm: Make use of dma_alloc_coherent()

2021-10-12 Thread Jarkko Sakkinen
On Tue, 2021-10-12 at 12:43 -0300, Jason Gunthorpe wrote:
> On Tue, Oct 12, 2021 at 06:29:58PM +0300, Jarkko Sakkinen wrote:
> > On Mon, 2021-10-11 at 00:01 +0800, Cai Huoqing wrote:
> > > Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/
> >   ~
> >   Replace
> > 
> > > dma_unmap_single() with dma_alloc_coherent/dma_free_coherent()
> > > helps to reduce code size, and simplify the code, and coherent
> > > DMA will not clear the cache every time.
> > > 
> > > Signed-off-by: Cai Huoqing 
> > 
> > If this does not do functionally anything useful, there's no
> > reason to apply this.
> 
> At least in this case it looks like the ibmvtpm is not using the DMA
> API properly. There is no sync after each data transfer. Replacing
> this wrong usage with the coherent API is reasonable.

Thank you. As long as this is documented in the commit message,
I'm cool with the change itself.

E.g. something like this would be perfectly fine replacement for the
current commit message:

"The current usage pattern for the DMA API is inappropriate, as
data transfers are not synced. Replace the existing DMA code
with the coherent DMA API."

/Jarkko


Re: [PATCH] powerpc: don't select KFENCE on platform PPC_FSL_BOOK3E

2021-10-12 Thread Christophe Leroy




On 12/10/2021 at 08:24, Michael Ellerman wrote:

Liu Shixin  writes:

kindly ping.


I was under the impression you were trying to debug why it wasn't
working with Christophe.


The investigation has been a bit dormant, to be honest, since Liu confirmed 
that neither KFENCE nor DEBUG_PAGEALLOC works.


I have now looked at the effort required to make it work, and it is not trivial.
For the time being, all linear space is mapped with pinned TLBs and 
everything is set up for space 0, with space 1 being used temporarily 
when doing heavy changes to space 0.


We can't use standard pages for linear space on space 0 because we need 
memory mapped at all times for exceptions (on booke, exceptions run with 
the MMU on in space 0).


In order to use standard pages, we'd need to reorganise the kernel to 
have it run mostly in space 1 (for data at least), where we would map 
almost everything with standard pages, and keep pinned TLBs to map linear 
space on space 0 for TLB miss exceptions. Then we'd do more or less what 
book3s/32 does and switch back into space 1 in the other exceptions' prologs.


That could be worth doing, as we could maybe share more code with 
non-booke 32-bit, but it is not a trivial job.


So I suggest that for now, we just make KFENCE and DEBUG_PAGEALLOC 
unselectable for booke/32 (e500 and 44x).
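
Building on Liu's patch quoted below, one way to spell that out in Kconfig
could be the following (a sketch only; the existing ARCH_SUPPORTS_DEBUG_PAGEALLOC
select is assumed from arch/powerpc/Kconfig and may differ):

	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if (PPC32 && !PPC_FSL_BOOK3E && !44x) || PPC_BOOK3S_64
	select HAVE_ARCH_KFENCE			if PPC32 && !PPC_FSL_BOOK3E && !44x

i.e. exclude e500 (PPC_FSL_BOOK3E) and 44x explicitly rather than only one of them.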


Christophe



cheers


On 2021/9/24 14:39, Liu Shixin wrote:

On platform PPC_FSL_BOOK3E, all lowmem is managed by tlbcam. That means
we didn't really map the kfence pool with page granularity. Therefore,
if KFENCE is enabled, the system will hit the following panic:

 BUG: Kernel NULL pointer dereference on read at 0x
 Faulting instruction address: 0xc01de598
 Oops: Kernel access of bad area, sig: 11 [#1]
 BE PAGE_SIZE=4K SMP NR_CPUS=4 MPC8544 DS
 Dumping ftrace buffer:
(ftrace buffer empty)
 Modules linked in:
 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc3+ #298
 NIP:  c01de598 LR: c08ae9c4 CTR: 
 REGS: c0b4bea0 TRAP: 0300   Not tainted  (5.12.0-rc3+)
 MSR:  00021000   CR: 24000228  XER: 2000
 DEAR:  ESR: 
 GPR00: c08ae9c4 c0b4bf60 c0ad64e0 ef72 00021000   
0200
 GPR08: c0ad5000   0004  008fbb30  

 GPR16:     c000   

 GPR24: c08ca004 c08ca004 c0b6a0e0 c0b6 c0b58f00 c085 c08ca000 
ef72
 NIP [c01de598] kfence_protect+0x44/0x6c
 LR [c08ae9c4] kfence_init+0xfc/0x2a4
 Call Trace:
 [c0b4bf60] [efffe160] 0xefffe160 (unreliable)
 [c0b4bf70] [c08ae9c4] kfence_init+0xfc/0x2a4
 [c0b4bfb0] [c0894d3c] start_kernel+0x3bc/0x574
 [c0b4bff0] [c470] set_ivor+0x14c/0x188
 Instruction dump:
 7c0802a6 8109d594 546a653a 90010014 54630026 3920 7d48502e 2c0a
 41820010 554a0026 5469b53a 7d295214 <8149> 38831000 554a003c 9149
 random: get_random_bytes called from print_oops_end_marker+0x40/0x78 with 
crng_init=0
 ---[ end trace  ]---

Signed-off-by: Liu Shixin 
---
  arch/powerpc/Kconfig | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d46db0bfb998..cffd57bcb5e4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -185,7 +185,7 @@ config PPC
select HAVE_ARCH_KASAN  if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN_VMALLOC  if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KGDB
-   select HAVE_ARCH_KFENCE if PPC32
+   select HAVE_ARCH_KFENCE if PPC32 && !PPC_FSL_BOOK3E
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
select HAVE_ARCH_NVRAM_OPS


Re: [PATCH] tpm: ibmvtpm: Make use of dma_alloc_coherent()

2021-10-12 Thread Jason Gunthorpe
On Tue, Oct 12, 2021 at 06:29:58PM +0300, Jarkko Sakkinen wrote:
> On Mon, 2021-10-11 at 00:01 +0800, Cai Huoqing wrote:
> > Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/
>   ~
>   Replace
> 
> > dma_unmap_single() with dma_alloc_coherent/dma_free_coherent()
> > helps to reduce code size, and simplify the code, and coherent
> > DMA will not clear the cache every time.
> > 
> > Signed-off-by: Cai Huoqing 
> 
> If this does not do functionally anything useful, there's no
> reason to apply this.

At least in this case it looks like the ibmvtpm is not using the DMA
API properly. There is no sync after each data transfer. Replacing
this wrong usage with the coherent API is reasonable.
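
For context, a rough sketch of the two usage patterns (illustrative only, not
the ibmvtpm code; dev, buf, dma and len are made-up names):

	void *buf;
	dma_addr_t dma;

	/* Streaming DMA: the CPU must sync around every transfer. */
	buf = kmalloc(len, GFP_KERNEL);
	dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
	/* ... device writes into the buffer ... */
	dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);
	/* the CPU may read buf now; dma_sync_single_for_device() is needed
	 * before handing the buffer back to the device */
	dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);

	/* Coherent DMA: no per-transfer sync calls are required. */
	buf = dma_alloc_coherent(dev, len, &dma, GFP_KERNEL);
	/* ... */
	dma_free_coherent(dev, len, buf, dma);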

Jason


[RFC PATCH] powerpc: dts: Remove MPC5xxx platforms

2021-10-12 Thread Rob Herring
The mpc5xxx platforms have had dts warnings for some time which no one
seems to care to fix, so let's just remove the dts files.

According to Arnd:
"Specifically, MPC5200B has a 15 year lifetime, which ends in
11 months from now. The original bplan/Genesi Efika 5K2 was
quite popular at the time it came out, and there are probably
still some of those hanging around, but they came with Open
Firmware rather than relying on the dts files that ship with the
kernel.

Grant Likely was the original maintainer for MPC52xx until 2011,
Anatolij Gustschin is still listed as maintainer since then but hasn't
been active in it for a while either. Anatolij can probably best judge
which of these boards are still in going to be used with future kernels,
but I suspect once you start removing bits from 52xx, the newer
but less common 512x platform can go away as well."

Cc: Anatolij Gustschin 
Cc: Arnd Bergmann 
Cc: Stephen Rothwell 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring 
---
Sending this out as a feeler to see if anyone cares. If anyone does, 
please fix the warnings.

 arch/powerpc/boot/Makefile   |   5 -
 arch/powerpc/boot/dts/a3m071.dts | 138 ---
 arch/powerpc/boot/dts/a4m072.dts | 147 
 arch/powerpc/boot/dts/ac14xx.dts | 395 
 arch/powerpc/boot/dts/cm5200.dts |  85 -
 arch/powerpc/boot/dts/lite5200b.dts  | 157 
 arch/powerpc/boot/dts/media5200.dts  | 142 
 arch/powerpc/boot/dts/motionpro.dts  | 132 ---
 arch/powerpc/boot/dts/mpc5121.dtsi   | 526 ---
 arch/powerpc/boot/dts/mpc5121ads.dts | 174 -
 arch/powerpc/boot/dts/mpc5125twr.dts | 293 ---
 arch/powerpc/boot/dts/mpc5200b.dtsi  | 288 ---
 arch/powerpc/boot/dts/mucmc52.dts| 222 ---
 arch/powerpc/boot/dts/o2d.dts|  43 ---
 arch/powerpc/boot/dts/o2d.dtsi   | 118 --
 arch/powerpc/boot/dts/o2d300.dts |  48 ---
 arch/powerpc/boot/dts/o2dnt2.dts |  44 ---
 arch/powerpc/boot/dts/o2i.dts|  29 --
 arch/powerpc/boot/dts/o2mnt.dts  |  29 --
 arch/powerpc/boot/dts/o3dnt.dts  |  44 ---
 arch/powerpc/boot/dts/pcm030.dts | 106 --
 arch/powerpc/boot/dts/pcm032.dts | 183 --
 arch/powerpc/boot/dts/pdm360ng.dts   | 195 --
 arch/powerpc/boot/dts/uc101.dts  | 152 
 24 files changed, 3695 deletions(-)
 delete mode 100644 arch/powerpc/boot/dts/a3m071.dts
 delete mode 100644 arch/powerpc/boot/dts/a4m072.dts
 delete mode 100644 arch/powerpc/boot/dts/ac14xx.dts
 delete mode 100644 arch/powerpc/boot/dts/cm5200.dts
 delete mode 100644 arch/powerpc/boot/dts/lite5200b.dts
 delete mode 100644 arch/powerpc/boot/dts/media5200.dts
 delete mode 100644 arch/powerpc/boot/dts/motionpro.dts
 delete mode 100644 arch/powerpc/boot/dts/mpc5121.dtsi
 delete mode 100644 arch/powerpc/boot/dts/mpc5121ads.dts
 delete mode 100644 arch/powerpc/boot/dts/mpc5125twr.dts
 delete mode 100644 arch/powerpc/boot/dts/mpc5200b.dtsi
 delete mode 100644 arch/powerpc/boot/dts/mucmc52.dts
 delete mode 100644 arch/powerpc/boot/dts/o2d.dts
 delete mode 100644 arch/powerpc/boot/dts/o2d.dtsi
 delete mode 100644 arch/powerpc/boot/dts/o2d300.dts
 delete mode 100644 arch/powerpc/boot/dts/o2dnt2.dts
 delete mode 100644 arch/powerpc/boot/dts/o2i.dts
 delete mode 100644 arch/powerpc/boot/dts/o2mnt.dts
 delete mode 100644 arch/powerpc/boot/dts/o3dnt.dts
 delete mode 100644 arch/powerpc/boot/dts/pcm030.dts
 delete mode 100644 arch/powerpc/boot/dts/pcm032.dts
 delete mode 100644 arch/powerpc/boot/dts/pdm360ng.dts
 delete mode 100644 arch/powerpc/boot/dts/uc101.dts

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 6900d0ac2421..15ee0c2c6a3e 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -308,11 +308,6 @@ image-$(CONFIG_PPC_EP88XC) += dtbImage.ep88xc
 image-$(CONFIG_PPC_ADDER875)   += cuImage.adder875-uboot \
   dtbImage.adder875-redboot
 
-# Board ports in arch/powerpc/platform/52xx/Kconfig
-image-$(CONFIG_PPC_LITE5200)   += cuImage.lite5200
-image-$(CONFIG_PPC_LITE5200)   += cuImage.lite5200b
-image-$(CONFIG_PPC_MEDIA5200)  += cuImage.media5200
-
 # Board ports in arch/powerpc/platform/82xx/Kconfig
 image-$(CONFIG_MPC8272_ADS)+= cuImage.mpc8272ads
 image-$(CONFIG_PQ2FADS)+= cuImage.pq2fads
diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
deleted file mode 100644
index 034cfd8aa95b..
--- a/arch/powerpc/boot/dts/a3m071.dts
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * a3m071 board Device Tree Source
- *
- * Copyright 2012 Stefan Roese 
- *
- * Copyright (C) 2011 DENX Software Engineering GmbH
- * Heiko Schocher 
- *
- * Copyright (C) 2007 Semihalf
- * Marian Balakowicz 
- */
-

Re: [PATCH] tpm: ibmvtpm: Make use of dma_alloc_coherent()

2021-10-12 Thread Jarkko Sakkinen
On Mon, 2021-10-11 at 00:01 +0800, Cai Huoqing wrote:
> Replacing kmalloc/kfree/get_zeroed_page/free_page/dma_map_single/
  ~
  Replace

> dma_unmap_single() with dma_alloc_coherent/dma_free_coherent()
> helps to reduce code size, and simplify the code, and coherent
> DMA will not clear the cache every time.
> 
> Signed-off-by: Cai Huoqing 

If this does not do functionally anything useful, there's no
reason to apply this.

It is also missing information on why the substitution is possible.

Field-tested code is better than clean code, i.e. we do not
risk having possible new regressions just for a slightly nicer
layout...

/Jarkko




Re: linux-next: build warnings in Linus' tree

2021-10-12 Thread Arnd Bergmann
On Mon, Oct 11, 2021 at 10:42 PM Rob Herring  wrote:
> On Sun, Oct 10, 2021 at 4:27 PM Stephen Rothwell  
> wrote:
> FYI, u-boot removed mpc5xxx support in 2017, so maybe there's
> similarly no need to keep them in the kernel? It does appear NXP
> will still sell you the parts, though the last BSP was from 2009.

Specifically, MPC5200B has a 15 year lifetime, which ends in
11 months from now. The original bplan/Genesi Efika 5K2 was
quite popular at the time it came out, and there are probably
still some of those hanging around, but they came with Open
Firmware rather than relying on the dts files that ship with the
kernel.

Grant Likely was the original maintainer for MPC52xx until 2011,
Anatolij Gustschin is still listed as maintainer since then but hasn't
been active in it for a while either. Anatolij can probably best judge
which of these boards are still going to be used with future kernels,
but I suspect once you start removing bits from 52xx, the newer
but less common 512x platform can go away as well.

 Arnd


[RESEND PATCH v4 1/8] bpf powerpc: Remove unused SEEN_STACK

2021-10-12 Thread Hari Bathini
From: Ravi Bangoria 

SEEN_STACK is unused on PowerPC. Remove it. Also, have
SEEN_TAILCALL use 0x40000000.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Christophe Leroy 
---

* No changes in v4.


 arch/powerpc/net/bpf_jit.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 7e9b978b768e..89bd744c2bff 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -125,8 +125,7 @@
 #define COND_LE(CR0_GT | COND_CMP_FALSE)
 
 #define SEEN_FUNC  0x20000000 /* might call external helpers */
-#define SEEN_STACK 0x40000000 /* uses BPF stack */
-#define SEEN_TAILCALL  0x80000000 /* uses tail calls */
+#define SEEN_TAILCALL  0x40000000 /* uses tail calls */
 
 #define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
 #define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */
-- 
2.31.1



Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread Steven Rostedt
On Tue, 12 Oct 2021 13:40:08 +0800
王贇  wrote:

> --- a/include/linux/trace_recursion.h
> +++ b/include/linux/trace_recursion.h
> @@ -214,7 +214,14 @@ static __always_inline void trace_clear_recursion(int 
> bit)
>  static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
>unsigned long 
> parent_ip)
>  {
> - return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, 
> TRACE_FTRACE_MAX);
> + int bit;
> +
> + preempt_disable_notrace();

The recursion test does not require preemption to be disabled; it uses the
task struct, not per-CPU variables, so you should not disable preemption
before the test.

bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, 
TRACE_FTRACE_MAX);
if (bit >= 0)
preempt_disable_notrace();

And if the bit is zero, it means a recursion check was already done by
another caller (ftrace handler does the check, followed by calling perf),
and you really don't even need to disable preemption in that case.

if (bit > 0)
preempt_disable_notrace();

And on the unlock, have:

 static __always_inline void ftrace_test_recursion_unlock(int bit)
 {
if (bit)
preempt_enable_notrace();
trace_clear_recursion(bit);
 }

But maybe that's over optimizing ;-)
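
Putting those pieces together, the trylock/unlock pair would look roughly
like this (just a sketch of the suggestion above, not the final patch):

	static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
								  unsigned long parent_ip)
	{
		int bit;

		bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START,
						   TRACE_FTRACE_MAX);
		/* bit == 0 means an earlier caller already did the check. */
		if (bit > 0)
			preempt_disable_notrace();

		return bit;
	}

	static __always_inline void ftrace_test_recursion_unlock(int bit)
	{
		if (bit)
			preempt_enable_notrace();
		trace_clear_recursion(bit);
	}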

-- Steve


> + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, 
> TRACE_FTRACE_MAX);
> + if (bit < 0)
> + preempt_enable_notrace();
> +
> + return bit;
>  }



[RESEND PATCH v4 0/8] bpf powerpc: Add BPF_PROBE_MEM support in powerpc JIT compiler

2021-10-12 Thread Hari Bathini
Patches #1 & #2 are simple cleanups. Patch #3 refactors the JIT
compiler code with the aim of simplifying the addition of BPF_PROBE_MEM support.
Patch #4 introduces the PPC_RAW_BRANCH() macro instead of open coding the
branch instruction. Patches #5 & #7 add BPF_PROBE_MEM support for the PPC64
& PPC32 JIT compilers respectively. Patches #6 & #8 handle bad userspace
pointers for the PPC64 & PPC32 cases respectively.


Resending v4 after rebasing the series on top of bpf fix patches
posted by Naveen:

  - 
https://patchwork.ozlabs.org/project/linuxppc-dev/cover/cover.1633464148.git.naveen.n@linux.vnet.ibm.com/
("[v2,00/10] powerpc/bpf: Various fixes")

Also, added Reviewed-by tag from Christophe for patches #3, #5, #6, #7 & #8.


Hari Bathini (4):
  bpf powerpc: refactor JIT compiler code
  powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro
  bpf ppc32: Add BPF_PROBE_MEM support for JIT
  bpf ppc32: Access only if addr is kernel address

Ravi Bangoria (4):
  bpf powerpc: Remove unused SEEN_STACK
  bpf powerpc: Remove extra_pass from bpf_jit_build_body()
  bpf ppc64: Add BPF_PROBE_MEM support for JIT
  bpf ppc64: Access only if addr is kernel address

 arch/powerpc/include/asm/ppc-opcode.h |   2 +
 arch/powerpc/net/bpf_jit.h|  17 -
 arch/powerpc/net/bpf_jit_comp.c   |  68 +++--
 arch/powerpc/net/bpf_jit_comp32.c | 101 ++
 arch/powerpc/net/bpf_jit_comp64.c |  72 ++
 5 files changed, 219 insertions(+), 41 deletions(-)

-- 
2.31.1



Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread Miroslav Benes
> diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
> index a9f9c57..805f9c4 100644
> --- a/include/linux/trace_recursion.h
> +++ b/include/linux/trace_recursion.h
> @@ -214,7 +214,14 @@ static __always_inline void trace_clear_recursion(int 
> bit)
>  static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
>unsigned long 
> parent_ip)
>  {
> - return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, 
> TRACE_FTRACE_MAX);
> + int bit;
> +
> + preempt_disable_notrace();
> + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, 
> TRACE_FTRACE_MAX);
> + if (bit < 0)
> + preempt_enable_notrace();
> +
> + return bit;
>  }
> 
>  /**
> @@ -226,6 +233,7 @@ static __always_inline int 
> ftrace_test_recursion_trylock(unsigned long ip,
>  static __always_inline void ftrace_test_recursion_unlock(int bit)
>  {
>   trace_clear_recursion(bit);
> + preempt_enable_notrace();
>  }
> 
>  #endif /* CONFIG_TRACING */
> diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
> index e8029ae..6e66ccd 100644
> --- a/kernel/livepatch/patch.c
> +++ b/kernel/livepatch/patch.c
> @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
>   bit = ftrace_test_recursion_trylock(ip, parent_ip);
>   if (WARN_ON_ONCE(bit < 0))
>   return;
> - /*
> -  * A variant of synchronize_rcu() is used to allow patching functions
> -  * where RCU is not watching, see klp_synchronize_transition().
> -  */
> - preempt_disable_notrace();
> 
>   func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
> stack_node);
> @@ -120,7 +115,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
>   klp_arch_set_pc(fregs, (unsigned long)func->new_func);
> 
>  unlock:
> - preempt_enable_notrace();
>   ftrace_test_recursion_unlock(bit);
>  }

I don't like this change much. We have preempt_disable there not because 
of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was 
added later. Yes, it would work with the change, but it would also hide 
things which should not be hidden in my opinion.

Miroslav


[RESEND PATCH v4 8/8] bpf ppc32: Access only if addr is kernel address

2021-10-12 Thread Hari Bathini
With KUAP enabled, any kernel code which wants to access userspace
needs to be surrounded by disable-enable KUAP. But that is not
happening for the BPF_PROBE_MEM load instruction. Though PPC32 does not
support read protection, considering the fact that PTR_TO_BTF_ID
(which uses BPF_PROBE_MEM mode) could either be a valid kernel pointer
or NULL but should never be a pointer to a userspace address, execute the
BPF_PROBE_MEM load only if the address is a kernel address; otherwise set
dst_reg=0 and move on.

This will catch NULL, valid or invalid userspace pointers. Only a bad
kernel pointer will be handled by the BPF exception table.

[Alexei suggested for x86]
Suggested-by: Alexei Starovoitov 
Signed-off-by: Hari Bathini 
Reviewed-by: Christophe Leroy 
---

Changes in v4:
* Adjusted the emit code to avoid using temporary reg.


 arch/powerpc/net/bpf_jit_comp32.c | 34 +++
 1 file changed, 34 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
b/arch/powerpc/net/bpf_jit_comp32.c
index 5dc45e393d1d..d3a52cd42f53 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -820,6 +820,40 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + 
off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+   /*
+* As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could 
either be a valid
+* kernel pointer or NULL but not a userspace address, 
execute BPF_PROBE_MEM
+* load only if addr is kernel address (see 
is_kernel_addr()), otherwise
+* set dst_reg=0 and move on.
+*/
+   if (BPF_MODE(code) == BPF_PROBE_MEM) {
+   PPC_LI32(_R0, TASK_SIZE - off);
+   EMIT(PPC_RAW_CMPLW(src_reg, _R0));
+   PPC_BCC(COND_GT, (ctx->idx + 5) * 4);
+   EMIT(PPC_RAW_LI(dst_reg, 0));
+   /*
+* For BPF_DW case, "li reg_h,0" would be 
needed when
+* !fp->aux->verifier_zext. Emit NOP otherwise.
+*
+* Note that "li reg_h,0" is emitted for 
BPF_B/H/W case,
+* if necessary. So, jump there instead of 
emitting an
+* additional "li reg_h,0" instruction.
+*/
+   if (size == BPF_DW && !fp->aux->verifier_zext)
+   EMIT(PPC_RAW_LI(dst_reg_h, 0));
+   else
+   EMIT(PPC_RAW_NOP());
+   /*
+* Need to jump two instructions instead of one 
for BPF_DW case
+* as there are two load instructions for 
dst_reg_h & dst_reg
+* respectively.
+*/
+   if (size == BPF_DW)
+   PPC_JMP((ctx->idx + 3) * 4);
+   else
+   PPC_JMP((ctx->idx + 2) * 4);
+   }
+
switch (size) {
case BPF_B:
EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
-- 
2.31.1



[RESEND PATCH v4 7/8] bpf ppc32: Add BPF_PROBE_MEM support for JIT

2021-10-12 Thread Hari Bathini
A BPF load instruction with BPF_PROBE_MEM mode can cause a fault
inside the kernel. Append an exception table for such instructions
within the BPF program.

Unlike other archs, which use the extable 'fixup' field to pass dest_reg
and nip, the BPF exception table on PowerPC follows the generic PowerPC
exception table design, where it populates both fixup and extable
sections within the BPF program. The fixup section contains 3 instructions:
the first 2 instructions clear dest_reg (the lower & higher 32-bit registers)
and the last instruction jumps to the next instruction in the BPF code.
The extable 'insn' field contains the relative offset of the instruction and
the 'fixup' field contains the relative offset of the fixup entry. Example
layout of a BPF program with an extable present:

 +--+
 |  |
 |  |
   0x4020 -->| lwz   r28,4(r4)  |
 |  |
 |  |
   0x40ac -->| lwz  r3,0(r24)   |
 | lwz  r4,4(r24)   |
 |  |
 |  |
 |--|
   0x4278 -->| li  r28,0|  \
 | li  r27,0|  | fixup entry
 | b   0x4024   |  /
   0x4284 -->| li  r4,0 |
 | li  r3,0 |
 | b   0x40b4   |
 |--|
   0x4290 -->| insn=0xfd90  |  \ extable entry
 | fixup=0xffe4 |  /
   0x4298 -->| insn=0xfe14  |
 | fixup=0xffe8 |
 +--+

   (Addresses shown here are chosen random, not real)

Signed-off-by: Hari Bathini 
Reviewed-by: Christophe Leroy 
---

Changes in v4:
* Dropped explicit fallthrough statement for empty switch cases.


 arch/powerpc/net/bpf_jit.h|  4 
 arch/powerpc/net/bpf_jit_comp.c   |  2 ++
 arch/powerpc/net/bpf_jit_comp32.c | 30 ++
 3 files changed, 36 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 444c9debce91..b20a2a83a6e7 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -153,7 +153,11 @@ struct codegen_context {
unsigned int exentry_idx;
 };
 
+#ifdef CONFIG_PPC32
+#define BPF_FIXUP_LEN  3 /* Three instructions => 12 bytes */
+#else
 #define BPF_FIXUP_LEN  2 /* Two instructions => 8 bytes */
+#endif
 
 static inline void bpf_flush_icache(void *start, void *end)
 {
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index f02457c6b54f..1a0041997050 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -297,6 +297,8 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, 
int pass, struct code
(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
 
fixup[0] = PPC_RAW_LI(dst_reg, 0);
+   if (IS_ENABLED(CONFIG_PPC32))
+   fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit 
register too */
 
fixup[BPF_FIXUP_LEN - 1] =
PPC_RAW_BRANCH((long)(pc + jmp_off) - 
(long)&fixup[BPF_FIXUP_LEN - 1]);
diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
b/arch/powerpc/net/bpf_jit_comp32.c
index 54e7cef3e1f2..5dc45e393d1d 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -813,9 +813,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
 * BPF_LDX
 */
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + 
off) */
+   case BPF_LDX | BPF_PROBE_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + 
off) */
+   case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + 
off) */
+   case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + 
off) */
+   case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
switch (size) {
case BPF_B:
EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
@@ -834,6 +838,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
 
if (size != BPF_DW && !fp->aux->verifier_zext)
EMIT(PPC_RAW_LI(dst_reg_h, 0));
+
+   if (BPF_MODE(code) == BPF_PROBE_MEM) {
+   int insn_idx = ctx->idx - 1;
+   int jmp_off = 4;
+
+   /*
+* In case of BPF_DW, two lwz instructions are 
emitted, one
+* for higher 32-bit and another for lower 
32-bit. So, set
+* ex->insn to the first of the two and jump 
over both
+* instructions in fixup.
+*
+   

[RESEND PATCH v4 4/8] powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro

2021-10-12 Thread Hari Bathini
Define and use the PPC_RAW_BRANCH() macro instead of open coding it. This
macro is used while adding BPF_PROBE_MEM support.

Signed-off-by: Hari Bathini 
Reviewed-by: Christophe Leroy 
---

* No changes in v4.


 arch/powerpc/include/asm/ppc-opcode.h | 2 ++
 arch/powerpc/net/bpf_jit.h| 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index baea657bc868..f50213e2a3e0 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -566,6 +566,8 @@
 #define PPC_RAW_MTSPR(spr, d)  (0x7c0003a6 | ___PPC_RS(d) | 
__PPC_SPR(spr))
 #define PPC_RAW_EIEIO()(0x7c0006ac)
 
+#define PPC_RAW_BRANCH(addr)   (PPC_INST_BRANCH | ((addr) & 0x03fffffc))
+
 /* Deal with instructions that older assemblers aren't aware of */
 #definePPC_BCCTR_FLUSH stringify_in_c(.long 
PPC_INST_BCCTR_FLUSH)
 #definePPC_CP_ABORTstringify_in_c(.long PPC_RAW_CP_ABORT)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 7145b651fc2a..6a945f6211f4 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -31,7 +31,7 @@
pr_err_ratelimited("Branch offset 0x%lx (@%u) out of 
range\n", offset, ctx->idx);   \
return -ERANGE;   \
} \
-   EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc));\
+   EMIT(PPC_RAW_BRANCH(offset)); \
} while (0)
 
 /* blr; (unconditional 'branch' with link) to absolute address */
-- 
2.31.1



[RESEND PATCH v4 6/8] bpf ppc64: Access only if addr is kernel address

2021-10-12 Thread Hari Bathini
From: Ravi Bangoria 

On PPC64 with KUAP enabled, any kernel code which wants to
access userspace needs to be surrounded by disable-enable KUAP.
But that is not happening for the BPF_PROBE_MEM load instruction.
So, when a BPF program tries to access an invalid userspace address,
the page-fault handler considers it a bad KUAP fault:

  Kernel attempted to read user page (d000) - exploit attempt? (uid: 0)

Considering the fact that PTR_TO_BTF_ID (which uses BPF_PROBE_MEM
mode) could either be a valid kernel pointer or NULL but should
never be a pointer to a userspace address, execute the BPF_PROBE_MEM load
only if the address is a kernel address; otherwise set dst_reg=0 and move on.

This will catch NULL, valid or invalid userspace pointers. Only a bad
kernel pointer will be handled by the BPF exception table.

[Alexei suggested for x86]
Suggested-by: Alexei Starovoitov 
Signed-off-by: Ravi Bangoria 
Signed-off-by: Hari Bathini 
Reviewed-by: Christophe Leroy 
---

Changes in v4:
* Used IS_ENABLED() instead of #ifdef.
* Dropped the else case that is not applicable for PPC64.


 arch/powerpc/net/bpf_jit_comp64.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index ede8cb3e453f..472d4a551945 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -789,6 +789,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
/* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+   /*
+* As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could 
either be a valid
+* kernel pointer or NULL but not a userspace address, 
execute BPF_PROBE_MEM
+* load only if addr is kernel address (see 
is_kernel_addr()), otherwise
+* set dst_reg=0 and move on.
+*/
+   if (BPF_MODE(code) == BPF_PROBE_MEM) {
+   EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, 
off));
+   if (IS_ENABLED(CONFIG_PPC_BOOK3E_64))
+   PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul);
+   else /* BOOK3S_64 */
+   PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET);
+   EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], 
b2p[TMP_REG_2]));
+   PPC_BCC(COND_GT, (ctx->idx + 4) * 4);
+   EMIT(PPC_RAW_LI(dst_reg, 0));
+   /*
+* Check if 'off' is word aligned because 
PPC_BPF_LL()
+* (BPF_DW case) generates two instructions if 
'off' is not
+* word-aligned and one instruction otherwise.
+*/
+   if (BPF_SIZE(code) == BPF_DW && (off & 3))
+   PPC_JMP((ctx->idx + 3) * 4);
+   else
+   PPC_JMP((ctx->idx + 2) * 4);
+   }
+
switch (size) {
case BPF_B:
EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
-- 
2.31.1



[RESEND PATCH v4 5/8] bpf ppc64: Add BPF_PROBE_MEM support for JIT

2021-10-12 Thread Hari Bathini
From: Ravi Bangoria 

A BPF load instruction with BPF_PROBE_MEM mode can cause a fault
inside the kernel. Append an exception table for such instructions
within the BPF program.

Unlike other archs, which use the extable 'fixup' field to pass dest_reg
and nip, the BPF exception table on PowerPC follows the generic PowerPC
exception table design, where it populates both fixup and extable
sections within the BPF program. The fixup section contains two instructions:
the first instruction clears dest_reg and the second jumps to the next
instruction in the BPF code. The extable 'insn' field contains the relative
offset of the instruction and the 'fixup' field contains the relative offset
of the fixup entry. Example layout of a BPF program with an extable present:

 +--+
 |  |
 |  |
   0x4020 -->| ld   r27,4(r3)   |
 |  |
 |  |
   0x40ac -->| lwz  r3,0(r4)|
 |  |
 |  |
 |--|
   0x4280 -->| li  r27,0|  \ fixup entry
 | b   0x4024   |  /
   0x4288 -->| li  r3,0 |
 | b   0x40b0   |
 |--|
   0x4290 -->| insn=0xfd90  |  \ extable entry
 | fixup=0xffec |  /
   0x4298 -->| insn=0xfe14  |
 | fixup=0xffec |
 +--+

   (Addresses shown here are chosen random, not real)

Signed-off-by: Ravi Bangoria 
Signed-off-by: Hari Bathini 
Reviewed-by: Christophe Leroy 
---

Changes in v4:
* Dropped explicit fallthrough statement for empty switch cases.


 arch/powerpc/net/bpf_jit.h|  8 +++-
 arch/powerpc/net/bpf_jit_comp.c   | 66 ---
 arch/powerpc/net/bpf_jit_comp32.c |  2 +-
 arch/powerpc/net/bpf_jit_comp64.c | 13 +-
 4 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 6a945f6211f4..444c9debce91 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -150,8 +150,11 @@ struct codegen_context {
unsigned int idx;
unsigned int stack_size;
int b2p[ARRAY_SIZE(b2p)];
+   unsigned int exentry_idx;
 };
 
+#define BPF_FIXUP_LEN  2 /* Two instructions => 8 bytes */
+
 static inline void bpf_flush_icache(void *start, void *end)
 {
smp_wmb();  /* smp write barrier */
@@ -175,11 +178,14 @@ static inline void bpf_clear_seen_register(struct 
codegen_context *ctx, int i)
 
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 
func);
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context 
*ctx,
-  u32 *addrs);
+  u32 *addrs, int pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
 
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct 
codegen_context *ctx,
+ int insn_idx, int jmp_off, int dst_reg);
+
 #endif
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index f7972b2c21f6..f02457c6b54f 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *tmp_fp;
bool bpf_blinded = false;
bool extra_pass = false;
+   u32 extable_len;
+   u32 fixup_len;
 
if (!fp->jit_requested)
return org_fp;
@@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
image = jit_data->image;
bpf_hdr = jit_data->header;
proglen = jit_data->proglen;
-   alloclen = proglen + FUNCTION_DESCR_SIZE;
extra_pass = true;
goto skip_init_ctx;
}
@@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
/* Scouting faux-generate pass 0 */
-   if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+   if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
goto out_addrs;
@@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 */
if (cgctx.seen & SEEN_TAILCALL) {
cgctx.idx = 0;
-   if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+   if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
fp = org_fp;
goto out_addrs;
}
@@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_build_prologue(0, &cgctx);
bpf_jit_build_epilogue(0, &cgctx);
 
+   fixup_len = 

[RESEND PATCH v4 3/8] bpf powerpc: refactor JIT compiler code

2021-10-12 Thread Hari Bathini
Refactor powerpc LDX JITing code to simplify adding BPF_PROBE_MEM
support.

Signed-off-by: Hari Bathini 
Reviewed-by: Christophe Leroy 
---

Changes in v4:
* Dropped the default case in the switch statement for bpf size.
* Dropped explicit fallthrough statement for empty switch cases.


 arch/powerpc/net/bpf_jit_comp32.c | 33 ++-
 arch/powerpc/net/bpf_jit_comp64.c | 31 +
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
b/arch/powerpc/net/bpf_jit_comp32.c
index 903f945601c0..8b2ac1c27f1f 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -284,6 +284,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg);
u32 src_reg_h = src_reg - 1;
u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG);
+   u32 size = BPF_SIZE(code);
s16 off = insn[i].off;
s32 imm = insn[i].imm;
bool func_addr_fixed;
@@ -812,23 +813,27 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
 * BPF_LDX
 */
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + 
off) */
-   EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
-   if (!fp->aux->verifier_zext)
-   EMIT(PPC_RAW_LI(dst_reg_h, 0));
-   break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + 
off) */
-   EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
-   if (!fp->aux->verifier_zext)
-   EMIT(PPC_RAW_LI(dst_reg_h, 0));
-   break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + 
off) */
-   EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
-   if (!fp->aux->verifier_zext)
-   EMIT(PPC_RAW_LI(dst_reg_h, 0));
-   break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + 
off) */
-   EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
-   EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+   switch (size) {
+   case BPF_B:
+   EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+   break;
+   case BPF_H:
+   EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+   break;
+   case BPF_W:
+   EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+   break;
+   case BPF_DW:
+   EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+   EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+   break;
+   }
+
+   if (size != BPF_DW && !fp->aux->verifier_zext)
+   EMIT(PPC_RAW_LI(dst_reg_h, 0));
break;
 
/*
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index b25bf9b11b9d..ad852f15ca61 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -311,6 +311,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
u32 code = insn[i].code;
u32 dst_reg = b2p[insn[i].dst_reg];
u32 src_reg = b2p[insn[i].src_reg];
+   u32 size = BPF_SIZE(code);
s16 off = insn[i].off;
s32 imm = insn[i].imm;
bool func_addr_fixed;
@@ -778,25 +779,29 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
 */
/* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
-   EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
-   if (insn_is_zext(&insn[i + 1]))
-   addrs[++i] = ctx->idx * 4;
-   break;
/* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_H:
-   EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
-   if (insn_is_zext(&insn[i + 1]))
-   addrs[++i] = ctx->idx * 4;
-   break;
/* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_MEM | BPF_W:
-   EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
-   if (insn_is_zext(&insn[i + 1]))
-   addrs[++i] = ctx->idx * 4;
-   break;
/* 

[RESEND PATCH v4 2/8] bpf powerpc: Remove extra_pass from bpf_jit_build_body()

2021-10-12 Thread Hari Bathini
From: Ravi Bangoria 

In the case of extra_pass, the usual JIT passes are always skipped. So,
extra_pass is always false when calling bpf_jit_build_body() and
can be removed.

Signed-off-by: Ravi Bangoria 
---

* No changes in v4.


 arch/powerpc/net/bpf_jit.h| 2 +-
 arch/powerpc/net/bpf_jit_comp.c   | 6 +++---
 arch/powerpc/net/bpf_jit_comp32.c | 4 ++--
 arch/powerpc/net/bpf_jit_comp64.c | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 89bd744c2bff..7145b651fc2a 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -175,7 +175,7 @@ static inline void bpf_clear_seen_register(struct 
codegen_context *ctx, int i)
 
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 
func);
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context 
*ctx,
-  u32 *addrs, bool extra_pass);
+  u32 *addrs);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index fcbf7a917c56..f7972b2c21f6 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -149,7 +149,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
/* Scouting faux-generate pass 0 */
-   if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+   if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
goto out_addrs;
@@ -162,7 +162,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 */
if (cgctx.seen & SEEN_TAILCALL) {
cgctx.idx = 0;
-   if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+   if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
fp = org_fp;
goto out_addrs;
}
@@ -210,7 +210,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
-   if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) {
+   if (bpf_jit_build_body(fp, code_base, &cgctx, addrs)) {
bpf_jit_binary_free(bpf_hdr);
fp = org_fp;
goto out_addrs;
diff --git a/arch/powerpc/net/bpf_jit_comp32.c 
b/arch/powerpc/net/bpf_jit_comp32.c
index 0da31d41d413..903f945601c0 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -268,7 +268,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct 
codegen_context *ctx, u32 o
 
 /* Assemble the body code between the prologue & epilogue */
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context 
*ctx,
-  u32 *addrs, bool extra_pass)
+  u32 *addrs)
 {
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@@ -862,7 +862,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
 
-   ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+   ret = bpf_jit_get_func_addr(fp, &insn[i], false,
    &func_addr, &func_addr_fixed);
if (ret < 0)
return ret;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 8b5157ccfeba..b25bf9b11b9d 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -297,7 +297,7 @@ asm (
 
 /* Assemble the body code between the prologue & epilogue */
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context 
*ctx,
-  u32 *addrs, bool extra_pass)
+  u32 *addrs)
 {
enum stf_barrier_type stf_barrier = stf_barrier_type_get();
const struct bpf_insn *insn = fp->insnsi;
@@ -831,7 +831,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
struct codegen_context *
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
 
-   ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+   ret = bpf_jit_get_func_addr(fp, &insn[i], false,
    &func_addr, &func_addr_fixed);
if (ret < 0)
return ret;
-- 
2.31.1



Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread Steven Rostedt
On Tue, 12 Oct 2021 14:24:43 +0200 (CEST)
Miroslav Benes  wrote:

> > +++ b/kernel/livepatch/patch.c
> > @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
> > bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > if (WARN_ON_ONCE(bit < 0))
> > return;
> > -   /*
> > -* A variant of synchronize_rcu() is used to allow patching functions
> > -* where RCU is not watching, see klp_synchronize_transition().
> > -*/
> > -   preempt_disable_notrace();
> > 
> > func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
> >   stack_node);
> > @@ -120,7 +115,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
> > klp_arch_set_pc(fregs, (unsigned long)func->new_func);
> > 
> >  unlock:
> > -   preempt_enable_notrace();
> > ftrace_test_recursion_unlock(bit);
> >  }  
> 
> I don't like this change much. We have preempt_disable there not because 
> of ftrace_test_recursion, but because of RCU. ftrace_test_recursion was 
> added later. Yes, it would work with the change, but it would also hide 
> things which should not be hidden in my opinion.

Agreed, but I believe the change is fine; it just requires a nice comment to
explain what you said above.

Thus, before the "ftrace_test_recursion_trylock()" we need:

/*
 * The ftrace_test_recursion_trylock() will disable preemption,
 * which is required for the variant of synchronize_rcu() that is
 * used to allow patching functions where RCU is not watching.
 * See klp_synchronize_transition() for more details.
 */

-- Steve


Re: [PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread Steven Rostedt
On Tue, 12 Oct 2021 13:40:08 +0800
王贇  wrote:

> @@ -52,11 +52,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
>   bit = ftrace_test_recursion_trylock(ip, parent_ip);
>   if (WARN_ON_ONCE(bit < 0))
>   return;
> - /*
> -  * A variant of synchronize_rcu() is used to allow patching functions
> -  * where RCU is not watching, see klp_synchronize_transition().
> -  */

I have to take a deeper look at this patch set, but do not remove this
comment, as it explains the protection here, which is not obvious with the
changes you made.

-- Steve


> - preempt_disable_notrace();
> 
>   func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
> stack_node);


Re: [PATCH 2/2] ftrace: prevent preemption in perf_ftrace_function_call()

2021-10-12 Thread Peter Zijlstra
On Tue, Oct 12, 2021 at 01:40:31PM +0800, 王贇 wrote:

> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index 6aed10e..33c2f76 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -441,12 +441,19 @@ void perf_trace_buf_update(void *record, u16 type)
>   if (!rcu_is_watching())
>   return;
> 
> + /*
> +  * Prevent CPU changing from now on. rcu must
> +  * be in watching if the task was migrated and
> +  * scheduled.
> +  */
> + preempt_disable_notrace();
> +
>   if ((unsigned long)ops->private != smp_processor_id())
> - return;
> + goto out;
> 
>   bit = ftrace_test_recursion_trylock(ip, parent_ip);
>   if (bit < 0)
> - return;
> + goto out;
> 
>   event = container_of(ops, struct perf_event, ftrace_ops);
> 

This seems rather daft, wouldn't it be easier to just put that check
under the recursion thing?


[PATCH] powerpc: Set max_mapnr correctly

2021-10-12 Thread Christophe Leroy
max_mapnr is used by virt_addr_valid() to check if a linear
address is valid.

It must only include lowmem PFNs, like other architectures.

Problem detected on a system with 1G of memory (only 768M is mapped): with
CONFIG_DEBUG_VIRTUAL and CONFIG_TEST_DEBUG_VIRTUAL, it didn't report
virt_to_phys(VMALLOC_START), VMALLOC_START being 0xf1000000.
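
For reference, with FLATMEM the check boils down to roughly the following
(a simplified sketch, not the exact powerpc definitions):

	/* pfn_valid() under FLATMEM is roughly: */
	#define pfn_valid(pfn)		(((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
	/* and virt_addr_valid() feeds it the linear-map PFN: */
	#define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))

so a max_mapnr that also counts PFNs beyond lowmem makes some vmalloc-space
addresses look "valid".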

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c3c4e31462ec..889f36b55df9 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -256,7 +256,7 @@ void __init mem_init(void)
 #endif
 
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-   set_max_mapnr(max_pfn);
+   set_max_mapnr(max_low_pfn);
 
kasan_late_init();
 
-- 
2.31.1



Re: [PATCH 5.4 00/52] 5.4.153-rc2 review

2021-10-12 Thread Greg Kroah-Hartman
On Tue, Oct 12, 2021 at 01:04:54PM +0530, Naresh Kamboju wrote:
> On Tue, 12 Oct 2021 at 12:16, Greg Kroah-Hartman
>  wrote:
> >
> > This is the start of the stable review cycle for the 5.4.153 release.
> > There are 52 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> >
> > Responses should be made by Thu, 14 Oct 2021 06:44:25 +.
> > Anything received after that time might be too late.
> >
> > The whole patch series can be found in one patch at:
> > 
> > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.153-rc2.gz
> > or in the git tree and branch at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > linux-5.4.y
> > and the diffstat can be found below.
> >
> > thanks,
> >
> > greg k-h
> 
> stable rc 5.4.153-rc2 Powerpc build failed.
> 
> In file included from arch/powerpc/net/bpf_jit64.h:11,
>  from arch/powerpc/net/bpf_jit_comp64.c:19:
> arch/powerpc/net/bpf_jit_comp64.c: In function 'bpf_jit_build_body':
> arch/powerpc/net/bpf_jit.h:32:9: error: expected expression before 'do'
>32 | do { if (d) { (d)[idx] = instr; } idx++; } while (0)
>   | ^~
> arch/powerpc/net/bpf_jit.h:33:33: note: in expansion of macro 'PLANT_INSTR'
>33 | #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
>   | ^~~
> arch/powerpc/net/bpf_jit_comp64.c:415:41: note: in expansion of macro 'EMIT'
>   415 | EMIT(PPC_LI(dst_reg, 0));
>   | ^~~~
> arch/powerpc/net/bpf_jit.h:33:33: note: in expansion of macro 'PLANT_INSTR'
>33 | #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
>   | ^~~
> arch/powerpc/net/bpf_jit.h:41:33: note: in expansion of macro 'EMIT'
>41 | #define PPC_ADDI(d, a, i)   EMIT(PPC_INST_ADDI |
> ___PPC_RT(d) |   \
>   | ^~~~
> arch/powerpc/net/bpf_jit.h:44:33: note: in expansion of macro 'PPC_ADDI'
>44 | #define PPC_LI(r, i)PPC_ADDI(r, 0, i)
>   | ^~~~
> arch/powerpc/net/bpf_jit_comp64.c:415:46: note: in expansion of macro 'PPC_LI'
>   415 | EMIT(PPC_LI(dst_reg, 0));
>   |  ^~
> make[3]: *** [scripts/Makefile.build:262:
> arch/powerpc/net/bpf_jit_comp64.o] Error 1
> make[3]: Target '__build' not remade because of errors.
> 
> Reported-by: Linux Kernel Functional Testing 

Ok, I'm just going to go delete this patch from the queue now...

Thanks for the quick report.

greg k-h


Re: [PATCH v1 01/10] powerpc: Move 'struct ppc64_opd_entry' back into asm/elf.h

2021-10-12 Thread Arnd Bergmann
On Tue, Oct 12, 2021 at 9:10 AM Michael Ellerman  wrote:
> Christophe Leroy  writes:
> > 'struct ppc64_opd_entry' doesn't belong to uapi/asm/elf.h
> > But it was by mistake added outside of __KERNEL__ section,
> > therefore commit c3617f72036c ("UAPI: (Scripted) Disintegrate
> > arch/powerpc/include/asm") moved it to uapi/asm/elf.h
>
> ... it's been visible to userspace since the first commit moved it, ~13
> years ago in 2008, v2.6.27.
>
> > Move it back into asm/elf.h, this brings it back in line with
> > IA64 and PARISC architectures.
>
> Removing it from the uapi header risks breaking userspace; I doubt
> anything uses it, but who knows.
>
> Given how long it's been there I think it's a bit risky to remove it :/

I would not be too worried about it. While we should absolutely
never break existing binaries, changing the visibility of internal
structures in header files only breaks the compilation of applications
that rely on these entries, and they really should not be using
this in the first place.

Arnd


Re: [PATCH 5.4 00/52] 5.4.153-rc2 review

2021-10-12 Thread Naresh Kamboju
On Tue, 12 Oct 2021 at 12:16, Greg Kroah-Hartman
 wrote:
>
> This is the start of the stable review cycle for the 5.4.153 release.
> There are 52 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Thu, 14 Oct 2021 06:44:25 +.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.153-rc2.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-5.4.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

stable rc 5.4.153-rc2 Powerpc build failed.

In file included from arch/powerpc/net/bpf_jit64.h:11,
 from arch/powerpc/net/bpf_jit_comp64.c:19:
arch/powerpc/net/bpf_jit_comp64.c: In function 'bpf_jit_build_body':
arch/powerpc/net/bpf_jit.h:32:9: error: expected expression before 'do'
   32 | do { if (d) { (d)[idx] = instr; } idx++; } while (0)
  | ^~
arch/powerpc/net/bpf_jit.h:33:33: note: in expansion of macro 'PLANT_INSTR'
   33 | #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
  | ^~~
arch/powerpc/net/bpf_jit_comp64.c:415:41: note: in expansion of macro 'EMIT'
  415 | EMIT(PPC_LI(dst_reg, 0));
  | ^~~~
arch/powerpc/net/bpf_jit.h:33:33: note: in expansion of macro 'PLANT_INSTR'
   33 | #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
  | ^~~
arch/powerpc/net/bpf_jit.h:41:33: note: in expansion of macro 'EMIT'
   41 | #define PPC_ADDI(d, a, i)   EMIT(PPC_INST_ADDI |
___PPC_RT(d) |   \
  | ^~~~
arch/powerpc/net/bpf_jit.h:44:33: note: in expansion of macro 'PPC_ADDI'
   44 | #define PPC_LI(r, i)PPC_ADDI(r, 0, i)
  | ^~~~
arch/powerpc/net/bpf_jit_comp64.c:415:46: note: in expansion of macro 'PPC_LI'
  415 | EMIT(PPC_LI(dst_reg, 0));
  |  ^~
make[3]: *** [scripts/Makefile.build:262:
arch/powerpc/net/bpf_jit_comp64.o] Error 1
make[3]: Target '__build' not remade because of errors.

Reported-by: Linux Kernel Functional Testing 

-- 
Linaro LKFT
https://lkft.linaro.org
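
For readers not steeped in the powerpc BPF JIT macros, the failure mode can be
reconstructed from the definitions quoted in the log: in the 5.4 tree PPC_LI()
is itself an EMIT(), i.e. a do/while statement, so nesting it inside another
EMIT() puts a statement where an expression is required. A minimal,
self-contained sketch (illustrative macro bodies and opcode value, not the
kernel's exact ones):

static unsigned int *image;
static int idx;

#define PLANT_INSTR(d, i, instr)  do { if (d) { (d)[i] = (instr); } (i)++; } while (0)
#define EMIT(instr)               PLANT_INSTR(image, idx, instr)
#define PPC_LI(r, v)              EMIT(0x38000000u | ((r) << 21) | (v))  /* a statement, not a value */

void build_body(void)
{
	/* expands to image[idx] = do { ... } while (0); -> "expected expression before 'do'" */
	EMIT(PPC_LI(3, 0));
}

Presumably the backported hunk was written against a tree where the
PPC_LI-style helpers expand to a bare instruction word rather than to EMIT().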


Re: [PATCH v1 01/10] powerpc: Move 'struct ppc64_opd_entry' back into asm/elf.h

2021-10-12 Thread Michael Ellerman
Christophe Leroy  writes:
> 'struct ppc64_opd_entry' doesn't belong to uapi/asm/elf.h

Agree, but ...

> It was initially in module_64.c and commit 2d291e902791 ("Fix compile
> failure with non modular builds") moved it into asm/elf.h
>
> But it was by mistake added outside of __KERNEL__ section,
> therefore commit c3617f72036c ("UAPI: (Scripted) Disintegrate
> arch/powerpc/include/asm") moved it to uapi/asm/elf.h

... it's been visible to userspace since the first commit moved it, ~13
years ago in 2008, v2.6.27.

> Move it back into asm/elf.h, this brings it back in line with
> IA64 and PARISC architectures.

Removing it from the uapi header risks breaking userspace, I doubt
anything uses it, but who knows.

Given how long it's been there I think it's a bit risky to remove it :/

> diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
> index b8425e3cfd81..64b523848cd7 100644
> --- a/arch/powerpc/include/asm/elf.h
> +++ b/arch/powerpc/include/asm/elf.h
> @@ -176,4 +176,11 @@ do { 
> \
>  /* Relocate the kernel image to @final_address */
>  void relocate(unsigned long final_address);
>  
> +/* There's actually a third entry here, but it's unused */
> +struct ppc64_opd_entry
> +{
> + unsigned long funcaddr;
> + unsigned long r2;
> +};
> +
>  #endif /* _ASM_POWERPC_ELF_H */
> diff --git a/arch/powerpc/include/uapi/asm/elf.h 
> b/arch/powerpc/include/uapi/asm/elf.h
> index 860c59291bfc..308857123a08 100644
> --- a/arch/powerpc/include/uapi/asm/elf.h
> +++ b/arch/powerpc/include/uapi/asm/elf.h
> @@ -289,12 +289,4 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG];
>  /* Keep this the last entry.  */
>  #define R_PPC64_NUM  253
>  
> -/* There's actually a third entry here, but it's unused */
> -struct ppc64_opd_entry
> -{
> - unsigned long funcaddr;
> - unsigned long r2;
> -};

Rather than removing it we can make it uapi only with:

#ifndef __KERNEL__
/* There's actually a third entry here, but it's unused */
struct ppc64_opd_entry
{
unsigned long funcaddr;
unsigned long r2;
};
#endif


And then we can do whatever we want with the kernel internal version.

cheers


Re: [PATCH v1 04/10] asm-generic: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR to define associated stubs

2021-10-12 Thread Helge Deller
* Christophe Leroy :
>
>
> Le 12/10/2021 à 08:02, Helge Deller a écrit :
> > On 10/11/21 17:25, Christophe Leroy wrote:
> > > Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of 
> > > 'dereference_function_descriptor'
> > > to know whether arch has function descriptors.
> > >
> > > Signed-off-by: Christophe Leroy 
> > > ---
> > >   arch/ia64/include/asm/sections.h| 4 ++--
> > >   arch/parisc/include/asm/sections.h  | 6 --
> > >   arch/powerpc/include/asm/sections.h | 6 --
> > >   include/asm-generic/sections.h  | 3 ++-
> > >   4 files changed, 12 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/ia64/include/asm/sections.h 
> > > b/arch/ia64/include/asm/sections.h
> > > index 35f24e52149a..80f5868afb06 100644
> > > --- a/arch/ia64/include/asm/sections.h
> > > +++ b/arch/ia64/include/asm/sections.h
> > > @@ -7,6 +7,8 @@
> > >*  David Mosberger-Tang 
> > >*/
> > >
> > > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> > > +
> > >   #include 
> > >   #include 
> > >   #include 
> > > @@ -27,8 +29,6 @@ extern char 
> > > __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b
> > >   extern char __start_unwind[], __end_unwind[];
> > >   extern char __start_ivt_text[], __end_ivt_text[];
> > >
> > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> > > -
> > >   #undef dereference_function_descriptor
> > >   static inline void *dereference_function_descriptor(void *ptr)
> > >   {
> > > diff --git a/arch/parisc/include/asm/sections.h 
> > > b/arch/parisc/include/asm/sections.h
> > > index bb52aea0cb21..2e781ee19b66 100644
> > > --- a/arch/parisc/include/asm/sections.h
> > > +++ b/arch/parisc/include/asm/sections.h
> > > @@ -2,6 +2,10 @@
> > >   #ifndef _PARISC_SECTIONS_H
> > >   #define _PARISC_SECTIONS_H
> > >
> > > +#ifdef CONFIG_64BIT
> > > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> > > +#endif
> > > +
> > >   /* nothing to see, move along */
> > >   #include 
> > >
> > > @@ -9,8 +13,6 @@ extern char __alt_instructions[], 
> > > __alt_instructions_end[];
> > >
> > >   #ifdef CONFIG_64BIT
> > >
> > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> > > -
> > >   #undef dereference_function_descriptor
> > >   void *dereference_function_descriptor(void *);
> > >
> > > diff --git a/arch/powerpc/include/asm/sections.h 
> > > b/arch/powerpc/include/asm/sections.h
> > > index 32e7035863ac..b7f1ba04e756 100644
> > > --- a/arch/powerpc/include/asm/sections.h
> > > +++ b/arch/powerpc/include/asm/sections.h
> > > @@ -8,6 +8,10 @@
> > >
> > >   #define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
> > >
> > > +#ifdef PPC64_ELF_ABI_v1
> > > +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> > > +#endif
> > > +
> > >   #include 
> > >
> > >   extern bool init_mem_is_free;
> > > @@ -69,8 +73,6 @@ static inline int overlaps_kernel_text(unsigned long 
> > > start, unsigned long end)
> > >
> > >   #ifdef PPC64_ELF_ABI_v1
> > >
> > > -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> > > -
> > >   #undef dereference_function_descriptor
> > >   static inline void *dereference_function_descriptor(void *ptr)
> > >   {
> > > diff --git a/include/asm-generic/sections.h 
> > > b/include/asm-generic/sections.h
> > > index d16302d3eb59..1db5cfd69817 100644
> > > --- a/include/asm-generic/sections.h
> > > +++ b/include/asm-generic/sections.h
> > > @@ -59,7 +59,8 @@ extern char __noinstr_text_start[], 
> > > __noinstr_text_end[];
> > >   extern __visible const void __nosave_begin, __nosave_end;
> > >
> > >   /* Function descriptor handling (if any).  Override in asm/sections.h */
> > > -#ifndef dereference_function_descriptor
> > > +#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR
> > > +#else
> >
> > why not
> > #ifndef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR
> > instead of #if/#else ?
>
> To avoid changing it again in patch 6, or getting an ugly #ifndef/#else at
> the end.

Ok.

Building on parisc fails at multiple files like this:
  CC  mm/filemap.o
In file included from 
/home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/interrupt.h:21,
 from 
/home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/trace_recursion.h:5,
 from 
/home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/ftrace.h:10,
 from 
/home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/perf_event.h:49,
 from 
/home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/trace_events.h:10,
 from 
/home/cvs/parisc/git-kernel/linus-linux-2.6/include/trace/syscall.h:7,
 from 
/home/cvs/parisc/git-kernel/linus-linux-2.6/include/linux/syscalls.h:87,
 from 
/home/cvs/parisc/git-kernel/linus-linux-2.6/init/initramfs.c:11:
/home/cvs/parisc/git-kernel/linus-linux-2.6/arch/parisc/include/asm/sections.h:7:9:
 error: unknown type name ‘Elf64_Fdesc’
7 | typedef Elf64_Fdesc funct_descr_t;
  | ^~~


So, you still need e.g. this patch:

diff --git 

Re: [PATCH] powerpc: don't select KFENCE on platform PPC_FSL_BOOK3E

2021-10-12 Thread Michael Ellerman
Liu Shixin  writes:
> kindly ping.

I was under the impression you were trying to debug why it wasn't
working with Christophe.

cheers

> On 2021/9/24 14:39, Liu Shixin wrote:
>> On platform PPC_FSL_BOOK3E, all lowmem is managed by tlbcam. That means
>> we didn't really map the kfence pool with page granularity. Therefore,
>> if KFENCE is enabled, the system will hit the following panic:
>>
>> BUG: Kernel NULL pointer dereference on read at 0x
>> Faulting instruction address: 0xc01de598
>> Oops: Kernel access of bad area, sig: 11 [#1]
>> BE PAGE_SIZE=4K SMP NR_CPUS=4 MPC8544 DS
>> Dumping ftrace buffer:
>>(ftrace buffer empty)
>> Modules linked in:
>> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc3+ #298
>> NIP:  c01de598 LR: c08ae9c4 CTR: 
>> REGS: c0b4bea0 TRAP: 0300   Not tainted  (5.12.0-rc3+)
>> MSR:  00021000   CR: 24000228  XER: 2000
>> DEAR:  ESR: 
>> GPR00: c08ae9c4 c0b4bf60 c0ad64e0 ef72 00021000   
>> 0200
>> GPR08: c0ad5000   0004  008fbb30  
>> 
>> GPR16:     c000   
>> 
>> GPR24: c08ca004 c08ca004 c0b6a0e0 c0b6 c0b58f00 c085 c08ca000 
>> ef72
>> NIP [c01de598] kfence_protect+0x44/0x6c
>> LR [c08ae9c4] kfence_init+0xfc/0x2a4
>> Call Trace:
>> [c0b4bf60] [efffe160] 0xefffe160 (unreliable)
>> [c0b4bf70] [c08ae9c4] kfence_init+0xfc/0x2a4
>> [c0b4bfb0] [c0894d3c] start_kernel+0x3bc/0x574
>> [c0b4bff0] [c470] set_ivor+0x14c/0x188
>> Instruction dump:
>> 7c0802a6 8109d594 546a653a 90010014 54630026 3920 7d48502e 2c0a
>> 41820010 554a0026 5469b53a 7d295214 <8149> 38831000 554a003c 9149
>> random: get_random_bytes called from print_oops_end_marker+0x40/0x78 
>> with crng_init=0
>> ---[ end trace  ]---
>>
>> Signed-off-by: Liu Shixin 
>> ---
>>  arch/powerpc/Kconfig | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index d46db0bfb998..cffd57bcb5e4 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -185,7 +185,7 @@ config PPC
>>  select HAVE_ARCH_KASAN  if PPC32 && PPC_PAGE_SHIFT <= 14
>>  select HAVE_ARCH_KASAN_VMALLOC  if PPC32 && PPC_PAGE_SHIFT <= 14
>>  select HAVE_ARCH_KGDB
>> -select HAVE_ARCH_KFENCE if PPC32
>> +select HAVE_ARCH_KFENCE if PPC32 && !PPC_FSL_BOOK3E
>>  select HAVE_ARCH_MMAP_RND_BITS
>>  select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
>>  select HAVE_ARCH_NVRAM_OPS


Re: [PATCH] scsi: ibmvscsi: Use dma_alloc_coherent() instead of get_zeroed_page/dma_map_single()

2021-10-12 Thread kernel test robot
Hi Cai,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v5.15-rc5 next-20211011]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Cai-Huoqing/scsi-ibmvscsi-Use-dma_alloc_coherent-instead-of-get_zeroed_page-dma_map_single/20211011-000515
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allyesconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/a1533699f9b84980097fc59d047b5292c1abab1b
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Cai-Huoqing/scsi-ibmvscsi-Use-dma_alloc_coherent-instead-of-get_zeroed_page-dma_map_single/20211011-000515
git checkout a1533699f9b84980097fc59d047b5292c1abab1b
# save the attached .config to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross 
O=build_dir ARCH=powerpc SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   drivers/scsi/ibmvscsi/ibmvscsi.c: In function 'ibmvscsi_init_crq_queue':
>> drivers/scsi/ibmvscsi/ibmvscsi.c:334:21: error: 'struct crq_queue' has no 
>> member named 'msg'; did you mean 'msgs'?
 334 | if (!queue->msg)
 | ^~~
 | msgs
   drivers/scsi/ibmvscsi/ibmvscsi.c:388:60: error: 'struct crq_queue' has no 
member named 'msg'; did you mean 'msgs'?
 388 | dma_free_coherent(hostdata->dev, PAGE_SIZE, queue->msg, 
queue->msg_token);
 |^~~
 |msgs


vim +334 drivers/scsi/ibmvscsi/ibmvscsi.c

   312  
   313  /**
   314   * ibmvscsi_init_crq_queue() - Initializes and registers CRQ with 
hypervisor
   315   * @queue:  crq_queue to initialize and register
   316   * @hostdata:   ibmvscsi_host_data of host
   317   * @max_requests:   maximum requests (unused)
   318   *
   319   * Allocates a page for messages, maps it for dma, and registers
   320   * the crq with the hypervisor.
   321   * Returns zero on success.
   322   */
   323  static int ibmvscsi_init_crq_queue(struct crq_queue *queue,
   324 struct ibmvscsi_host_data *hostdata,
   325 int max_requests)
   326  {
   327  int rc;
   328  int retrc;
   329  struct vio_dev *vdev = to_vio_dev(hostdata->dev);
   330  
   331  queue->size = PAGE_SIZE / sizeof(*queue->msgs);
   332  queue->msgs = dma_alloc_coherent(hostdata->dev, PAGE_SIZE,
   333  &queue->msg_token, GFP_KERNEL);
 > 334  if (!queue->msg)
   335  goto malloc_failed;
   336  
   337  gather_partition_info();
   338  set_adapter_info(hostdata);
   339  
   340  retrc = rc = plpar_hcall_norets(H_REG_CRQ,
   341  vdev->unit_address,
   342  queue->msg_token, PAGE_SIZE);
   343  if (rc == H_RESOURCE)
   344  /* maybe kexecing and resource is busy. try a reset */
   345  rc = ibmvscsi_reset_crq_queue(queue,
   346hostdata);
   347  
   348  if (rc == H_CLOSED) {
   349  /* Adapter is good, but other end is not ready */
   350  dev_warn(hostdata->dev, "Partner adapter not ready\n");
   351  retrc = 0;
   352  } else if (rc != 0) {
   353  dev_warn(hostdata->dev, "Error %d opening adapter\n", 
rc);
   354  goto reg_crq_failed;
   355  }
   356  
   357  queue->cur = 0;
   358  spin_lock_init(&queue->lock);
   359  
   360  tasklet_init(&hostdata->srp_task, (void *)ibmvscsi_task,
   361   (unsigned long)hostdata);
   362  
   363  if (request_irq(vdev->irq,
   364  ibmvscsi_handle_event,
   365  0, "ibmvscsi", (void *)hostdata) != 0) {
   366  dev_err(hostdata->dev, "couldn't register irq 0x%x\n",
   367  vdev->irq);
   368  goto req_irq_failed;
   369  }
   370  
   371  rc = vio_enable_interrupts(vdev);
   372  if (rc != 0) {
   373 
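
The report points at a member-name typo; here is a sketch of the corrected hunk
(an assumption drawn only from the two errors above: the allocation should be
tested and freed via queue->msgs, the member it actually assigns):

	queue->msgs = dma_alloc_coherent(hostdata->dev, PAGE_SIZE,
					 &queue->msg_token, GFP_KERNEL);
	if (!queue->msgs)
		goto malloc_failed;

	/* ... and the matching error path / teardown: */
	dma_free_coherent(hostdata->dev, PAGE_SIZE, queue->msgs, queue->msg_token);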

Re: [PATCH v1 04/10] asm-generic: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR to define associated stubs

2021-10-12 Thread Christophe Leroy




Le 12/10/2021 à 08:02, Helge Deller a écrit :

On 10/11/21 17:25, Christophe Leroy wrote:

Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of 
'dereference_function_descriptor'
to know whether arch has function descriptors.

Signed-off-by: Christophe Leroy 
---
  arch/ia64/include/asm/sections.h| 4 ++--
  arch/parisc/include/asm/sections.h  | 6 --
  arch/powerpc/include/asm/sections.h | 6 --
  include/asm-generic/sections.h  | 3 ++-
  4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
index 35f24e52149a..80f5868afb06 100644
--- a/arch/ia64/include/asm/sections.h
+++ b/arch/ia64/include/asm/sections.h
@@ -7,6 +7,8 @@
   *David Mosberger-Tang 
   */

+#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+
  #include 
  #include 
  #include 
@@ -27,8 +29,6 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], 
__end_gate_brl_fsys_b
  extern char __start_unwind[], __end_unwind[];
  extern char __start_ivt_text[], __end_ivt_text[];

-#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
-
  #undef dereference_function_descriptor
  static inline void *dereference_function_descriptor(void *ptr)
  {
diff --git a/arch/parisc/include/asm/sections.h 
b/arch/parisc/include/asm/sections.h
index bb52aea0cb21..2e781ee19b66 100644
--- a/arch/parisc/include/asm/sections.h
+++ b/arch/parisc/include/asm/sections.h
@@ -2,6 +2,10 @@
  #ifndef _PARISC_SECTIONS_H
  #define _PARISC_SECTIONS_H

+#ifdef CONFIG_64BIT
+#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+#endif
+
  /* nothing to see, move along */
  #include 

@@ -9,8 +13,6 @@ extern char __alt_instructions[], __alt_instructions_end[];

  #ifdef CONFIG_64BIT

-#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
-
  #undef dereference_function_descriptor
  void *dereference_function_descriptor(void *);

diff --git a/arch/powerpc/include/asm/sections.h 
b/arch/powerpc/include/asm/sections.h
index 32e7035863ac..b7f1ba04e756 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -8,6 +8,10 @@

  #define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed

+#ifdef PPC64_ELF_ABI_v1
+#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
+#endif
+
  #include 

  extern bool init_mem_is_free;
@@ -69,8 +73,6 @@ static inline int overlaps_kernel_text(unsigned long start, 
unsigned long end)

  #ifdef PPC64_ELF_ABI_v1

-#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
-
  #undef dereference_function_descriptor
  static inline void *dereference_function_descriptor(void *ptr)
  {
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index d16302d3eb59..1db5cfd69817 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -59,7 +59,8 @@ extern char __noinstr_text_start[], __noinstr_text_end[];
  extern __visible const void __nosave_begin, __nosave_end;

  /* Function descriptor handling (if any).  Override in asm/sections.h */
-#ifndef dereference_function_descriptor
+#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR
+#else


why not
#ifndef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR
instead of #if/#else ?


To avoid changing it again in patch 6, or getting an ugly #ifndef/#else 
at the end.





  #define dereference_function_descriptor(p) ((void *)(p))
  #define dereference_kernel_function_descriptor(p) ((void *)(p))
  #endif





Re: [PATCH v1 04/10] asm-generic: Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR to define associated stubs

2021-10-12 Thread Helge Deller
On 10/11/21 17:25, Christophe Leroy wrote:
> Use HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR instead of 
> 'dereference_function_descriptor'
> to know whether arch has function descriptors.
>
> Signed-off-by: Christophe Leroy 
> ---
>  arch/ia64/include/asm/sections.h| 4 ++--
>  arch/parisc/include/asm/sections.h  | 6 --
>  arch/powerpc/include/asm/sections.h | 6 --
>  include/asm-generic/sections.h  | 3 ++-
>  4 files changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/arch/ia64/include/asm/sections.h 
> b/arch/ia64/include/asm/sections.h
> index 35f24e52149a..80f5868afb06 100644
> --- a/arch/ia64/include/asm/sections.h
> +++ b/arch/ia64/include/asm/sections.h
> @@ -7,6 +7,8 @@
>   *   David Mosberger-Tang 
>   */
>
> +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> +
>  #include 
>  #include 
>  #include 
> @@ -27,8 +29,6 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], 
> __end_gate_brl_fsys_b
>  extern char __start_unwind[], __end_unwind[];
>  extern char __start_ivt_text[], __end_ivt_text[];
>
> -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> -
>  #undef dereference_function_descriptor
>  static inline void *dereference_function_descriptor(void *ptr)
>  {
> diff --git a/arch/parisc/include/asm/sections.h 
> b/arch/parisc/include/asm/sections.h
> index bb52aea0cb21..2e781ee19b66 100644
> --- a/arch/parisc/include/asm/sections.h
> +++ b/arch/parisc/include/asm/sections.h
> @@ -2,6 +2,10 @@
>  #ifndef _PARISC_SECTIONS_H
>  #define _PARISC_SECTIONS_H
>
> +#ifdef CONFIG_64BIT
> +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> +#endif
> +
>  /* nothing to see, move along */
>  #include 
>
> @@ -9,8 +13,6 @@ extern char __alt_instructions[], __alt_instructions_end[];
>
>  #ifdef CONFIG_64BIT
>
> -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> -
>  #undef dereference_function_descriptor
>  void *dereference_function_descriptor(void *);
>
> diff --git a/arch/powerpc/include/asm/sections.h 
> b/arch/powerpc/include/asm/sections.h
> index 32e7035863ac..b7f1ba04e756 100644
> --- a/arch/powerpc/include/asm/sections.h
> +++ b/arch/powerpc/include/asm/sections.h
> @@ -8,6 +8,10 @@
>
>  #define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
>
> +#ifdef PPC64_ELF_ABI_v1
> +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> +#endif
> +
>  #include 
>
>  extern bool init_mem_is_free;
> @@ -69,8 +73,6 @@ static inline int overlaps_kernel_text(unsigned long start, 
> unsigned long end)
>
>  #ifdef PPC64_ELF_ABI_v1
>
> -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1
> -
>  #undef dereference_function_descriptor
>  static inline void *dereference_function_descriptor(void *ptr)
>  {
> diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
> index d16302d3eb59..1db5cfd69817 100644
> --- a/include/asm-generic/sections.h
> +++ b/include/asm-generic/sections.h
> @@ -59,7 +59,8 @@ extern char __noinstr_text_start[], __noinstr_text_end[];
>  extern __visible const void __nosave_begin, __nosave_end;
>
>  /* Function descriptor handling (if any).  Override in asm/sections.h */
> -#ifndef dereference_function_descriptor
> +#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR
> +#else

why not
#ifndef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR
instead of #if/#else ?

>  #define dereference_function_descriptor(p) ((void *)(p))
>  #define dereference_kernel_function_descriptor(p) ((void *)(p))
>  #endif
>



[PATCH 0/2] ftrace: make sure preemption disabled on recursion testing

2021-10-12 Thread 王贇
Testing shows that perf_ftrace_function_call() is using
smp_processor_id() with preemption enabled; any checking
of the CPU could be wrong after preemption. PATCH 1/2 will fix
that.

Besides, as Peter pointed out, the recursion testing within
the section between the ftrace_test_recursion_trylock()/_unlock()
pair also needs preemption disabled, as the documentation
explains. PATCH 2/2 will make sure of that.
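
In other words, after this series a handler can rely on the following pattern
(a minimal sketch of the intended contract; do_handler_work() is just a
placeholder for the handler body, and the assumption is that trylock() only
disables preemption when it succeeds):

	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	/*
	 * Preemption is disabled from here on, so smp_processor_id() and
	 * other per-CPU accesses are stable until the matching unlock.
	 */
	do_handler_work();

	ftrace_test_recursion_unlock(bit);	/* re-enables preemption */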

Michael Wang (2):
  ftrace: disable preemption on the testing of recursion
  ftrace: prevent preemption in perf_ftrace_function_call()

 arch/csky/kernel/probes/ftrace.c |  2 --
 arch/parisc/kernel/ftrace.c  |  2 --
 arch/powerpc/kernel/kprobes-ftrace.c |  2 --
 arch/riscv/kernel/probes/ftrace.c|  2 --
 arch/x86/kernel/kprobes/ftrace.c |  2 --
 include/linux/trace_recursion.h  | 10 +-
 kernel/livepatch/patch.c |  6 --
 kernel/trace/trace_event_perf.c  | 17 +
 kernel/trace/trace_functions.c   |  5 -
 9 files changed, 22 insertions(+), 26 deletions(-)

-- 
1.8.3.1



Re: [PATCH 0/2] ftrace: make sure preemption disabled on recursion testing

2021-10-12 Thread 王贇



On 2021/10/12 下午1:39, 王贇 wrote:
> Testing shows that perf_ftrace_function_call() is using
> smp_processor_id() with preemption enabled; any checking
> of the CPU could be wrong after preemption. PATCH 1/2 will fix
> that.

2/2 actually.

> 
> Besides, as Peter pointed out, the recursion testing within
> the section between the ftrace_test_recursion_trylock()/_unlock()
> pair also needs preemption disabled, as the documentation
> explains. PATCH 2/2 will make sure of that.

1/2 actually...

Regards,
Michael Wang

> 
> Michael Wang (2):
>   ftrace: disable preemption on the testing of recursion
>   ftrace: prevent preemption in perf_ftrace_function_call()
> 
>  arch/csky/kernel/probes/ftrace.c |  2 --
>  arch/parisc/kernel/ftrace.c  |  2 --
>  arch/powerpc/kernel/kprobes-ftrace.c |  2 --
>  arch/riscv/kernel/probes/ftrace.c|  2 --
>  arch/x86/kernel/kprobes/ftrace.c |  2 --
>  include/linux/trace_recursion.h  | 10 +-
>  kernel/livepatch/patch.c |  6 --
>  kernel/trace/trace_event_perf.c  | 17 +
>  kernel/trace/trace_functions.c   |  5 -
>  9 files changed, 22 insertions(+), 26 deletions(-)
> 


[PATCH 2/2] ftrace: prevent preemption in perf_ftrace_function_call()

2021-10-12 Thread 王贇
With CONFIG_DEBUG_PREEMPT we observed reports like:

  BUG: using smp_processor_id() in preemptible
  caller is perf_ftrace_function_call+0x6f/0x2e0
  CPU: 1 PID: 680 Comm: a.out Not tainted
  Call Trace:
   
   dump_stack_lvl+0x8d/0xcf
   check_preemption_disabled+0x104/0x110
   ? optimize_nops.isra.7+0x230/0x230
   ? text_poke_bp_batch+0x9f/0x310
   perf_ftrace_function_call+0x6f/0x2e0
   ...
   __text_poke+0x5/0x620
   text_poke_bp_batch+0x9f/0x310

This tells us the CPU can change after the task is preempted, so any
check of the CPU done before preemption is disabled becomes invalid.

This patch simply turns off preemption in perf_ftrace_function_call()
to prevent the CPU from changing.

CC: Steven Rostedt 
Reported-by: Abaci 
Signed-off-by: Michael Wang 
---
 kernel/trace/trace_event_perf.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6aed10e..33c2f76 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -441,12 +441,19 @@ void perf_trace_buf_update(void *record, u16 type)
if (!rcu_is_watching())
return;

+   /*
+* Prevent CPU changing from now on. rcu must
+* be in watching if the task was migrated and
+* scheduled.
+*/
+   preempt_disable_notrace();
+
if ((unsigned long)ops->private != smp_processor_id())
-   return;
+   goto out;

bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
-   return;
+   goto out;

event = container_of(ops, struct perf_event, ftrace_ops);

@@ -468,16 +475,18 @@ void perf_trace_buf_update(void *record, u16 type)

entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
if (!entry)
-   goto out;
+   goto unlock;

entry->ip = ip;
entry->parent_ip = parent_ip;
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
  1, &regs, &head, NULL);

-out:
+unlock:
ftrace_test_recursion_unlock(bit);
 #undef ENTRY_SIZE
+out:
+   preempt_enable_notrace();
 }

 static int perf_ftrace_function_register(struct perf_event *event)
-- 
1.8.3.1




[PATCH 1/2] ftrace: disable preemption on the testing of recursion

2021-10-12 Thread 王贇
As the documentation explains, ftrace_test_recursion_trylock()
and ftrace_test_recursion_unlock() are supposed to disable and
enable preemption properly; however, this work is currently done
outside of the functions, which could be missed by mistake.

This patch will make sure preemption is disabled when trylock()
succeeds, and unlock() will re-enable preemption once the
recursion testing is finished.

Reported-by: Abaci 
Suggested-by: Peter Zijlstra 
Signed-off-by: Michael Wang 
---
 arch/csky/kernel/probes/ftrace.c |  2 --
 arch/parisc/kernel/ftrace.c  |  2 --
 arch/powerpc/kernel/kprobes-ftrace.c |  2 --
 arch/riscv/kernel/probes/ftrace.c|  2 --
 arch/x86/kernel/kprobes/ftrace.c |  2 --
 include/linux/trace_recursion.h  | 10 +-
 kernel/livepatch/patch.c |  6 --
 kernel/trace/trace_functions.c   |  5 -
 8 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index ef2bb9b..dff7921 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -24,7 +24,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (!p) {
p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
@@ -64,7 +63,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 0a1e75a..3543496 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -216,7 +216,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -245,7 +244,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
}
__this_cpu_write(current_kprobe, NULL);
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c 
b/arch/powerpc/kernel/kprobes-ftrace.c
index 7154d58..072ebe7 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
return;

regs = ftrace_get_regs(fregs);
-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)nip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/riscv/kernel/probes/ftrace.c 
b/arch/riscv/kernel/probes/ftrace.c
index aab85a8..7142ec4 100644
--- a/arch/riscv/kernel/probes/ftrace.c
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
return;

-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 596de2f..dd2ec14 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
return;

-   preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
goto out;
@@ -59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long 
parent_ip,
__this_cpu_write(current_kprobe, NULL);
}
 out:
-   preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index a9f9c57..805f9c4 100644
--- a/include/linux/trace_recursion.h
+++ 
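
The part of the diff that modifies include/linux/trace_recursion.h amounts to
roughly the following (a sketch based on the commit message and the 5.15-era
helpers, not the exact hunk):

static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
							  unsigned long parent_ip)
{
	int bit = trace_test_and_set_recursion(ip, parent_ip,
					       TRACE_FTRACE_START, TRACE_FTRACE_MAX);

	/* Only hold preemption off when the lock was actually taken. */
	if (bit >= 0)
		preempt_disable_notrace();

	return bit;
}

static __always_inline void ftrace_test_recursion_unlock(int bit)
{
	preempt_enable_notrace();
	trace_clear_recursion(bit);
}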

Re: [PATCH] powerpc/s64: Clarify that radix lacks DEBUG_PAGEALLOC

2021-10-12 Thread Christophe Leroy




Le 12/10/2021 à 03:13, Joel Stanley a écrit :

The page_alloc.c code will call into __kernel_map_pages when
DEBUG_PAGEALLOC is configured and enabled.

As the implementation assumes hash, this should crash spectacularly if
not for a bit of luck in __kernel_map_pages. In this function
linear_map_hash_count is always zero, so the for loop exits without doing
any damage.

There are no other platforms that determine if they support
debug_pagealloc at runtime. Instead of adding code to mm/page_alloc.c to
do that, this change turns the map/unmap into a noop when in radix
mode and prints a warning once.

Signed-off-by: Joel Stanley 
---
I noticed this when I was looking at adding kfence support a while back.
I've put that work aside and jpn has since gotten further than me, but I
think this is a fix worth considering.

  arch/powerpc/include/asm/book3s/64/hash.h |  2 ++
  arch/powerpc/mm/book3s64/hash_utils.c |  2 +-
  arch/powerpc/mm/book3s64/pgtable.c| 12 
  3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
b/arch/powerpc/include/asm/book3s/64/hash.h
index d959b0195ad9..674fe0e890dc 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -255,6 +255,8 @@ int hash__create_section_mapping(unsigned long start, 
unsigned long end,
 int nid, pgprot_t prot);
  int hash__remove_section_mapping(unsigned long start, unsigned long end);
  
+void hash__kernel_map_pages(struct page *page, int numpages, int enable);

+
  #endif /* !__ASSEMBLY__ */
  #endif /* __KERNEL__ */
  #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c 
b/arch/powerpc/mm/book3s64/hash_utils.c
index c145776d3ae5..cfd45245d009 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1988,7 +1988,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, 
unsigned long lmi)
 mmu_kernel_ssize, 0);
  }
  
-void __kernel_map_pages(struct page *page, int numpages, int enable)

+void hash__kernel_map_pages(struct page *page, int numpages, int enable)
  {
unsigned long flags, vaddr, lmi;
int i;
diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
b/arch/powerpc/mm/book3s64/pgtable.c
index 9e16c7b1a6c5..0aefc272cd03 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -526,3 +526,15 @@ static int __init pgtable_debugfs_setup(void)
return 0;
  }
  arch_initcall(pgtable_debugfs_setup);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+   if (radix_enabled()) {
+   pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n");
+   return;
+   }
+
+   hash__kernel_map_pages(page, numpages, enable);
+}


I think it would be better to do the same as most other functions (like 
map_kernel_page() for instance):


In arch/powerpc/include/asm/book3s/64/pgtable.h do:

static inline void __kernel_map_pages(struct page *page, int numpages, 
int enable)

{
if (radix_enabled())
radix__kernel_map_pages(...);
else
hash__kernel_map_pages(...);
}

Then in arch/powerpc/include/asm/book3s/64/radix.h do (or in 
arch/powerpc/mm/book3s64/radix_pgtable.c if you prefer?):


static inline void radix__kernel_map_pages(struct page *page, int 
numpages, int enable)

{
pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n");
}


+#endif
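
For context on the "bit of luck" mentioned in the commit message, the hash
implementation (renamed hash__kernel_map_pages() by this patch) is shaped
roughly like the sketch below; this is from memory of hash_utils.c rather than
a verbatim quote. With linear_map_hash_count == 0 under radix, the body of the
loop never runs and the function is a no-op:

void hash__kernel_map_pages(struct page *page, int numpages, int enable)
{
	unsigned long flags, vaddr, lmi;
	int i;

	local_irq_save(flags);
	for (i = 0; i < numpages; i++) {
		vaddr = (unsigned long)page_address(page);
		lmi = __pa(vaddr) >> PAGE_SHIFT;
		if (lmi < linear_map_hash_count) {	/* never true when the count is 0 */
			if (enable)
				kernel_map_linear_page(vaddr, lmi);
			else
				kernel_unmap_linear_page(vaddr, lmi);
		}
		page++;
	}
	local_irq_restore(flags);
}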