Re: [PATCH v3 5/6] powerpc/64/interrupt: reduce expensive debug tests

2021-09-22 Thread Christophe Leroy




Le 22/09/2021 à 16:54, Nicholas Piggin a écrit :

Move the assertions requiring restart table searches under
CONFIG_PPC_IRQ_SOFT_MASK_DEBUG.

Signed-off-by: Nicholas Piggin 
---
  arch/powerpc/include/asm/interrupt.h | 14 ++
  1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index e178d143671a..0e84e99af37b 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -97,6 +97,11 @@ static inline void srr_regs_clobbered(void)
local_paca->hsrr_valid = 0;
  }
  #else
+static inline unsigned long search_kernel_restart_table(unsigned long addr)
+{
+   return 0;
+}
+


Not sure you need that. Moving the 64s prototype out of the #ifdef would 
do it as well.




  static inline bool is_implicit_soft_masked(struct pt_regs *regs)
  {
return false;
@@ -190,13 +195,14 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
 */
if (TRAP(regs) != INTERRUPT_PROGRAM) {
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
-   BUG_ON(is_implicit_soft_masked(regs));
+   if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+   BUG_ON(is_implicit_soft_masked(regs));
}
-#ifdef CONFIG_PPC_BOOK3S
+
/* Move this under a debugging check */
-   if (arch_irq_disabled_regs(regs))
+   if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) &&
+   arch_irq_disabled_regs(regs))
BUG_ON(search_kernel_restart_table(regs->nip));
-#endif
}
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));



Re: [PATCH v3 1/6] powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible

2021-09-22 Thread Christophe Leroy




Le 22/09/2021 à 16:54, Nicholas Piggin a écrit :

Make synchronous interrupt handler entry wrappers enable MSR[EE] if
MSR[EE] was enabled in the interrupted context. IRQs are soft-disabled
at this point so there is no change to high level code, but it means a
masked interrupt could fire.

This is a performance disadvantage for interrupts which do not later
call interrupt_cond_local_irq_enable(), because an additional mtmsrd
or wrtee instruction is executed. However the important synchronous
interrupts (e.g., page fault) do enable interrupts, so the performance
disadvantage is mostly avoided.

In the next patch, MSR[RI] enabling can be combined with MSR[EE]
enabling, which mitigates the performance drop for the former and gives
a performance advantage for the latter interrupts, on 64s machines. 64e
is coming along for the ride for now to avoid divergences with 64s in
this tricky code.

Signed-off-by: Nicholas Piggin 
---
  arch/powerpc/include/asm/interrupt.h | 19 ++-
  1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index b76ab848aa0d..3802390d8eea 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -150,7 +150,20 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
  #ifdef CONFIG_PPC64
if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
trace_hardirqs_off();
-   local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+   /*
+* If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
+* Asynchronous interrupts get here with HARD_DIS set (see below), so
+* this enables MSR[EE] for synchronous interrupts. IRQs remain
+* soft-masked. The interrupt handler may later call
+* interrupt_cond_local_irq_enable() to achieve a regular process
+* context.
+*/
+   if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
+   if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+   BUG_ON(!(regs->msr & MSR_EE));


Could be:
	BUG_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(regs->msr & 
MSR_EE));



+   __hard_irq_enable();
+   }
  
  	if (user_mode(regs)) {

CT_WARN_ON(ct_state() != CONTEXT_USER);
@@ -200,6 +213,10 @@ static inline void interrupt_exit_prepare(struct pt_regs 
*regs, struct interrupt
  
  static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)

  {
+#ifdef CONFIG_PPC64
+   /* Ensure interrupt_enter_prepare does not enable MSR[EE] */
+   local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+#endif
  #ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_CTRL) &&
!test_thread_local_flags(_TLF_RUNLATCH))



[PATCH AUTOSEL 4.19 01/15] ibmvnic: check failover_pending in login response

2021-09-22 Thread Sasha Levin
From: Sukadev Bhattiprolu 

[ Upstream commit d437f5aa23aa2b7bd07cd44b839d7546cc17166f ]

If a failover occurs before a login response is received, the login
response buffer may be undefined. Check that there was no failover
before accessing the login response buffer.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 4008007c2e34..d97641b9928b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4038,6 +4038,14 @@ static int handle_login_rsp(union ibmvnic_crq 
*login_rsp_crq,
return 0;
}
 
+   if (adapter->failover_pending) {
+   adapter->init_done_rc = -EAGAIN;
+   netdev_dbg(netdev, "Failover pending, ignoring login 
response\n");
+   complete(&adapter->init_done);
+   /* login response buffer will be released on reset */
+   return 0;
+   }
+
netdev->mtu = adapter->req_mtu - ETH_HLEN;
 
netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
-- 
2.30.2



[PATCH AUTOSEL 5.4 01/19] ibmvnic: check failover_pending in login response

2021-09-22 Thread Sasha Levin
From: Sukadev Bhattiprolu 

[ Upstream commit d437f5aa23aa2b7bd07cd44b839d7546cc17166f ]

If a failover occurs before a login response is received, the login
response buffer may be undefined. Check that there was no failover
before accessing the login response buffer.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index ecfe588f330e..cfe7229593ea 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4277,6 +4277,14 @@ static int handle_login_rsp(union ibmvnic_crq 
*login_rsp_crq,
return 0;
}
 
+   if (adapter->failover_pending) {
+   adapter->init_done_rc = -EAGAIN;
+   netdev_dbg(netdev, "Failover pending, ignoring login 
response\n");
+   complete(&adapter->init_done);
+   /* login response buffer will be released on reset */
+   return 0;
+   }
+
netdev->mtu = adapter->req_mtu - ETH_HLEN;
 
netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
-- 
2.30.2



[PATCH AUTOSEL 5.10 01/26] ibmvnic: check failover_pending in login response

2021-09-22 Thread Sasha Levin
From: Sukadev Bhattiprolu 

[ Upstream commit d437f5aa23aa2b7bd07cd44b839d7546cc17166f ]

If a failover occurs before a login response is received, the login
response buffer may be undefined. Check that there was no failover
before accessing the login response buffer.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 3134c1988db3..bb8d0a0f48ee 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4478,6 +4478,14 @@ static int handle_login_rsp(union ibmvnic_crq 
*login_rsp_crq,
return 0;
}
 
+   if (adapter->failover_pending) {
+   adapter->init_done_rc = -EAGAIN;
+   netdev_dbg(netdev, "Failover pending, ignoring login 
response\n");
+   complete(&adapter->init_done);
+   /* login response buffer will be released on reset */
+   return 0;
+   }
+
netdev->mtu = adapter->req_mtu - ETH_HLEN;
 
netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
-- 
2.30.2



[PATCH AUTOSEL 5.14 01/34] ibmvnic: check failover_pending in login response

2021-09-22 Thread Sasha Levin
From: Sukadev Bhattiprolu 

[ Upstream commit d437f5aa23aa2b7bd07cd44b839d7546cc17166f ]

If a failover occurs before a login response is received, the login
response buffer may be undefined. Check that there was no failover
before accessing the login response buffer.

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index a775c69e4fd7..6aa6ff89a765 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4700,6 +4700,14 @@ static int handle_login_rsp(union ibmvnic_crq 
*login_rsp_crq,
return 0;
}
 
+   if (adapter->failover_pending) {
+   adapter->init_done_rc = -EAGAIN;
+   netdev_dbg(netdev, "Failover pending, ignoring login 
response\n");
+   complete(&adapter->init_done);
+   /* login response buffer will be released on reset */
+   return 0;
+   }
+
netdev->mtu = adapter->req_mtu - ETH_HLEN;
 
netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
-- 
2.30.2



Re: [PATCH v3 5/8] x86/sme: Replace occurrences of sme_active() with cc_platform_has()

2021-09-22 Thread Kirill A. Shutemov
On Wed, Sep 22, 2021 at 09:52:07PM +0200, Borislav Petkov wrote:
> On Wed, Sep 22, 2021 at 05:30:15PM +0300, Kirill A. Shutemov wrote:
> > Not fine, but waiting to blowup with random build environment change.
> 
> Why is it not fine?
> 
> Are you suspecting that the compiler might generate something else and
> not a rip-relative access?

Yes. We had it before for __supported_pte_mask and other users of
fixup_pointer().

See for instance 4a09f0210c8b ("x86/boot/64/clang: Use fixup_pointer() to
access '__supported_pte_mask'")

Unless we find other way to guarantee RIP-relative access, we must use
fixup_pointer() to access any global variables.

-- 
 Kirill A. Shutemov


doesn't boot if linkaddr=0x0090.0000

2021-09-22 Thread cp
hi
I am new to this list. Hope this is the right place to ask.

I am working with a PPC405GP board, and as far as I understand, the
support for ppc40x platforms like Acadia and Walnut were dropped with
kernel 5.8.0, so this seems like a pretty straightforward question,
but extensive experiments from kernel 4.11 to kernel 5.7.19 haven't
shown a really clear, up-to-date answer.

In k4.11 .. k5.7.19, when the kernel size is bigger than 8 MB, the
final kernel doesn't boot but rather arch/powerpc/boot/main.c dies
before the first message from the kernel shows up.

Why?

Digging deeper I see the relation between the kernel size and link_addr

# Round the size to next higher MB limit
round_size=$(((strip_size + 0xf) & 0xfff0))

round_size=0x$(printf "%x" $round_size)
link_addr=$(printf "%d" $link_address)

and this is where link_addr is involved

text_start="-Ttext $link_address"

My kernels are compiled for cuboot, and the code that invokes "kentry"
is entirely located in arch/powerpc/boot/main.c

I instrumented that module, and this is what I see on the console

The following is the same kernel, compiled with the same .config, but
with two link_addr values

A) with link_addr=0x0080.
image loaded from 0x0080
SP=0x03eb1b80
kernel_size = 7411084 bytes
copying 256 bytes from kernel-image at 0x0080f000 to elfheader
elf_info.loadsize = 0x00700e68
elf_info.memsize  = 0x0074234c
allocating 7611212 bytes for the new kernel
copying ...
from = 0x0081f000
to = 0x
size = 7343720
flush_cache, 32Mbyte flushed
cmdline: uboot bootargs overridden
cmdline=[console=ttyS0,115200 root=/dev/sda2 rootfstype=ext2 rw
init=/sbin/init ]
Finalizing device tree... flat tree at 0xf23b80
ft_addr=0xf23b80
my tp1: success
kernel booting 
(it boots)

B) with link_addr=0x0090.
image loaded from 0x0090
SP=0x03eb1b80
kernel_size = 7411084
copying 256 bytes from kernel-image at 0x0090f000 to elfheader
elf_info.loadsize = 0x00700e68
elf_info.memsize  = 0x0074234c
allocating 7611212 bytes for the new kernel
copying ...
from = 0x0091f000
to = 0x
size = 7343720
flush_cache, 32Mbyte flushed
cmdline: uboot bootargs overridden
cmdline=[console=ttyS0,115200 root=/dev/sda2 rootfstype=ext2 rw
init=/sbin/init ]
Finalizing device tree... flat tree at 0x1023b80
ft_addr=0x1023b80
my tp2: success
my tp3: success
invalidate_cache 0x+0x0200
my tp4: (point of no return)
calling kentry()...
kernel booting 
(it dies at this point, but without a debugger it's like watching
something fall into a black hole)

Any ideas?
I am lost ...

Carlo


Re: [PATCH v3 5/8] x86/sme: Replace occurrences of sme_active() with cc_platform_has()

2021-09-22 Thread Borislav Petkov
On Wed, Sep 22, 2021 at 05:30:15PM +0300, Kirill A. Shutemov wrote:
> Not fine, but waiting to blowup with random build environment change.

Why is it not fine?

Are you suspecting that the compiler might generate something else and
not a rip-relative access?

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


doesn't boot when linkaddr=0x0090.0000

2021-09-22 Thread cp
hi
I am new to this list. Hope this is the right place to ask.

I am working with a PPC405GP board, and as far as I understand, the
support for ppc40x platforms like Acadia and Walnut was dropped with
kernel 5.8.0, so this seems like a pretty straightforward question,
but extensive experiments from kernel 4.11 to kernel 5.7.19 haven't
shown a really clear, up-to-date answer.

In k4.11 .. k5.7.19, when the kernel size is bigger than 8 MB, the
final kernel doesn't boot but rather arch/powerpc/boot/main.c dies
before the first message from the kernel shows up.

Why?

Digging deeper I see the relation between the kernel size and link_addr

# Round the size to next higher MB limit
round_size=$(((strip_size + 0xf) & 0xfff0))

round_size=0x$(printf "%x" $round_size)
link_addr=$(printf "%d" $link_address)

and this is where link_addr is involved

text_start="-Ttext $link_address"

My kernels are compiled for cuboot, and the code that invokes "kentry"
is entirely located in arch/powerpc/boot/main.c

I instrumented that module, and this is what I see on the console

The following is the same kernel, compiled with the same .config, but
with two link_addr values

A) with link_addr=0x0080.
image loaded from 0x0080
SP=0x03eb1b80
kernel_size = 7411084 bytes
copying 256 bytes from kernel-image at 0x0080f000 to elfheader
elf_info.loadsize = 0x00700e68
elf_info.memsize  = 0x0074234c
allocating 7611212 bytes for the new kernel
copying ...
from = 0x0081f000
to = 0x
size = 7343720
flush_cache, 32Mbyte flushed
cmdline: uboot bootargs overridden
cmdline=[console=ttyS0,115200 root=/dev/sda2 rootfstype=ext2 rw
init=/sbin/init ]
Finalizing device tree... flat tree at 0xf23b80
ft_addr=0xf23b80
my tp1: success
kernel booting 
(it boots)

B) with link_addr=0x0090.
image loaded from 0x0090
SP=0x03eb1b80
kernel_size = 7411084
copying 256 bytes from kernel-image at 0x0090f000 to elfheader
elf_info.loadsize = 0x00700e68
elf_info.memsize  = 0x0074234c
allocating 7611212 bytes for the new kernel
copying ...
from = 0x0091f000
to = 0x
size = 7343720
flush_cache, 32Mbyte flushed
cmdline: uboot bootargs overridden
cmdline=[console=ttyS0,115200 root=/dev/sda2 rootfstype=ext2 rw
init=/sbin/init ]
Finalizing device tree... flat tree at 0x1023b80
ft_addr=0x1023b80
my tp2: success
my tp3: success
invalidate_cache 0x+0x0200
my tp4: (point of no return)
calling kentry()...
kernel booting 
(it dies at this point, but without a debugger it's like watching
something fall into a black hole)

Any ideas?
I am lost ...

Carlo


Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()

2021-09-22 Thread Nathan Lynch
Srikar Dronamraju  writes:

> * Nathan Lynch  [2021-09-22 11:01:12]:
>
>> Srikar Dronamraju  writes:
>> > * Nathan Lynch  [2021-09-20 22:12:13]:
>> >
>> >> vcpu_is_preempted() can be used outside of preempt-disabled critical
>> >> sections, yielding warnings such as:
>> >> 
>> >> BUG: using smp_processor_id() in preemptible [] code: 
>> >> systemd-udevd/185
>> >> caller is rwsem_spin_on_owner+0x1cc/0x2d0
>> >> CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33
>> >> Call Trace:
>> >> [c00012907ac0] [c0aa30a8] dump_stack_lvl+0xac/0x108 
>> >> (unreliable)
>> >> [c00012907b00] [c1371f70] 
>> >> check_preemption_disabled+0x150/0x160
>> >> [c00012907b90] [c01e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0
>> >> [c00012907be0] [c01e1408] 
>> >> rwsem_down_write_slowpath+0x478/0x9a0
>> >> [c00012907ca0] [c0576cf4] filename_create+0x94/0x1e0
>> >> [c00012907d10] [c057ac08] do_symlinkat+0x68/0x1a0
>> >> [c00012907d70] [c057ae18] sys_symlink+0x58/0x70
>> >> [c00012907da0] [c002e448] system_call_exception+0x198/0x3c0
>> >> [c00012907e10] [c000c54c] system_call_common+0xec/0x250
>> >> 
>> >> The result of vcpu_is_preempted() is always subject to invalidation by
>> >> events inside and outside of Linux; it's just a best guess at a point in
>> >> time. Use raw_smp_processor_id() to avoid such warnings.
>> >
>> > Typically smp_processor_id() and raw_smp_processor_id() are the same,
>> > except for the CONFIG_DEBUG_PREEMPT case.
>> 
>> Sorry, I don't follow...
>
> I meant, Unless CONFIG_DEBUG_PREEMPT, smp_processor_id() is defined as
> raw_smp_processor_id().
>
>> 
>> > In the CONFIG_DEBUG_PREEMPT case, smp_processor_id()
>> > is actually debug_smp_processor_id(), which does all the checks.
>> 
>> Yes, OK.
>> 
>> > I believe these checks in debug_smp_processor_id() are only valid for x86
>> > case (aka cases where they have __smp_processor_id() defined.)
>> 
>> Hmm, I am under the impression that the checks in
>> debug_smp_processor_id() are valid regardless of whether the arch
>> overrides __smp_processor_id().
>
> From include/linux/smp.h
>
> /*
>  * Allow the architecture to differentiate between a stable and unstable read.
>  * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
>  * regular asm read for the stable.
>  */
> #ifndef __smp_processor_id
> #define __smp_processor_id(x) raw_smp_processor_id(x)
> #endif
>
> As far as I see, only x86 has a definition of __smp_processor_id.
> So for archs like Powerpc, __smp_processor_id(), is always
> defined as raw_smp_processor_id(). Right?

Sure, yes.

> I would think debug_smp_processor_id() would be useful if __smp_processor_id()
> is different from raw_smp_processor_id(). Do note debug_smp_processor_id() 
> calls raw_smp_processor_id().

I do not think the utility of debug_smp_processor_id() is related to
whether the arch defines __smp_processor_id().

> Or can I understand how debug_smp_processor_id() is useful if
> __smp_processor_id() is defined as raw_smp_processor_id()?

So, for powerpc with DEBUG_PREEMPT unset, a call to smp_procesor_id()
expands to __smp_processor_id() which expands to raw_smp_processor_id(),
avoiding the preempt safety checks. This is working as intended.

For powerpc with DEBUG_PREEMPT=y, a call to smp_processor_id() expands
to the out of line call to debug_smp_processor_id(), which calls
raw_smp_processor_id() and performs the checks, warning if called in an
inappropriate context, as seen here. Also working as intended.

AFAICT __smp_processor_id() is a performance/codegen-oriented hook, and
not really related to the debug facility. Please see 9ed7d75b2f09d
("x86/percpu: Relax smp_processor_id()").

>> I think the stack trace here correctly identifies an incorrect use of
>> smp_processor_id(), and the call site needs to be changed. Do you
>> disagree?
>
> Yes the stack_trace shows that debug_smp_processor_id(). However what
> I want to understand is why should we even call
> debug_smp_processor_id(), when our __smp_processor_id() is defined as
> raw_smp_processor_id().

smp_processor_id() should always expand to debug_smp_processor_id() when
DEBUG_PREEMPT=y, regardless of whether the arch overrides
__smp_processor_id(). That is how I understand the intent of the code as
written.



doesn't boot when linkaddr=0x0090.0000

2021-09-22 Thread cp
hi
I am new to this list. Hope this is the right place to ask.

I am working with a PPC405GP board, and as far as I understand, the
support for ppc40x platforms like Acadia and Walnut was dropped with
kernel 5.8.0, so this seems like a pretty straightforward question,
but extensive experiments from kernel 4.11 to kernel 5.7.19 haven't
shown a really clear, up-to-date answer.

In k4.11 .. k5.7.19, when the kernel size is bigger than 8 MB, the
final kernel doesn't boot but rather arch/powerpc/boot/main.c dies
before the first message from the kernel shows up.

Why?

Digging deeper I see the relation between the kernel size and link_addr

# Round the size to next higher MB limit
round_size=$(((strip_size + 0xf) & 0xfff0))

round_size=0x$(printf "%x" $round_size)
link_addr=$(printf "%d" $link_address)

and this is where link_addr is involved

text_start="-Ttext $link_address"

My kernels are compiled for cuboot, and the code that invokes "kentry"
is entirely located in arch/powerpc/boot/main.c

I instrumented that module, and this is what I see on the console

The following is the same kernel, compiled with the same .config, but
with two link_addr values

A) with link_addr=0x0080.
image loaded from 0x0080
SP=0x03eb1b80
kernel_size = 7411084 bytes
copying 256 bytes from kernel-image at 0x0080f000 to elfheader
elf_info.loadsize = 0x00700e68
elf_info.memsize  = 0x0074234c
allocating 7611212 bytes for the new kernel
copying ...
from = 0x0081f000
to = 0x
size = 7343720
flush_cache, 32Mbyte flushed
cmdline: uboot bootargs overridden
cmdline=[console=ttyS0,115200 root=/dev/sda2 rootfstype=ext2 rw
init=/sbin/init ]
Finalizing device tree... flat tree at 0xf23b80
ft_addr=0xf23b80
my tp1: success
kernel booting 
(it boots)

B) with link_addr=0x0090.
image loaded from 0x0090
SP=0x03eb1b80
kernel_size = 7411084
copying 256 bytes from kernel-image at 0x0090f000 to elfheader
elf_info.loadsize = 0x00700e68
elf_info.memsize  = 0x0074234c
allocating 7611212 bytes for the new kernel
copying ...
from = 0x0091f000
to = 0x
size = 7343720
flush_cache, 32Mbyte flushed
cmdline: uboot bootargs overridden
cmdline=[console=ttyS0,115200 root=/dev/sda2 rootfstype=ext2 rw
init=/sbin/init ]
Finalizing device tree... flat tree at 0x1023b80
ft_addr=0x1023b80
my tp2: success
my tp3: success
invalidate_cache 0x+0x0200
my tp4: (point of no return)
calling kentry()...
kernel booting 
(it dies at this point, but without a debugger it's like watching
something fall into a black hole)

Any ideas?
I am lost ...

Carlo


Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()

2021-09-22 Thread Srikar Dronamraju
* Nathan Lynch  [2021-09-22 11:01:12]:

> Srikar Dronamraju  writes:
> > * Nathan Lynch  [2021-09-20 22:12:13]:
> >
> >> vcpu_is_preempted() can be used outside of preempt-disabled critical
> >> sections, yielding warnings such as:
> >> 
> >> BUG: using smp_processor_id() in preemptible [] code: 
> >> systemd-udevd/185
> >> caller is rwsem_spin_on_owner+0x1cc/0x2d0
> >> CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33
> >> Call Trace:
> >> [c00012907ac0] [c0aa30a8] dump_stack_lvl+0xac/0x108 
> >> (unreliable)
> >> [c00012907b00] [c1371f70] check_preemption_disabled+0x150/0x160
> >> [c00012907b90] [c01e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0
> >> [c00012907be0] [c01e1408] rwsem_down_write_slowpath+0x478/0x9a0
> >> [c00012907ca0] [c0576cf4] filename_create+0x94/0x1e0
> >> [c00012907d10] [c057ac08] do_symlinkat+0x68/0x1a0
> >> [c00012907d70] [c057ae18] sys_symlink+0x58/0x70
> >> [c00012907da0] [c002e448] system_call_exception+0x198/0x3c0
> >> [c00012907e10] [c000c54c] system_call_common+0xec/0x250
> >> 
> >> The result of vcpu_is_preempted() is always subject to invalidation by
> >> events inside and outside of Linux; it's just a best guess at a point in
> >> time. Use raw_smp_processor_id() to avoid such warnings.
> >
> > Typically smp_processor_id() and raw_smp_processor_id() are the same,
> > except for the CONFIG_DEBUG_PREEMPT case.
> 
> Sorry, I don't follow...

I meant, Unless CONFIG_DEBUG_PREEMPT, smp_processor_id() is defined as
raw_smp_processor_id().

> 
> > In the CONFIG_DEBUG_PREEMPT case, smp_processor_id()
> > is actually debug_smp_processor_id(), which does all the checks.
> 
> Yes, OK.
> 
> > I believe these checks in debug_smp_processor_id() are only valid for x86
> > case (aka cases where they have __smp_processor_id() defined.)
> 
> Hmm, I am under the impression that the checks in
> debug_smp_processor_id() are valid regardless of whether the arch
> overrides __smp_processor_id().

>From include/linux/smp.h

/*
 * Allow the architecture to differentiate between a stable and unstable read.
 * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
 * regular asm read for the stable.
 */
#ifndef __smp_processor_id
#define __smp_processor_id(x) raw_smp_processor_id(x)
#endif

As far as I see, only x86 has a definition of __smp_processor_id.
So for archs like Powerpc, __smp_processor_id(), is always
defined as raw_smp_processor_id(). Right?

I would think debug_smp_processor_id() would be useful if __smp_processor_id()
is different from raw_smp_processor_id(). Do note debug_smp_processor_id() 
calls raw_smp_processor_id().

Or can I understand how debug_smp_processor_id() is useful if
__smp_processor_id() is defined as raw_smp_processor_id()?

> I think the stack trace here correctly identifies an incorrect use of
> smp_processor_id(), and the call site needs to be changed. Do you
> disagree?

Yes the stack_trace shows that debug_smp_processor_id(). However what I want 
to understand is why should we even call debug_smp_processor_id(), when our
__smp_processor_id() is defined as raw_smp_processor_id().

-- 
Thanks and Regards
Srikar Dronamraju


[PATCH v2 1/2] powerpc/powermac: add missing g5_phy_disable_cpu1() declaration

2021-09-22 Thread Krzysztof Kozlowski
g5_phy_disable_cpu1() is used outside of platforms/powermac/feature.c,
so it should have a declaration to fix W=1 warning:

  arch/powerpc/platforms/powermac/feature.c:1533:6:
error: no previous prototype for ‘g5_phy_disable_cpu1’ 
[-Werror=missing-prototypes]

Signed-off-by: Krzysztof Kozlowski 

---

Changes since v1:
1. Drop declaration in powermac/smp.c
---
 arch/powerpc/include/asm/pmac_feature.h | 4 
 arch/powerpc/platforms/powermac/smp.c   | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/pmac_feature.h 
b/arch/powerpc/include/asm/pmac_feature.h
index e08e829261b6..7703e5bf1203 100644
--- a/arch/powerpc/include/asm/pmac_feature.h
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -143,6 +143,10 @@
  */
 struct device_node;
 
+#ifdef CONFIG_PPC64
+void g5_phy_disable_cpu1(void);
+#endif /* CONFIG_PPC64 */
+
 static inline long pmac_call_feature(int selector, struct device_node* node,
long param, long value)
 {
diff --git a/arch/powerpc/platforms/powermac/smp.c 
b/arch/powerpc/platforms/powermac/smp.c
index 3256a316e884..5d0626f432d5 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -875,8 +875,6 @@ static int smp_core99_cpu_online(unsigned int cpu)
 
 static void __init smp_core99_bringup_done(void)
 {
-   extern void g5_phy_disable_cpu1(void);
-
/* Close i2c bus if it was used for tb sync */
if (pmac_tb_clock_chip_host)
pmac_i2c_close(pmac_tb_clock_chip_host);
-- 
2.30.2



[PATCH v2 2/2] powerpc/powermac: constify device_node in of_irq_parse_oldworld()

2021-09-22 Thread Krzysztof Kozlowski
The of_irq_parse_oldworld() does not modify passed device_node so make
it a pointer to const for safety.  Drop the extern while modifying the
line.

Signed-off-by: Krzysztof Kozlowski 

---

Changes since v1:
1. Drop extern.
---
 arch/powerpc/platforms/powermac/pic.c | 2 +-
 include/linux/of_irq.h| 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/pic.c 
b/arch/powerpc/platforms/powermac/pic.c
index 4921bccf0376..af5ca1f41bb1 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -384,7 +384,7 @@ static void __init pmac_pic_probe_oldstyle(void)
 #endif
 }
 
-int of_irq_parse_oldworld(struct device_node *device, int index,
+int of_irq_parse_oldworld(const struct device_node *device, int index,
struct of_phandle_args *out_irq)
 {
const u32 *ints = NULL;
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index aaf219bd0354..83fccd0c9bba 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -20,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, 
struct device_node *);
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 extern unsigned int of_irq_workarounds;
 extern struct device_node *of_irq_dflt_pic;
-extern int of_irq_parse_oldworld(struct device_node *device, int index,
-  struct of_phandle_args *out_irq);
+int of_irq_parse_oldworld(const struct device_node *device, int index,
+ struct of_phandle_args *out_irq);
 #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 #define of_irq_workarounds (0)
 #define of_irq_dflt_pic (NULL)
-static inline int of_irq_parse_oldworld(struct device_node *device, int index,
+static inline int of_irq_parse_oldworld(const struct device_node *device, int 
index,
  struct of_phandle_args *out_irq)
 {
return -EINVAL;
-- 
2.30.2



Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()

2021-09-22 Thread Nathan Lynch
Srikar Dronamraju  writes:
> * Nathan Lynch  [2021-09-20 22:12:13]:
>
>> vcpu_is_preempted() can be used outside of preempt-disabled critical
>> sections, yielding warnings such as:
>> 
>> BUG: using smp_processor_id() in preemptible [] code: 
>> systemd-udevd/185
>> caller is rwsem_spin_on_owner+0x1cc/0x2d0
>> CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33
>> Call Trace:
>> [c00012907ac0] [c0aa30a8] dump_stack_lvl+0xac/0x108 (unreliable)
>> [c00012907b00] [c1371f70] check_preemption_disabled+0x150/0x160
>> [c00012907b90] [c01e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0
>> [c00012907be0] [c01e1408] rwsem_down_write_slowpath+0x478/0x9a0
>> [c00012907ca0] [c0576cf4] filename_create+0x94/0x1e0
>> [c00012907d10] [c057ac08] do_symlinkat+0x68/0x1a0
>> [c00012907d70] [c057ae18] sys_symlink+0x58/0x70
>> [c00012907da0] [c002e448] system_call_exception+0x198/0x3c0
>> [c00012907e10] [c000c54c] system_call_common+0xec/0x250
>> 
>> The result of vcpu_is_preempted() is always subject to invalidation by
>> events inside and outside of Linux; it's just a best guess at a point in
>> time. Use raw_smp_processor_id() to avoid such warnings.
>
> Typically smp_processor_id() and raw_smp_processor_id() except for the
> CONFIG_DEBUG_PREEMPT.

Sorry, I don't follow...

> In the CONFIG_DEBUG_PREEMPT case, smp_processor_id()
> is actually debug_smp_processor_id(), which does all the checks.

Yes, OK.

> I believe these checks in debug_smp_processor_id() are only valid for x86
> case (aka cases were they have __smp_processor_id() defined.)

Hmm, I am under the impression that the checks in
debug_smp_processor_id() are valid regardless of whether the arch
overrides __smp_processor_id().

I think the stack trace here correctly identifies an incorrect use of
smp_processor_id(), and the call site needs to be changed. Do you
disagree?


[Bug 213837] "Kernel panic - not syncing: corrupted stack end detected inside scheduler" at building via distcc on a G5

2021-09-22 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=213837

--- Comment #7 from Erhard F. (erhar...@mailbox.org) ---
Created attachment 298919
  --> https://bugzilla.kernel.org/attachment.cgi?id=298919&action=edit
dmesg (5.15-rc2 + patch, PowerMac G5 11,2)

(In reply to mpe from comment #6)
> Can you try this patch, it might help us work out what is corrupting the
> stack.
With your patch applied to recent v5.15-rc2 the output looks like this:

[...]
stack corrupted? stack end = 0xc00029fdc000
stack: c00029fdbc00: 5a5a5a5a 5a5a5a5a    
stack: c00029fdbc10: 0ddc 7c10    |...
stack: c00029fdbc20: 29fc4e41 673d4bb3 5a5a5a5a 5a5a5a5a  ).NAg=K.
stack: c00029fdbc30:   0ddc 8e10  
stack: c00029fdbc40:   41fc4e41 673d41a3  A.NAg=A.
stack: c00029fdbc50: 5a5a5a5a 5a5a5a5a    
stack: c00029fdbc60: 0ddc 8e0c    
stack: c00029fdbc70: 79fc4e41 673d4dab 5a5a5a5a 5a5a5a5a  y.NAg=M.
stack: c00029fdbc80:   0ddc 9008  
stack: c00029fdbc90:   91fc4e41 673d4573  ..NAg=Es
stack: c00029fdbca0: 5a5a5a5a 5a5a5a5a    
stack: c00029fdbcb0: 0dd7 ac16    
stack: c00029fdbcc0: c9fc4e41 673d4203 5a5a5a5a 5a5a5a5a  ..NAg=B.
stack: c00029fdbcd0:   0ddc 6c04  l...
stack: c00029fdbce0:   e1fc4e41 673d474b  ..NAg=GK
stack: c00029fdbcf0: 5a5a5a5a 5a5a5a5a    
stack: c00029fdbd00: 0ddc 8800    
stack: c00029fdbd10: 19fd4e41 673d4143 5a5a5a5a 5a5a5a5a  ..NAg=AC
[...]
stack: c00029fdffd0:      
stack: c00029fdffe0:      
stack: c00029fdfff0:      
Kernel panic - not syncing: corrupted stack end detected inside scheduler
CPU: 0 PID: 686 Comm: kworker/u4:0 Tainted: GW
5.15.0-rc2-PowerMacG5+ #2
Workqueue: writeback .wb_workfn (flush-254:1)
Call Trace:
[c00029fdf400] [c05532c8] .dump_stack_lvl+0x98/0xe0 (unreliable)
[c00029fdf490] [c0069534] .panic+0x14c/0x3e8
[c00029fdf540] [c081d598] .__schedule+0xc0/0x874
[c00029fdf610] [c081de98] .preempt_schedule_common+0x28/0x48
[c00029fdf690] [c081dee4] .__cond_resched+0x2c/0x50
[c00029fdf700] [c02b31b8] .writeback_sb_inodes+0x328/0x4c8
[c00029fdf880] [c02b33e8] .__writeback_inodes_wb+0x90/0xcc
[c00029fdf930] [c02b3650] .wb_writeback+0x22c/0x3c8
[c00029fdfa50] [c02b45a8] .wb_workfn+0x380/0x460
[c00029fdfbb0] [c008b300] .process_one_work+0x31c/0x4ec
[c00029fdfca0] [c008b950] .worker_thread+0x1d4/0x290
[c00029fdfd60] [c0093b0c] .kthread+0x124/0x12c
[c00029fdfe10] [c000bce0] .ret_from_kernel_thread+0x58/0x60
Rebooting in 40 seconds..

Can't make much sense out of it but hopefully you can. ;) For the full trace
please have a look at the attached kernel dmesg (via netconsole).

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.
You are watching someone on the CC list of the bug.

Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()

2021-09-22 Thread Nathan Lynch
Michael Ellerman  writes:
> Nathan Lynch  writes:
>> --- a/arch/powerpc/include/asm/paravirt.h
>> +++ b/arch/powerpc/include/asm/paravirt.h
>> @@ -97,7 +97,14 @@ static inline bool vcpu_is_preempted(int cpu)
>>  
>>  #ifdef CONFIG_PPC_SPLPAR
>>  if (!is_kvm_guest()) {
>> -int first_cpu = cpu_first_thread_sibling(smp_processor_id());
>> +int first_cpu;
>> +
>> +/*
>> + * This is only a guess at best, and this function may be
>> + * called with preemption enabled. Using raw_smp_processor_id()
>> + * does not damage accuracy.
>> + */
>> +first_cpu = cpu_first_thread_sibling(raw_smp_processor_id());
>
> This change seems good, except I think the comment needs to be a lot
> more explicit about what it's doing and why.
>
> A casual reader is going to be confused about vcpu preemption vs
> "preemption", which are basically unrelated yet use the same word.
>
> It's not clear how raw_smp_processor_id() is related to (Linux)
> preemption, unless you know that smp_processor_id() is the alternative
> and it contains a preemption check.
>
> And "this is only a guess" is not clear on what *this* is, you're
> referring to the result of the whole function, but that's not obvious.

You're right.

>
>>  /*
>>   * Preemption can only happen at core granularity. This CPU
>^^
>Means something different to "preemption" above.
>
> I know you didn't write that comment, and maybe we need to rewrite some
> of those existing comments to make it clear they're not talking about
> Linux preemption.

Thanks, agreed on all points. I'll rework the existing comments and any
new ones to clearly distinguish between the two senses of preemption
here.


[PATCH v3 6/6] powerpc/64s/interrupt: avoid saving CFAR in some asynchronous interrupts

2021-09-22 Thread Nicholas Piggin
Reading the CFAR register is quite costly (~20 cycles on POWER9). It is
a good idea to have for most synchronous interrupts, but for async ones
it is much less important.

Doorbell, external, and decrementer interrupts are the important
asynchronous ones. HV interrupts can't skip CFAR if KVM HV is possible,
because it might be a guest exit that requires CFAR preserved. But the
important pseries interrupts can avoid loading CFAR.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 63 
 1 file changed, 63 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 4dcc76206f8e..eb3af5abdd37 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -111,6 +111,8 @@ name:
 #define IAREA  .L_IAREA_\name\()   /* PACA save area */
 #define IVIRT  .L_IVIRT_\name\()   /* Has virt mode entry point */
 #define IISIDE .L_IISIDE_\name\()  /* Uses SRR0/1 not DAR/DSISR */
+#define ICFAR  .L_ICFAR_\name\()   /* Uses CFAR */
+#define ICFAR_IF_HVMODE.L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV 
*/
 #define IDAR   .L_IDAR_\name\()/* Uses DAR (or SRR0) */
 #define IDSISR .L_IDSISR_\name\()  /* Uses DSISR (or SRR1) */
 #define IBRANCH_TO_COMMON  .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch 
to common */
@@ -150,6 +152,12 @@ do_define_int n
.ifndef IISIDE
IISIDE=0
.endif
+   .ifndef ICFAR
+   ICFAR=1
+   .endif
+   .ifndef ICFAR_IF_HVMODE
+   ICFAR_IF_HVMODE=0
+   .endif
.ifndef IDAR
IDAR=0
.endif
@@ -287,9 +295,21 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
std r10,IAREA+EX_R10(r13)   /* save r10 - r12 */
+   .if ICFAR
 BEGIN_FTR_SECTION
mfspr   r10,SPRN_CFAR
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+   .elseif ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+  BEGIN_FTR_SECTION_NESTED(69)
+   mfspr   r10,SPRN_CFAR
+  END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(69)
+   li  r10,0
+  END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+   .endif
.if \ool
.if !\virt
b   tramp_real_\name
@@ -305,9 +325,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 BEGIN_FTR_SECTION
std r9,IAREA+EX_PPR(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+   .if ICFAR || ICFAR_IF_HVMODE
 BEGIN_FTR_SECTION
std r10,IAREA+EX_CFAR(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+   .endif
INTERRUPT_TO_KERNEL
mfctr   r10
std r10,IAREA+EX_CTR(r13)
@@ -559,7 +581,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
.endif
 
 BEGIN_FTR_SECTION
+   .if ICFAR || ICFAR_IF_HVMODE
ld  r10,IAREA+EX_CFAR(r13)
+   .else
+   li  r10,0
+   .endif
std r10,ORIG_GPR3(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld  r10,IAREA+EX_CTR(r13)
@@ -1502,6 +1528,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
  *
  * If soft masked, the masked handler will note the pending interrupt for
  * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not required because this is an asynchronous interrupt that in
+ * general won't have much bearing on the state of the CPU, with the possible
+ * exception of crash/debug IPIs, but those are generally moving to use SRESET
+ * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
+ * it may be exiting the guest and need CFAR to be saved.
  */
 INT_DEFINE_BEGIN(hardware_interrupt)
IVEC=0x500
@@ -1509,6 +1541,10 @@ INT_DEFINE_BEGIN(hardware_interrupt)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+   ICFAR=0
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+   ICFAR_IF_HVMODE=1
+#endif
 INT_DEFINE_END(hardware_interrupt)
 
 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1727,6 +1763,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
  * If PPC_WATCHDOG is configured, the soft masked handler will actually set
  * things back up to run soft_nmi_interrupt as a regular interrupt handler
  * on the emergency stack.
+ *
+ * CFAR is not required because this is asynchronous (see hardware_interrupt).
+ * A watchdog interrupt may like to have CFAR, but usually the interesting
+ * branch is long gone by that point (e.g., infinite loop).
  */
 INT_DEFINE_BEGIN(decrementer)
IVEC=0x900
@@ -1734,6 +1774,7 @@ INT_DEFINE_BEGIN(decrementer)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
 #endif
+   ICFAR=0
 INT_DEFINE_END(decrementer)
 
 EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
@@ -1809,6 +1850,8 @@ EXC_COMMON_BEGIN(hdecrementer_common)
  * If soft masked, the masked handler will note the pending interrupt for
  * replay, leaving

[PATCH v3 5/6] powerpc/64/interrupt: reduce expensive debug tests

2021-09-22 Thread Nicholas Piggin
Move the assertions requiring restart table searches under
CONFIG_PPC_IRQ_SOFT_MASK_DEBUG.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/interrupt.h | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index e178d143671a..0e84e99af37b 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -97,6 +97,11 @@ static inline void srr_regs_clobbered(void)
local_paca->hsrr_valid = 0;
 }
 #else
+static inline unsigned long search_kernel_restart_table(unsigned long addr)
+{
+   return 0;
+}
+
 static inline bool is_implicit_soft_masked(struct pt_regs *regs)
 {
return false;
@@ -190,13 +195,14 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
 */
if (TRAP(regs) != INTERRUPT_PROGRAM) {
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
-   BUG_ON(is_implicit_soft_masked(regs));
+   if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+   BUG_ON(is_implicit_soft_masked(regs));
}
-#ifdef CONFIG_PPC_BOOK3S
+
/* Move this under a debugging check */
-   if (arch_irq_disabled_regs(regs))
+   if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) &&
+   arch_irq_disabled_regs(regs))
BUG_ON(search_kernel_restart_table(regs->nip));
-#endif
}
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
-- 
2.23.0



[PATCH v3 4/6] powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless perf is in use

2021-09-22 Thread Nicholas Piggin
Enabling MSR[EE] in interrupt handlers while interrupts are still soft
masked allows PMIs to profile interrupt handlers to some degree, beyond
what SIAR latching allows.

When perf is not being used, this is almost useless work. It requires an
extra mtmsrd in the irq handler, and it also opens the door to masked
interrupts hitting and requiring replay, which is more expensive than
just taking them directly. This effect can be noticeable in high IRQ
workloads.

Avoid enabling MSR[EE] unless perf is currently in use. This saves about
60 cycles (or 8%) on a simple decrementer interrupt microbenchmark.
Replayed interrupts drop from 1.4% of all interrupts taken, to 0.003%.

This does prevent the soft-nmi interrupt being taken in these handlers,
but that's not too reliable anyway. The SMP watchdog will continue to be
the reliable way to catch lockups.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/hw_irq.h | 57 +--
 arch/powerpc/kernel/dbell.c   |  3 +-
 arch/powerpc/kernel/irq.c |  3 +-
 arch/powerpc/kernel/time.c| 31 +
 4 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index b987822e552e..55e3fa44f280 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -309,17 +309,54 @@ static inline bool lazy_irq_pending_nocheck(void)
 bool power_pmu_wants_prompt_pmi(void);
 
 /*
- * This is called by asynchronous interrupts to conditionally
- * re-enable hard interrupts after having cleared the source
- * of the interrupt. They are kept disabled if there is a different
- * soft-masked interrupt pending that requires hard masking.
+ * This is called by asynchronous interrupts to check whether to
+ * conditionally re-enable hard interrupts after having cleared
+ * the source of the interrupt. They are kept disabled if there
+ * is a different soft-masked interrupt pending that requires hard
+ * masking.
  */
-static inline void may_hard_irq_enable(void)
+static inline bool should_hard_irq_enable(void)
 {
-   if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) {
-   get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
-   __hard_irq_enable();
-   }
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+   WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+   WARN_ON(mfmsr() & MSR_EE);
+#endif
+#ifdef CONFIG_PERF_EVENTS
+   /*
+* If the PMU is not running, there is not much reason to enable
+* MSR[EE] in irq handlers because any interrupts would just be
+* soft-masked.
+*
+* TODO: Add test for 64e
+*/
+   if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !power_pmu_wants_prompt_pmi())
+   return false;
+
+   if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)
+   return false;
+
+   return true;
+#else
+   return false;
+#endif
+}
+
+/*
+ * Do the hard enabling, only call this if should_hard_irq_enable is true.
+ */
+static inline void do_hard_irq_enable(void)
+{
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+   WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+   WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK);
+   WARN_ON(mfmsr() & MSR_EE);
+#endif
+   /*
+* This allows PMI interrupts (and watchdog soft-NMIs) through.
+* There is no other reason to enable this way.
+*/
+   get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
+   __hard_irq_enable();
 }
 
 static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
@@ -400,7 +437,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs 
*regs)
return !(regs->msr & MSR_EE);
 }
 
-static inline bool may_hard_irq_enable(void)
+static inline bool should_hard_irq_enable(void)
 {
return false;
 }
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 5545c9cd17c1..f55c6fb34a3a 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
 
ppc_msgsync();
 
-   may_hard_irq_enable();
+   if (should_hard_irq_enable())
+   do_hard_irq_enable();
 
kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 551b653228c4..f658aa22a21e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -739,7 +739,8 @@ void __do_irq(struct pt_regs *regs)
irq = ppc_md.get_irq();
 
/* We can hard enable interrupts now to allow perf interrupts */
-   may_hard_irq_enable();
+   if (should_hard_irq_enable())
+   do_hard_irq_enable();
 
/* And finally process it */
if (unlikely(!irq))
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 934d8ae66cc6..c3c663ff7c48 100644
--- a/arch/powerpc/kernel/t

[PATCH v3 3/6] powerpc/64s/perf: add power_pmu_wants_prompt_pmi to say whether perf wants PMIs to be soft-NMI

2021-09-22 Thread Nicholas Piggin
Interrupt code enables MSR[EE] in some irq handlers while keeping local
irqs disabled via soft-mask, allowing PMI interrupts to be taken as
soft-NMI to improve profiling of irq handlers.

When perf is not enabled, there is no point to doing this, it's
additional overhead. So provide a function that can say if PMIs should
be taken promptly if possible.

Cc: Madhavan Srinivasan 
Cc: Athira Rajeev 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/hw_irq.h |  2 ++
 arch/powerpc/perf/core-book3s.c   | 31 +++
 2 files changed, 33 insertions(+)

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index 21cc571ea9c2..b987822e552e 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -306,6 +306,8 @@ static inline bool lazy_irq_pending_nocheck(void)
return __lazy_irq_pending(local_paca->irq_happened);
 }
 
+bool power_pmu_wants_prompt_pmi(void);
+
 /*
  * This is called by asynchronous interrupts to conditionally
  * re-enable hard interrupts after having cleared the source
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 73e62e9b179b..773d07d68b6d 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #ifdef CONFIG_PPC64
@@ -2381,6 +2382,36 @@ static void perf_event_interrupt(struct pt_regs *regs)
perf_sample_event_took(sched_clock() - start_clock);
 }
 
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * though to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+   struct cpu_hw_events *cpuhw;
+
+   /*
+* This could simply test local_paca->pmcregs_in_use if that were not
+* under ifdef KVM.
+*/
+
+   if (!ppmu)
+   return false;
+
+   cpuhw = this_cpu_ptr(&cpu_hw_events);
+   return cpuhw->n_events;
+}
+
 static int power_pmu_prepare_cpu(unsigned int cpu)
 {
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
-- 
2.23.0



[PATCH v3 2/6] powerpc/64s/interrupt: handle MSR EE and RI in interrupt entry wrapper

2021-09-22 Thread Nicholas Piggin
The mtmsrd to enable MSR[RI] can be combined with the mtmsrd to enable
MSR[EE] in interrupt entry code, for those interrupts which enable EE.
This helps performance of important synchronous interrupts (e.g., page
faults).

This is similar to what commit dd152f70bdc1 ("powerpc/64s: system call
avoid setting MSR[RI] until we set MSR[EE]") does for system calls.

Do this by enabling EE and RI together at the beginning of the entry
wrapper if PACA_IRQ_HARD_DIS is clear, and only enabling RI if it is
set.

Asynchronous interrupts set PACA_IRQ_HARD_DIS, but synchronous ones
leave it unchanged, so by default they always get EE=1 unless they have
interrupted a caller that is hard disabled. When the sync interrupt
later calls interrupt_cond_local_irq_enable(), it will not require
another mtmsrd because MSR[EE] was already enabled here.

This avoids one mtmsrd L=1 for synchronous interrupts on 64s, which
saves about 20 cycles on POWER9. And for kernel-mode interrupts, both
synchronous and asynchronous, this saves an additional 40 cycles due to
the mtmsrd being moved ahead of mfspr SPRN_AMR, which prevents a SPR
scoreboard stall.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/interrupt.h | 27 +---
 arch/powerpc/kernel/exceptions-64s.S | 38 +++-
 arch/powerpc/kernel/fpu.S|  5 
 arch/powerpc/kernel/vector.S | 10 
 4 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index 3802390d8eea..e178d143671a 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -148,8 +148,14 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
 #endif
 
 #ifdef CONFIG_PPC64
-   if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
-   trace_hardirqs_off();
+   bool trace_enable = false;
+
+   if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) {
+   if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
+   trace_enable = true;
+   } else {
+   irq_soft_mask_set(IRQS_ALL_DISABLED);
+   }
 
/*
 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
@@ -163,8 +169,14 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
BUG_ON(!(regs->msr & MSR_EE));
__hard_irq_enable();
+   } else {
+   __hard_RI_enable();
}
 
+   /* Do this when RI=1 because it can cause SLB faults */
+   if (trace_enable)
+   trace_hardirqs_off();
+
if (user_mode(regs)) {
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();
@@ -217,13 +229,16 @@ static inline void interrupt_async_enter_prepare(struct 
pt_regs *regs, struct in
/* Ensure interrupt_enter_prepare does not enable MSR[EE] */
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 #endif
+   interrupt_enter_prepare(regs, state);
 #ifdef CONFIG_PPC_BOOK3S_64
+   /*
+* RI=1 is set by interrupt_enter_prepare, so this thread flags access
+* has to come afterward (it can cause SLB faults).
+*/
if (cpu_has_feature(CPU_FTR_CTRL) &&
!test_thread_local_flags(_TLF_RUNLATCH))
__ppc64_runlatch_on();
 #endif
-
-   interrupt_enter_prepare(regs, state);
irq_enter();
 }
 
@@ -293,6 +308,8 @@ static inline void interrupt_nmi_enter_prepare(struct 
pt_regs *regs, struct inte
regs->softe = IRQS_ALL_DISABLED;
}
 
+   __hard_RI_enable();
+
/* Don't do any per-CPU operations until interrupt state is fixed */
 
if (nmi_disables_ftrace(regs)) {
@@ -390,6 +407,8 @@ interrupt_handler long func(struct pt_regs *regs)   
\
 {  \
long ret;   \
\
+   __hard_RI_enable(); \
+   \
ret = ##func (regs);\
\
return ret; \
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 37859e62a8dc..4dcc76206f8e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -113,7 +113,6 @@ name:
 #define IISIDE .L_IISIDE_\name\()  /* Uses SRR0/1 not DAR/DSISR */
 #define IDAR   .L_IDAR_\name\()/* Uses DAR (or SRR0) */
 #define IDSISR .L_ID

[PATCH v3 1/6] powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible

2021-09-22 Thread Nicholas Piggin
Make synchronous interrupt handler entry wrappers enable MSR[EE] if
MSR[EE] was enabled in the interrupted context. IRQs are soft-disabled
at this point so there is no change to high level code, but it means a
masked interrupt could fire.

This is a performance disadvantage for interrupts which do not later
call interrupt_cond_local_irq_enable(), because an additional mtmsrd
or wrtee instruction is executed. However the important synchronous
interrupts (e.g., page fault) do enable interrupts, so the performance
disadvantage is mostly avoided.

In the next patch, MSR[RI] enabling can be combined with MSR[EE]
enabling, which mitigates the performance drop for the former and gives
a performance advantage for the latter interrupts, on 64s machines. 64e
is coming along for the ride for now to avoid divergences with 64s in
this tricky code.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/interrupt.h | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index b76ab848aa0d..3802390d8eea 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -150,7 +150,20 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
 #ifdef CONFIG_PPC64
if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
trace_hardirqs_off();
-   local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+   /*
+* If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
+* Asynchronous interrupts get here with HARD_DIS set (see below), so
+* this enables MSR[EE] for synchronous interrupts. IRQs remain
+* soft-masked. The interrupt handler may later call
+* interrupt_cond_local_irq_enable() to achieve a regular process
+* context.
+*/
+   if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
+   if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+   BUG_ON(!(regs->msr & MSR_EE));
+   __hard_irq_enable();
+   }
 
if (user_mode(regs)) {
CT_WARN_ON(ct_state() != CONTEXT_USER);
@@ -200,6 +213,10 @@ static inline void interrupt_exit_prepare(struct pt_regs 
*regs, struct interrupt
 
 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct 
interrupt_state *state)
 {
+#ifdef CONFIG_PPC64
+   /* Ensure interrupt_enter_prepare does not enable MSR[EE] */
+   local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+#endif
 #ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_CTRL) &&
!test_thread_local_flags(_TLF_RUNLATCH))
-- 
2.23.0



[PATCH v3 0/6] powerpc/64s: interrupt speedups

2021-09-22 Thread Nicholas Piggin
Here's a few stragglers. The first patch was submitted already but had
some bugs with unrecoverable exceptions on HPT (current->blah being
accessed before MSR[RI] was enabled). Those should be fixed now.

The others are generally for helping asynch interrupts, which are a bit
harder to measure well but important for IO and IPIs.

After this series, the SPR accesses of the interrupt handlers for radix
are becoming pretty optimal except for PPR which we could improve on,
and virt CPU accounting which is very costly -- we might disable that
by default unless someone comes up with a good reason to keep it.

Since v1:
- Compile fixes for 64e.
- Fixed a SOFT_MASK_DEBUG false positive.
- Improve function name and comments explaining why patch 2 does not
  need to hard enable when PMU is enabled via sysfs.

Since v2:
- Split first patch into patch 1 and 2, improve on the changelogs.
- More compile fixes.
- Fixed several review comments from Daniel.
- Added patch 5.

Thanks,
Nick

Nicholas Piggin (6):
  powerpc/64/interrupt: make normal synchronous interrupts enable
MSR[EE] if possible
  powerpc/64s/interrupt: handle MSR EE and RI in interrupt entry wrapper
  powerpc/64s/perf: add power_pmu_wants_prompt_pmi to say whether perf
wants PMIs to be soft-NMI
  powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless
perf is in use
  powerpc/64/interrupt: reduce expensive debug tests
  powerpc/64s/interrupt: avoid saving CFAR in some asynchronous
interrupts

 arch/powerpc/include/asm/hw_irq.h|  59 +---
 arch/powerpc/include/asm/interrupt.h |  58 ---
 arch/powerpc/kernel/dbell.c  |   3 +-
 arch/powerpc/kernel/exceptions-64s.S | 101 ++-
 arch/powerpc/kernel/fpu.S|   5 ++
 arch/powerpc/kernel/irq.c|   3 +-
 arch/powerpc/kernel/time.c   |  31 
 arch/powerpc/kernel/vector.S |  10 +++
 arch/powerpc/perf/core-book3s.c  |  31 
 9 files changed, 232 insertions(+), 69 deletions(-)

-- 
2.23.0



[PATCH v1] powerpc/64/interrupt: Reconcile soft-mask state in NMI and fix false BUG

2021-09-22 Thread Nicholas Piggin
If a NMI hits early in an interrupt handler before the irq soft-mask
state is reconciled, that can cause a false-positive BUG with a
CONFIG_PPC_IRQ_SOFT_MASK_DEBUG assertion.

Remove that assertion and instead check the case that if regs->msr has
EE clear, then regs->softe should be marked as disabled so the irq state
looks correct to NMI handlers, the same as how it's fixed up in the
case it was implicit soft-masked.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/interrupt.h | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index b32ed910a8cf..b76ab848aa0d 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -265,13 +265,16 @@ static inline void interrupt_nmi_enter_prepare(struct 
pt_regs *regs, struct inte
local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
-   if (is_implicit_soft_masked(regs)) {
-   // Adjust regs->softe soft implicit soft-mask, so
-   // arch_irq_disabled_regs(regs) behaves as expected.
+   if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) {
+   /*
+* Adjust regs->softe to be soft-masked if it had not been
+* reconciled (e.g., interrupt entry with MSR[EE]=0 but softe
+* not yet set disabled), or if it was in an implicit soft
+* masked state. This makes arch_irq_disabled_regs(regs)
+* behave as expected.
+*/
regs->softe = IRQS_ALL_DISABLED;
}
-   if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-   BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
 
/* Don't do any per-CPU operations until interrupt state is fixed */
 
-- 
2.23.0



Re: [PATCH v3 5/8] x86/sme: Replace occurrences of sme_active() with cc_platform_has()

2021-09-22 Thread Kirill A. Shutemov
On Wed, Sep 22, 2021 at 08:40:43AM -0500, Tom Lendacky wrote:
> On 9/21/21 4:58 PM, Kirill A. Shutemov wrote:
> > On Tue, Sep 21, 2021 at 04:43:59PM -0500, Tom Lendacky wrote:
> > > On 9/21/21 4:34 PM, Kirill A. Shutemov wrote:
> > > > On Tue, Sep 21, 2021 at 11:27:17PM +0200, Borislav Petkov wrote:
> > > > > On Wed, Sep 22, 2021 at 12:20:59AM +0300, Kirill A. Shutemov wrote:
> > > > > > I still believe calling cc_platform_has() from __startup_64() is 
> > > > > > totally
> > > > > > broken as it lacks proper wrapping while accessing global variables.
> > > > > 
> > > > > Well, one of the issues on the AMD side was using boot_cpu_data too
> > > > > early and the Intel side uses it too. Can you replace those checks 
> > > > > with
> > > > > is_tdx_guest() or whatever was the helper's name which would check
> > > > > whether the the kernel is running as a TDX guest, and see if that 
> > > > > helps?
> > > > 
> > > > There's no need in Intel check this early. Only AMD need it. Maybe just
> > > > opencode them?
> > > 
> > > Any way you can put a gzipped/bzipped copy of your vmlinux file somewhere 
> > > I
> > > can grab it from and take a look at it?
> > 
> > You can find broken vmlinux and bzImage here:
> > 
> > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdrive.google.com%2Fdrive%2Ffolders%2F1n74vUQHOGebnF70Im32qLFY8iS3wvjIs%3Fusp%3Dsharing&data=04%7C01%7Cthomas.lendacky%40amd.com%7C1c7adf380cbe4c1a6bb708d97d4af6ff%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637678583935705530%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=gA30x%2Bfu97tUx0p2UqI8HgjiL8bxDbK1GqgJBbUrUE4%3D&reserved=0
> > 
> > Let me know when I can remove it.
> 
> Looking at everything, it is all RIP relative addressing, so those
> accesses should be fine.

Not fine, but waiting to blowup with random build environment change.

> Your image has the intel_cc_platform_has()
> function, does it work if you remove that call? Because I think it may be
> the early call into that function which looks like it has instrumentation
> that uses %gs in __sanitizer_cov_trace_pc and %gs is not setup properly
> yet. And since boot_cpu_data.x86_vendor will likely be zero this early it
> will match X86_VENDOR_INTEL and call into that function.

Right removing call to intel_cc_platform_has() or moving it to
cc_platform.c fixes the issue.

-- 
 Kirill A. Shutemov


Re: [PATCH 01/16] ASoC: eureka-tlv320: Update to modern clocking terminology

2021-09-22 Thread Mark Brown
On Tue, 21 Sep 2021 22:10:25 +0100, Mark Brown wrote:
> As part of moving to remove the old style defines for the bus clocks update
> the eureka-tlv320 driver to use more modern terminology for clocking.
> 
> 

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[01/16] ASoC: eureka-tlv320: Update to modern clocking terminology
commit: 4348be6330a18b123fa82494df9f5a134feecb7f
[02/16] ASoC: fsl-asoc-card: Update to modern clocking terminology
commit: 8fcfd3493426c229f4f28bc5757dd3359e02cee8
[03/16] ASoC: fsl-audmix: Update to modern clocking terminology
commit: 2757b340b25dd2cb3afc748d48c1dff6c9689f80
[04/16] ASoC: fsl-esai: Update to modern clocking terminology
commit: e0b64fa34c7f444908549c32dd68f81ac436299e
[05/16] ASoC: fsl-mqs: Update to modern clocking terminology
commit: a51da9dc9b3a844460a355cd10d0db4320f4d726
[06/16] ASoC: fsl_sai: Update to modern clocking terminology
commit: 361284a4eb598eaf28e8458c542f214d3689b134
[07/16] ASoC: fsl_ssi: Update to modern clocking terminology
commit: 89efbdaaa444d63346bf1bdf3b58dfb421de91f1
[08/16] ASoC: imx-audmix: Update to modern clocking terminology
commit: bf101022487091032fd8102c835b1157b8283c43
[09/16] ASoC: imx-card: Update to modern clocking terminology
commit: d689e280121abf1cdf0d37734b0b306098a774ed
[10/16] ASoC: imx-es8328: Update to modern clocking terminology
commit: 56b69e4e4bc24c732b68ff6df54be83226a3b4e6
[11/16] ASoC: imx-hdmi: Update to modern clocking terminology
commit: a90f847ad2f1c8575f6a7980e5ee9937d1a5eeb4
[12/16] ASoC: imx-rpmsg: Update to modern clocking terminology
commit: caa0a6075a6e9239e49690a40a131496398602ab
[13/16] ASoC: imx-sgtl5000: Update to modern clocking terminology
commit: 419099b4c3318a3c486f9f65b015760e71d53f0a
[14/16] ASoC: mpc8610_hpcd: Update to modern clocking terminology
commit: 8a7f299b857b81a10566fe19c585fae4d1c1f8ef
[15/16] ASoC: pl1022_ds: Update to modern clocking terminology
commit: fcd444bf6a29a22e529510de07c72555b7e46224
[16/16] ASoC: pl1022_rdk: Update to modern clocking terminology
commit: 39e178a4cc7d042cd6353e73f3024d87e79a86ca

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: [PATCH v2 01/16] ASoC: eureka-tlv320: Update to modern clocking terminology

2021-09-22 Thread Mark Brown
On Tue, 21 Sep 2021 22:35:27 +0100, Mark Brown wrote:
> As part of moving to remove the old style defines for the bus clocks update
> the eureka-tlv320 driver to use more modern terminology for clocking.
> 
> 

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[01/16] ASoC: eureka-tlv320: Update to modern clocking terminology
commit: 4348be6330a18b123fa82494df9f5a134feecb7f
[02/16] ASoC: fsl-asoc-card: Update to modern clocking terminology
commit: 8fcfd3493426c229f4f28bc5757dd3359e02cee8
[03/16] ASoC: fsl-audmix: Update to modern clocking terminology
commit: 2757b340b25dd2cb3afc748d48c1dff6c9689f80
[04/16] ASoC: fsl-esai: Update to modern clocking terminology
commit: e0b64fa34c7f444908549c32dd68f81ac436299e
[05/16] ASoC: fsl-mqs: Update to modern clocking terminology
commit: a51da9dc9b3a844460a355cd10d0db4320f4d726
[06/16] ASoC: fsl_sai: Update to modern clocking terminology
commit: 361284a4eb598eaf28e8458c542f214d3689b134
[07/16] ASoC: fsl_ssi: Update to modern clocking terminology
commit: 89efbdaaa444d63346bf1bdf3b58dfb421de91f1
[08/16] ASoC: imx-audmix: Update to modern clocking terminology
commit: bf101022487091032fd8102c835b1157b8283c43
[09/16] ASoC: imx-card: Update to modern clocking terminology
commit: d689e280121abf1cdf0d37734b0b306098a774ed
[10/16] ASoC: imx-es8328: Update to modern clocking terminology
commit: 56b69e4e4bc24c732b68ff6df54be83226a3b4e6
[11/16] ASoC: imx-hdmi: Update to modern clocking terminology
commit: a90f847ad2f1c8575f6a7980e5ee9937d1a5eeb4
[12/16] ASoC: imx-rpmsg: Update to modern clocking terminology
commit: caa0a6075a6e9239e49690a40a131496398602ab
[13/16] ASoC: imx-sgtl5000: Update to modern clocking terminology
commit: 419099b4c3318a3c486f9f65b015760e71d53f0a
[14/16] ASoC: mpc8610_hpcd: Update to modern clocking terminology
commit: 8a7f299b857b81a10566fe19c585fae4d1c1f8ef
[15/16] ASoC: pl1022_ds: Update to modern clocking terminology
commit: fcd444bf6a29a22e529510de07c72555b7e46224
[16/16] ASoC: pl1022_rdk: Update to modern clocking terminology
commit: 39e178a4cc7d042cd6353e73f3024d87e79a86ca

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: [PATCH 0/5] KVM: rseq: Fix and a test for a KVM+rseq bug

2021-09-22 Thread Paolo Bonzini

On 18/08/21 02:12, Sean Christopherson wrote:

Patch 1 fixes a KVM+rseq bug where KVM's handling of TIF_NOTIFY_RESUME,
e.g. for task migration, clears the flag without informing rseq and leads
to stale data in userspace's rseq struct.

Patch 2 is a cleanup to try and make future bugs less likely.  It's also
a baby step towards moving and renaming tracehook_notify_resume() since
it has nothing to do with tracing.  It kills me to not do the move/rename
as part of this series, but having a dedicated series/discussion seems
more appropriate given the sheer number of architectures that call
tracehook_notify_resume() and the lack of an obvious home for the code.

Patch 3 is a fix/cleanup to stop overriding x86's unistd_{32,64}.h when
the include path (intentionally) omits tools' uapi headers.  KVM's
selftests do exactly that so that they can pick up the uapi headers from
the installed kernel headers, and still use various tools/ headers that
mirror kernel code, e.g. linux/types.h.  This allows the new test in
patch 4 to reference __NR_rseq without having to manually define it.

Patch 4 is a regression test for the KVM+rseq bug.

Patch 5 is a cleanup made possible by patch 3.


Sean Christopherson (5):
   KVM: rseq: Update rseq when processing NOTIFY_RESUME on xfer to KVM
 guest
   entry: rseq: Call rseq_handle_notify_resume() in
 tracehook_notify_resume()
   tools: Move x86 syscall number fallbacks to .../uapi/
   KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration
 bugs
   KVM: selftests: Remove __NR_userfaultfd syscall fallback

  arch/arm/kernel/signal.c  |   1 -
  arch/arm64/kernel/signal.c|   1 -
  arch/csky/kernel/signal.c |   4 +-
  arch/mips/kernel/signal.c |   4 +-
  arch/powerpc/kernel/signal.c  |   4 +-
  arch/s390/kernel/signal.c |   1 -
  include/linux/tracehook.h |   2 +
  kernel/entry/common.c |   4 +-
  kernel/rseq.c |   4 +-
  .../x86/include/{ => uapi}/asm/unistd_32.h|   0
  .../x86/include/{ => uapi}/asm/unistd_64.h|   3 -
  tools/testing/selftests/kvm/.gitignore|   1 +
  tools/testing/selftests/kvm/Makefile  |   3 +
  tools/testing/selftests/kvm/rseq_test.c   | 131 ++
  14 files changed, 143 insertions(+), 20 deletions(-)
  rename tools/arch/x86/include/{ => uapi}/asm/unistd_32.h (100%)
  rename tools/arch/x86/include/{ => uapi}/asm/unistd_64.h (83%)
  create mode 100644 tools/testing/selftests/kvm/rseq_test.c



Queued v3, thanks.  I'll send it in a separate pull request to Linus 
since it touches stuff outside my usual turf.


Thanks,

Paolo



Re: [RESEND PATCH 2/2] powerpc/powermac: constify device_node in of_irq_parse_oldworld()

2021-09-22 Thread Krzysztof Kozlowski
On 22/09/2021 15:55, Christophe Leroy wrote:
> 
> 
> Le 22/09/2021 à 10:44, Krzysztof Kozlowski a écrit :
>> The of_irq_parse_oldworld() does not modify passed device_node so make
>> it a pointer to const for safety.
> 
> AFAIKS this patch is unrelated to previous one so you should send them 
> out separately instead of sending as a series.

The relation it's a series of bugfixes. Although they can be applied
independently, having a series is actually very useful - you run "b4 am"
on one message ID and get everything. The same with patchwork, if you
use that one.

> 
>>
>> Signed-off-by: Krzysztof Kozlowski 
>> ---
>>   arch/powerpc/platforms/powermac/pic.c | 2 +-
>>   include/linux/of_irq.h| 4 ++--
>>   2 files changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/powermac/pic.c 
>> b/arch/powerpc/platforms/powermac/pic.c
>> index 4921bccf0376..af5ca1f41bb1 100644
>> --- a/arch/powerpc/platforms/powermac/pic.c
>> +++ b/arch/powerpc/platforms/powermac/pic.c
>> @@ -384,7 +384,7 @@ static void __init pmac_pic_probe_oldstyle(void)
>>   #endif
>>   }
>>   
>> -int of_irq_parse_oldworld(struct device_node *device, int index,
>> +int of_irq_parse_oldworld(const struct device_node *device, int index,
>>  struct of_phandle_args *out_irq)
>>   {
>>  const u32 *ints = NULL;
>> diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
>> index aaf219bd0354..6074fdf51f0c 100644
>> --- a/include/linux/of_irq.h
>> +++ b/include/linux/of_irq.h
>> @@ -20,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, 
>> struct device_node *);
>>   #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
>>   extern unsigned int of_irq_workarounds;
>>   extern struct device_node *of_irq_dflt_pic;
>> -extern int of_irq_parse_oldworld(struct device_node *device, int index,
>> +extern int of_irq_parse_oldworld(const struct device_node *device, int 
>> index,
>> struct of_phandle_args *out_irq);
> 
> Please remove 'extern' which is useless for prototypes.

OK


Best regards,
Krzysztof


Re: [RESEND PATCH 1/2] powerpc/powermac: add missing g5_phy_disable_cpu1() declaration

2021-09-22 Thread Krzysztof Kozlowski
On 22/09/2021 15:52, Christophe Leroy wrote:
> 
> 
> Le 22/09/2021 à 10:44, Krzysztof Kozlowski a écrit :
>> g5_phy_disable_cpu1() is used outside of platforms/powermac/feature.c,
>> so it should have a declaration to fix W=1 warning:
>>
>>arch/powerpc/platforms/powermac/feature.c:1533:6:
>>  error: no previous prototype for ‘g5_phy_disable_cpu1’ 
>> [-Werror=missing-prototypes]
> 
> 
> While you are at it, can you clean it up completely, that is remove the 
> declaration in arch/powerpc/platforms/powermac/smp.c ?
> 

Sure, I'll send a v2. Thanks for pointing this out.


Best regards,
Krzysztof


Re: [RESEND PATCH 2/2] powerpc/powermac: constify device_node in of_irq_parse_oldworld()

2021-09-22 Thread Christophe Leroy




Le 22/09/2021 à 10:44, Krzysztof Kozlowski a écrit :

The of_irq_parse_oldworld() does not modify passed device_node so make
it a pointer to const for safety.


AFAIKS this patch is unrelated to previous one so you should send them 
out separately instead of sending as a series.




Signed-off-by: Krzysztof Kozlowski 
---
  arch/powerpc/platforms/powermac/pic.c | 2 +-
  include/linux/of_irq.h| 4 ++--
  2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/pic.c 
b/arch/powerpc/platforms/powermac/pic.c
index 4921bccf0376..af5ca1f41bb1 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -384,7 +384,7 @@ static void __init pmac_pic_probe_oldstyle(void)
  #endif
  }
  
-int of_irq_parse_oldworld(struct device_node *device, int index,

+int of_irq_parse_oldworld(const struct device_node *device, int index,
struct of_phandle_args *out_irq)
  {
const u32 *ints = NULL;
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index aaf219bd0354..6074fdf51f0c 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -20,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, 
struct device_node *);
  #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
  extern unsigned int of_irq_workarounds;
  extern struct device_node *of_irq_dflt_pic;
-extern int of_irq_parse_oldworld(struct device_node *device, int index,
+extern int of_irq_parse_oldworld(const struct device_node *device, int index,
   struct of_phandle_args *out_irq);


Please remove 'extern' which is useless for prototypes.


  #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
  #define of_irq_workarounds (0)
  #define of_irq_dflt_pic (NULL)
-static inline int of_irq_parse_oldworld(struct device_node *device, int index,
+static inline int of_irq_parse_oldworld(const struct device_node *device, int 
index,
  struct of_phandle_args *out_irq)
  {
return -EINVAL;



Re: [RESEND PATCH 1/2] powerpc/powermac: add missing g5_phy_disable_cpu1() declaration

2021-09-22 Thread Christophe Leroy




Le 22/09/2021 à 10:44, Krzysztof Kozlowski a écrit :

g5_phy_disable_cpu1() is used outside of platforms/powermac/feature.c,
so it should have a declaration to fix W=1 warning:

   arch/powerpc/platforms/powermac/feature.c:1533:6:
 error: no previous prototype for ‘g5_phy_disable_cpu1’ 
[-Werror=missing-prototypes]



While you are at it, can you clean it up completely, that is remove the 
declaration in arch/powerpc/platforms/powermac/smp.c ?





Signed-off-by: Krzysztof Kozlowski 
---
  arch/powerpc/include/asm/pmac_feature.h | 4 
  1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/pmac_feature.h 
b/arch/powerpc/include/asm/pmac_feature.h
index e08e829261b6..7703e5bf1203 100644
--- a/arch/powerpc/include/asm/pmac_feature.h
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -143,6 +143,10 @@
   */
  struct device_node;
  
+#ifdef CONFIG_PPC64

+void g5_phy_disable_cpu1(void);
+#endif /* CONFIG_PPC64 */
+
  static inline long pmac_call_feature(int selector, struct device_node* node,
long param, long value)
  {



Re: [PATCH v3 5/8] x86/sme: Replace occurrences of sme_active() with cc_platform_has()

2021-09-22 Thread Tom Lendacky

On 9/21/21 4:58 PM, Kirill A. Shutemov wrote:

On Tue, Sep 21, 2021 at 04:43:59PM -0500, Tom Lendacky wrote:

On 9/21/21 4:34 PM, Kirill A. Shutemov wrote:

On Tue, Sep 21, 2021 at 11:27:17PM +0200, Borislav Petkov wrote:

On Wed, Sep 22, 2021 at 12:20:59AM +0300, Kirill A. Shutemov wrote:

I still believe calling cc_platform_has() from __startup_64() is totally
broken as it lacks proper wrapping while accessing global variables.


Well, one of the issues on the AMD side was using boot_cpu_data too
early and the Intel side uses it too. Can you replace those checks with
is_tdx_guest() or whatever was the helper's name which would check
whether the the kernel is running as a TDX guest, and see if that helps?


There's no need in Intel check this early. Only AMD need it. Maybe just
opencode them?


Any way you can put a gzipped/bzipped copy of your vmlinux file somewhere I
can grab it from and take a look at it?


You can find broken vmlinux and bzImage here:

https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdrive.google.com%2Fdrive%2Ffolders%2F1n74vUQHOGebnF70Im32qLFY8iS3wvjIs%3Fusp%3Dsharing&data=04%7C01%7Cthomas.lendacky%40amd.com%7C1c7adf380cbe4c1a6bb708d97d4af6ff%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637678583935705530%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=gA30x%2Bfu97tUx0p2UqI8HgjiL8bxDbK1GqgJBbUrUE4%3D&reserved=0

Let me know when I can remove it.


Looking at everything, it is all RIP relative addressing, so those
accesses should be fine. Your image has the intel_cc_platform_has()
function, does it work if you remove that call? Because I think it may be
the early call into that function which looks like it has instrumentation
that uses %gs in __sanitizer_cov_trace_pc and %gs is not setup properly
yet. And since boot_cpu_data.x86_vendor will likely be zero this early it
will match X86_VENDOR_INTEL and call into that function.

8124f880 :
8124f880:   e8 bb 64 06 00  callq  812b5d40 
<__fentry__>
8124f885:   e8 36 ca 42 00  callq  8167c2c0 
<__sanitizer_cov_trace_pc>
8124f88a:   31 c0   xor%eax,%eax
8124f88c:   c3  retq


8167c2c0 <__sanitizer_cov_trace_pc>:
8167c2c0:   65 8b 05 39 ad 9a 7emov%gs:0x7e9aad39(%rip),%eax  
  # 27000 <__preempt_count>
8167c2c7:   89 c6   mov%eax,%esi
8167c2c9:   48 8b 0c 24 mov(%rsp),%rcx
8167c2cd:   81 e6 00 01 00 00   and$0x100,%esi
8167c2d3:   65 48 8b 14 25 40 70mov%gs:0x27040,%rdx

Thanks,
Tom





[PATCH] powerpc/breakpoint: Cleanup

2021-09-22 Thread Christophe Leroy
cache_op_size() does exactly the same as l1_dcache_bytes().

Remove it.

MSR_64BIT already exists, no need to enclose the check
around #ifdef __powerpc64__

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/hw_breakpoint_constraints.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c 
b/arch/powerpc/kernel/hw_breakpoint_constraints.c
index 675d1f66ab72..42b967e3d85c 100644
--- a/arch/powerpc/kernel/hw_breakpoint_constraints.c
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -127,15 +127,6 @@ bool wp_check_constraints(struct pt_regs *regs, struct 
ppc_inst instr,
return false;
 }
 
-static int cache_op_size(void)
-{
-#ifdef __powerpc64__
-   return ppc64_caches.l1d.block_size;
-#else
-   return L1_CACHE_BYTES;
-#endif
-}
-
 void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
 int *type, int *size, unsigned long *ea)
 {
@@ -147,14 +138,14 @@ void wp_get_instr_detail(struct pt_regs *regs, struct 
ppc_inst *instr,
analyse_instr(&op, regs, *instr);
*type = GETTYPE(op.type);
*ea = op.ea;
-#ifdef __powerpc64__
+
if (!(regs->msr & MSR_64BIT))
*ea &= 0xUL;
-#endif
+
 
*size = GETSIZE(op.type);
if (*type == CACHEOP) {
-   *size = cache_op_size();
+   *size = l1_dcache_bytes();
*ea &= ~(*size - 1);
} else if (*type == LOAD_VMX || *type == STORE_VMX) {
*ea &= ~(*size - 1);
-- 
2.31.1



[RESEND PATCH 1/2] powerpc/powermac: add missing g5_phy_disable_cpu1() declaration

2021-09-22 Thread Krzysztof Kozlowski
g5_phy_disable_cpu1() is used outside of platforms/powermac/feature.c,
so it should have a declaration to fix W=1 warning:

  arch/powerpc/platforms/powermac/feature.c:1533:6:
error: no previous prototype for ‘g5_phy_disable_cpu1’ 
[-Werror=missing-prototypes]

Signed-off-by: Krzysztof Kozlowski 
---
 arch/powerpc/include/asm/pmac_feature.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/pmac_feature.h 
b/arch/powerpc/include/asm/pmac_feature.h
index e08e829261b6..7703e5bf1203 100644
--- a/arch/powerpc/include/asm/pmac_feature.h
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -143,6 +143,10 @@
  */
 struct device_node;
 
+#ifdef CONFIG_PPC64
+void g5_phy_disable_cpu1(void);
+#endif /* CONFIG_PPC64 */
+
 static inline long pmac_call_feature(int selector, struct device_node* node,
long param, long value)
 {
-- 
2.30.2



[RESEND PATCH 2/2] powerpc/powermac: constify device_node in of_irq_parse_oldworld()

2021-09-22 Thread Krzysztof Kozlowski
The of_irq_parse_oldworld() does not modify passed device_node so make
it a pointer to const for safety.

Signed-off-by: Krzysztof Kozlowski 
---
 arch/powerpc/platforms/powermac/pic.c | 2 +-
 include/linux/of_irq.h| 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/pic.c 
b/arch/powerpc/platforms/powermac/pic.c
index 4921bccf0376..af5ca1f41bb1 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -384,7 +384,7 @@ static void __init pmac_pic_probe_oldstyle(void)
 #endif
 }
 
-int of_irq_parse_oldworld(struct device_node *device, int index,
+int of_irq_parse_oldworld(const struct device_node *device, int index,
struct of_phandle_args *out_irq)
 {
const u32 *ints = NULL;
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index aaf219bd0354..6074fdf51f0c 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -20,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, 
struct device_node *);
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 extern unsigned int of_irq_workarounds;
 extern struct device_node *of_irq_dflt_pic;
-extern int of_irq_parse_oldworld(struct device_node *device, int index,
+extern int of_irq_parse_oldworld(const struct device_node *device, int index,
   struct of_phandle_args *out_irq);
 #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 #define of_irq_workarounds (0)
 #define of_irq_dflt_pic (NULL)
-static inline int of_irq_parse_oldworld(struct device_node *device, int index,
+static inline int of_irq_parse_oldworld(const struct device_node *device, int 
index,
  struct of_phandle_args *out_irq)
 {
return -EINVAL;
-- 
2.30.2



Re: [PATCH] powerpc/code-patching: Return error on patch_branch() out-of-range failure

2021-09-22 Thread Naveen N. Rao

Christophe Leroy wrote:

Do not silently ignore a failure of create_branch() in
patch_branch(). Return -ERANGE.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/code-patching.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)


Reviewed-by: Naveen N. Rao 




diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index f9a3019e37b4..0bc9cc0416b8 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -202,7 +202,9 @@ int patch_branch(u32 *addr, unsigned long target, int flags)
 {
struct ppc_inst instr;
 
-	create_branch(&instr, addr, target, flags);

+   if (create_branch(&instr, addr, target, flags))
+   return -ERANGE;
+
return patch_instruction(addr, instr);
 }
 
--

2.25.0




Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver

2021-09-22 Thread Ard Biesheuvel
On Wed, 22 Sept 2021 at 12:43, Emmanuel Gil Peyrot
 wrote:
>
> On Wed, Sep 22, 2021 at 12:10:41PM +0200, Ard Biesheuvel wrote:
> > On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
> >  wrote:
> > >
> > > This engine implements AES in CBC mode, using 128-bit keys only.  It is
> > > present on both the Wii and the Wii U, and is apparently identical in
> > > both consoles.
> > >
> > > The hardware is capable of firing an interrupt when the operation is
> > > done, but this driver currently uses a busy loop, I’m not too sure
> > > whether it would be preferable to switch, nor how to achieve that.
> > >
> > > It also supports a mode where no operation is done, and thus could be
> > > used as a DMA copy engine, but I don’t know how to expose that to the
> > > kernel or whether it would even be useful.
> > >
> > > In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> > > aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> > > speedup.
> > >
> > > This driver was written based on reversed documentation, see:
> > > https://wiibrew.org/wiki/Hardware/AES
> > >
> > > Signed-off-by: Emmanuel Gil Peyrot 
> > > Tested-by: Emmanuel Gil Peyrot   # on Wii U
> >
> > This is redundant - everybody should test the code they submit.
>
> Indeed, except for the comment, as I haven’t been able to test on the
> Wii just yet and that’s kind of a call for doing exactly that. :)
>
> >
> > ...
> > > +   /* TODO: figure out how to use interrupts here, this will probably
> > > +* lower throughput but let the CPU do other things while the AES
> > > +* engine is doing its work. */
> >
> > So is it worthwhile like this? How much faster is it to use this
> > accelerator rather than the CPU?
>
> As I mentioned above, on my hardware it reaches 80.7 MiB/s using this
> busy loop instead of 30.9 MiB/s using aes-generic, measured using
> `cryptsetup benchmark --cipher=aes --key-size=128`.  I expect the
> difference would be even more pronounced on the Wii, with its CPU being
> clocked lower.
>

Ah apologies for not spotting that. This is a nice speedup.

> I will give a try at using the interrupt, but I fully expect a lower
> throughput alongside a lower CPU usage (for large requests).
>

You should consider latency as well. Is it really necessary to disable
interrupts as well? A scheduling blackout of ~1ms (for the worst case
of 64k of input @ 80 MB/s) may be tolerable but keeping interrupts
disabled for that long is probably not a great idea. (Just make sure
you use spin_lock_bh() to prevent deadlocks that could occur if your
code is called from softirq context)

But using the interrupt is obviously preferred. What's wrong with it?

Btw the crypto API does not permit AES-128 only - you will need to add
a fallback for other key sizes as well.


> >
> > > +   do {
> > > +   status = ioread32be(base + AES_CTRL);
> > > +   cpu_relax();
> > > +   } while ((status & AES_CTRL_EXEC) && --counter);
> > > +
> > > +   /* Do we ever get called with dst ≠ src?  If so we have to 
> > > invalidate
> > > +* dst in addition to the earlier flush of src. */
> > > +   if (unlikely(dst != src)) {
> > > +   for (i = 0; i < len; i += 32)
> > > +   __asm__("dcbi 0, %0" : : "r" (dst + i));
> > > +   __asm__("sync" : : : "memory");
> > > +   }
> > > +
> > > +   return counter ? 0 : 1;
> > > +}
> > > +
> > > +static void
> > > +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> > > +  bool firstchunk)
> > > +{
> > > +   u32 flags = 0;
> > > +   unsigned long iflags;
> > > +   int ret;
> > > +
> > > +   flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> > > +
> > > +   if (dir == AES_DIR_DECRYPT)
> > > +   flags |= AES_CTRL_DEC;
> > > +
> > > +   if (!firstchunk)
> > > +   flags |= AES_CTRL_IV;
> > > +
> > > +   /* Start the critical section */
> > > +   spin_lock_irqsave(&lock, iflags);
> > > +
> > > +   if (firstchunk)
> > > +   writefield(AES_IV, iv);
> > > +
> > > +   ret = do_crypt(src, dst, len, flags);
> > > +   BUG_ON(ret);
> > > +
> > > +   spin_unlock_irqrestore(&lock, iflags);
> > > +}
> > > +
> > > +static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const 
> > > u8 *key,
> > > +   unsigned int len)
> > > +{
> > > +   /* The hardware only supports AES-128 */
> > > +   if (len != AES_KEYSIZE_128)
> > > +   return -EINVAL;
> > > +
> > > +   writefield(AES_KEY, key);
> > > +   return 0;
> > > +}
> > > +
> > > +static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
> > > +{
> > > +   struct skcipher_walk walk;
> > > +   unsigned int nbytes;
> > > +   int err;
> > > +   char ivbuf[AES_BLOCK_SIZE];
> > > +   unsigned int ivsize;
> > > +
> > > +   bool firstchunk

Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver

2021-09-22 Thread Emmanuel Gil Peyrot
On Wed, Sep 22, 2021 at 12:10:41PM +0200, Ard Biesheuvel wrote:
> On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
>  wrote:
> >
> > This engine implements AES in CBC mode, using 128-bit keys only.  It is
> > present on both the Wii and the Wii U, and is apparently identical in
> > both consoles.
> >
> > The hardware is capable of firing an interrupt when the operation is
> > done, but this driver currently uses a busy loop, I’m not too sure
> > whether it would be preferable to switch, nor how to achieve that.
> >
> > It also supports a mode where no operation is done, and thus could be
> > used as a DMA copy engine, but I don’t know how to expose that to the
> > kernel or whether it would even be useful.
> >
> > In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> > aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> > speedup.
> >
> > This driver was written based on reversed documentation, see:
> > https://wiibrew.org/wiki/Hardware/AES
> >
> > Signed-off-by: Emmanuel Gil Peyrot 
> > Tested-by: Emmanuel Gil Peyrot   # on Wii U
> 
> This is redundant - everybody should test the code they submit.

Indeed, except for the comment, as I haven’t been able to test on the
Wii just yet and that’s kind of a call for doing exactly that. :)

> 
> ...
> > +   /* TODO: figure out how to use interrupts here, this will probably
> > +* lower throughput but let the CPU do other things while the AES
> > +* engine is doing its work. */
> 
> So is it worthwhile like this? How much faster is it to use this
> accelerator rather than the CPU?

As I mentioned above, on my hardware it reaches 80.7 MiB/s using this
busy loop instead of 30.9 MiB/s using aes-generic, measured using
`cryptsetup benchmark --cipher=aes --key-size=128`.  I expect the
difference would be even more pronounced on the Wii, with its CPU being
clocked lower.

I will give a try at using the interrupt, but I fully expect a lower
throughput alongside a lower CPU usage (for large requests).

> 
> > +   do {
> > +   status = ioread32be(base + AES_CTRL);
> > +   cpu_relax();
> > +   } while ((status & AES_CTRL_EXEC) && --counter);
> > +
> > +   /* Do we ever get called with dst ≠ src?  If so we have to 
> > invalidate
> > +* dst in addition to the earlier flush of src. */
> > +   if (unlikely(dst != src)) {
> > +   for (i = 0; i < len; i += 32)
> > +   __asm__("dcbi 0, %0" : : "r" (dst + i));
> > +   __asm__("sync" : : : "memory");
> > +   }
> > +
> > +   return counter ? 0 : 1;
> > +}
> > +
> > +static void
> > +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> > +  bool firstchunk)
> > +{
> > +   u32 flags = 0;
> > +   unsigned long iflags;
> > +   int ret;
> > +
> > +   flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> > +
> > +   if (dir == AES_DIR_DECRYPT)
> > +   flags |= AES_CTRL_DEC;
> > +
> > +   if (!firstchunk)
> > +   flags |= AES_CTRL_IV;
> > +
> > +   /* Start the critical section */
> > +   spin_lock_irqsave(&lock, iflags);
> > +
> > +   if (firstchunk)
> > +   writefield(AES_IV, iv);
> > +
> > +   ret = do_crypt(src, dst, len, flags);
> > +   BUG_ON(ret);
> > +
> > +   spin_unlock_irqrestore(&lock, iflags);
> > +}
> > +
> > +static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 
> > *key,
> > +   unsigned int len)
> > +{
> > +   /* The hardware only supports AES-128 */
> > +   if (len != AES_KEYSIZE_128)
> > +   return -EINVAL;
> > +
> > +   writefield(AES_KEY, key);
> > +   return 0;
> > +}
> > +
> > +static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
> > +{
> > +   struct skcipher_walk walk;
> > +   unsigned int nbytes;
> > +   int err;
> > +   char ivbuf[AES_BLOCK_SIZE];
> > +   unsigned int ivsize;
> > +
> > +   bool firstchunk = true;
> > +
> > +   /* Reset the engine */
> > +   iowrite32be(0, base + AES_CTRL);
> > +
> > +   err = skcipher_walk_virt(&walk, req, false);
> > +   ivsize = min(sizeof(ivbuf), walk.ivsize);
> > +
> > +   while ((nbytes = walk.nbytes) != 0) {
> > +   unsigned int chunkbytes = round_down(nbytes, 
> > AES_BLOCK_SIZE);
> > +   unsigned int ret = nbytes % AES_BLOCK_SIZE;
> > +
> > +   if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> > +   /* If this is the last chunk and we're decrypting, 
> > take
> > +* note of the IV (which is the last ciphertext 
> > block)
> > +*/
> > +   memcpy(ivbuf, walk.src.virt.addr + walk.total - 
> > ivsize,
> > +  ivsize);
> > +   }
> > +
> > +   nintendo_aes_crypt(wal

Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver

2021-09-22 Thread Ard Biesheuvel
On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
 wrote:
>
> This engine implements AES in CBC mode, using 128-bit keys only.  It is
> present on both the Wii and the Wii U, and is apparently identical in
> both consoles.
>
> The hardware is capable of firing an interrupt when the operation is
> done, but this driver currently uses a busy loop, I’m not too sure
> whether it would be preferable to switch, nor how to achieve that.
>
> It also supports a mode where no operation is done, and thus could be
> used as a DMA copy engine, but I don’t know how to expose that to the
> kernel or whether it would even be useful.
>
> In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> speedup.
>
> This driver was written based on reversed documentation, see:
> https://wiibrew.org/wiki/Hardware/AES
>
> Signed-off-by: Emmanuel Gil Peyrot 
> Tested-by: Emmanuel Gil Peyrot   # on Wii U

This is redundant - everybody should test the code they submit.

...
> +   /* TODO: figure out how to use interrupts here, this will probably
> +* lower throughput but let the CPU do other things while the AES
> +* engine is doing its work. */

So is it worthwhile like this? How much faster is it to use this
accelerator rather than the CPU?

> +   do {
> +   status = ioread32be(base + AES_CTRL);
> +   cpu_relax();
> +   } while ((status & AES_CTRL_EXEC) && --counter);
> +
> +   /* Do we ever get called with dst ≠ src?  If so we have to invalidate
> +* dst in addition to the earlier flush of src. */
> +   if (unlikely(dst != src)) {
> +   for (i = 0; i < len; i += 32)
> +   __asm__("dcbi 0, %0" : : "r" (dst + i));
> +   __asm__("sync" : : : "memory");
> +   }
> +
> +   return counter ? 0 : 1;
> +}
> +
> +static void
> +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> +  bool firstchunk)
> +{
> +   u32 flags = 0;
> +   unsigned long iflags;
> +   int ret;
> +
> +   flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> +
> +   if (dir == AES_DIR_DECRYPT)
> +   flags |= AES_CTRL_DEC;
> +
> +   if (!firstchunk)
> +   flags |= AES_CTRL_IV;
> +
> +   /* Start the critical section */
> +   spin_lock_irqsave(&lock, iflags);
> +
> +   if (firstchunk)
> +   writefield(AES_IV, iv);
> +
> +   ret = do_crypt(src, dst, len, flags);
> +   BUG_ON(ret);
> +
> +   spin_unlock_irqrestore(&lock, iflags);
> +}
> +
> +static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 
> *key,
> +   unsigned int len)
> +{
> +   /* The hardware only supports AES-128 */
> +   if (len != AES_KEYSIZE_128)
> +   return -EINVAL;
> +
> +   writefield(AES_KEY, key);
> +   return 0;
> +}
> +
> +static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
> +{
> +   struct skcipher_walk walk;
> +   unsigned int nbytes;
> +   int err;
> +   char ivbuf[AES_BLOCK_SIZE];
> +   unsigned int ivsize;
> +
> +   bool firstchunk = true;
> +
> +   /* Reset the engine */
> +   iowrite32be(0, base + AES_CTRL);
> +
> +   err = skcipher_walk_virt(&walk, req, false);
> +   ivsize = min(sizeof(ivbuf), walk.ivsize);
> +
> +   while ((nbytes = walk.nbytes) != 0) {
> +   unsigned int chunkbytes = round_down(nbytes, AES_BLOCK_SIZE);
> +   unsigned int ret = nbytes % AES_BLOCK_SIZE;
> +
> +   if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> +   /* If this is the last chunk and we're decrypting, 
> take
> +* note of the IV (which is the last ciphertext block)
> +*/
> +   memcpy(ivbuf, walk.src.virt.addr + walk.total - 
> ivsize,
> +  ivsize);
> +   }
> +
> +   nintendo_aes_crypt(walk.src.virt.addr, walk.dst.virt.addr,
> +  chunkbytes, walk.iv, dir, firstchunk);
> +
> +   if (walk.total == chunkbytes && dir == AES_DIR_ENCRYPT) {
> +   /* If this is the last chunk and we're encrypting, 
> take
> +* note of the IV (which is the last ciphertext block)
> +*/
> +   memcpy(walk.iv,
> +  walk.dst.virt.addr + walk.total - ivsize,
> +  ivsize);
> +   } else if (walk.total == chunkbytes && dir == 
> AES_DIR_DECRYPT) {
> +   memcpy(walk.iv, ivbuf, ivsize);
> +   }
> +
> +   err = skcipher_walk_done(&walk, ret);
> +   firstchunk = false;
> +   }
> +
> +   return err;
> +}
> +
> +static int nintendo_cbc

Re: [PATCH 7/7] ocxl: Use pci core's DVSEC functionality

2021-09-22 Thread Frederic Barrat




On 22/09/2021 02:44, Dan Williams wrote:

On Tue, Sep 21, 2021 at 3:05 PM Ben Widawsky  wrote:


Reduce maintenance burden of DVSEC query implementation by using the
centralized PCI core implementation.

Cc: linuxppc-dev@lists.ozlabs.org
Cc: Frederic Barrat 
Cc: Andrew Donnellan 
Signed-off-by: Ben Widawsky 
---
  drivers/misc/ocxl/config.c | 13 +
  1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index a68738f38252..e401a51596b9 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -33,18 +33,7 @@

  static int find_dvsec(struct pci_dev *dev, int dvsec_id)
  {
-   int vsec = 0;
-   u16 vendor, id;
-
-   while ((vsec = pci_find_next_ext_capability(dev, vsec,
-   OCXL_EXT_CAP_ID_DVSEC))) {
-   pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
-   &vendor);
-   pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
-   if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
-   return vsec;
-   }
-   return 0;
+   return pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, dvsec_id);
  }



That looks fine, thanks for spotting it. You can add this for the next 
revision:

Acked-by: Frederic Barrat 





What about:

arch/powerpc/platforms/powernv/ocxl.c::find_dvsec_from_pos()

...?  With that converted the redundant definitions below:

OCXL_EXT_CAP_ID_DVSEC
OCXL_DVSEC_VENDOR_OFFSET
OCXL_DVSEC_ID_OFFSET

...can be cleaned up in favor of the core definitions.



That would be great. Are you guys willing to do it? If not, I could have 
a follow-on patch, if I don't forget :-)


Thanks,

  Fred



Re: [PATCH 5/7] PCI: Add pci_find_dvsec_capability to find designated VSEC

2021-09-22 Thread Frederic Barrat




On 22/09/2021 00:04, Ben Widawsky wrote:

Add pci_find_dvsec_capability to locate a Designated Vendor-Specific
Extended Capability with the specified DVSEC ID.

The Designated Vendor-Specific Extended Capability (DVSEC) allows one or
more vendor specific capabilities that aren't tied to the vendor ID of
the PCI component.

DVSEC is critical for both the Compute Express Link (CXL) driver as well
as the driver for OpenCAPI coherent accelerator (OCXL).

Cc: David E. Box 
Cc: Jonathan Cameron 
Cc: Bjorn Helgaas 
Cc: Dan Williams 
Cc: linux-...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Frederic Barrat 
Cc: Andrew Donnellan 
Signed-off-by: Ben Widawsky 
---



LGTM
Reviewed-by: Frederic Barrat 



  drivers/pci/pci.c   | 32 
  include/linux/pci.h |  1 +
  2 files changed, 33 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ce2ab62b64cf..94ac86ff28b0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -732,6 +732,38 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 
vendor, int cap)
  }
  EXPORT_SYMBOL_GPL(pci_find_vsec_capability);
  
+/**

+ * pci_find_dvsec_capability - Find DVSEC for vendor
+ * @dev: PCI device to query
+ * @vendor: Vendor ID to match for the DVSEC
+ * @dvsec: Designated Vendor-specific capability ID
+ *
+ * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability
+ * offset in config space; otherwise return 0.
+ */
+u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec)
+{
+   int pos;
+
+   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC);
+   if (!pos)
+   return 0;
+
+   while (pos) {
+   u16 v, id;
+
+   pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, &v);
+   pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, &id);
+   if (vendor == v && dvsec == id)
+   return pos;
+
+   pos = pci_find_next_ext_capability(dev, pos, 
PCI_EXT_CAP_ID_DVSEC);
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(pci_find_dvsec_capability);
+
  /**
   * pci_find_parent_resource - return resource region of parent bus of given
   *  region
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd8aa6fce204..c93ccfa4571b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1130,6 +1130,7 @@ u16 pci_find_ext_capability(struct pci_dev *dev, int cap);
  u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap);
  struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
  u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap);
+u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec);
  
  u64 pci_get_dsn(struct pci_dev *dev);
  



Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()

2021-09-22 Thread Srikar Dronamraju
* Nathan Lynch  [2021-09-20 22:12:13]:

> vcpu_is_preempted() can be used outside of preempt-disabled critical
> sections, yielding warnings such as:
> 
> BUG: using smp_processor_id() in preemptible [] code: 
> systemd-udevd/185
> caller is rwsem_spin_on_owner+0x1cc/0x2d0
> CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33
> Call Trace:
> [c00012907ac0] [c0aa30a8] dump_stack_lvl+0xac/0x108 (unreliable)
> [c00012907b00] [c1371f70] check_preemption_disabled+0x150/0x160
> [c00012907b90] [c01e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0
> [c00012907be0] [c01e1408] rwsem_down_write_slowpath+0x478/0x9a0
> [c00012907ca0] [c0576cf4] filename_create+0x94/0x1e0
> [c00012907d10] [c057ac08] do_symlinkat+0x68/0x1a0
> [c00012907d70] [c057ae18] sys_symlink+0x58/0x70
> [c00012907da0] [c002e448] system_call_exception+0x198/0x3c0
> [c00012907e10] [c000c54c] system_call_common+0xec/0x250
> 
> The result of vcpu_is_preempted() is always subject to invalidation by
> events inside and outside of Linux; it's just a best guess at a point in
> time. Use raw_smp_processor_id() to avoid such warnings.

Typically smp_processor_id() and raw_smp_processor_id() are the same, except for
CONFIG_DEBUG_PREEMPT. In the CONFIG_DEBUG_PREEMPT case, smp_processor_id()
is actually debug_smp_processor_id(), which does all the checks.

I believe these checks in debug_smp_processor_id() are only valid for x86
case (aka cases where they have __smp_processor_id() defined.)
i.e. x86 has a different implementation of _smp_processor_id() for stable and
unstable

> 
> Signed-off-by: Nathan Lynch 
> Fixes: ca3f969dcb11 ("powerpc/paravirt: Use is_kvm_guest() in 
> vcpu_is_preempted()")
> ---
>  arch/powerpc/include/asm/paravirt.h | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/paravirt.h 
> b/arch/powerpc/include/asm/paravirt.h
> index bcb7b5f917be..e429aca566de 100644
> --- a/arch/powerpc/include/asm/paravirt.h
> +++ b/arch/powerpc/include/asm/paravirt.h
> @@ -97,7 +97,14 @@ static inline bool vcpu_is_preempted(int cpu)
> 
>  #ifdef CONFIG_PPC_SPLPAR
>   if (!is_kvm_guest()) {
> - int first_cpu = cpu_first_thread_sibling(smp_processor_id());
> + int first_cpu;
> +
> + /*
> +  * This is only a guess at best, and this function may be
> +  * called with preemption enabled. Using raw_smp_processor_id()
> +  * does not damage accuracy.
> +  */
> + first_cpu = cpu_first_thread_sibling(raw_smp_processor_id());
> 
>   /*
>* Preemption can only happen at core granularity. This CPU
> -- 
> 2.31.1
> 

How about something like the below?

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 510519e8a1eb..8c669e8ceb73 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -256,12 +256,14 @@ static inline int get_boot_cpu_id(void)
  */
 #ifndef __smp_processor_id
 #define __smp_processor_id(x) raw_smp_processor_id(x)
-#endif
-
+#else
 #ifdef CONFIG_DEBUG_PREEMPT
   extern unsigned int debug_smp_processor_id(void);
 # define smp_processor_id() debug_smp_processor_id()
-#else
+#endif
+#endif
+
+#ifndef smp_processor_id
 # define smp_processor_id() __smp_processor_id()
 #endif
 

-- 
Thanks and Regards
Srikar Dronamraju