Re: [PATCH v2 0/2] of: of_device.h cleanups

2021-02-11 Thread Greg Kroah-Hartman
On Thu, Feb 11, 2021 at 05:27:43PM -0600, Rob Herring wrote:
> This is a couple of cleanups for of_device.h. They fell out from my
> attempt at decoupling of_device.h and of_platform.h which is a mess
> and I haven't finished, but there's no reason to wait on these.

Reviewed-by: Greg Kroah-Hartman 


Re: [PATCH v5 06/10] powerpc/signal64: Replace setup_sigcontext() w/ unsafe_setup_sigcontext()

2021-02-11 Thread Daniel Axtens
Hi Chris,

> Previously setup_sigcontext() performed a costly KUAP switch on every
> uaccess operation. These repeated uaccess switches cause a significant
> drop in signal handling performance.
>
> Rewrite setup_sigcontext() to assume that a userspace write access window
> is open. Replace all uaccess functions with their 'unsafe' versions
> which avoid the repeated uaccess switches.

Just to clarify the commit message a bit: you're also changing the
callers of the old safe versions to first open the window, then call the
unsafe variants, then close the window again.
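
The resulting caller shape is roughly this (a sketch only; the function
name and error path are illustrative, not code from the series):

static long sketch_setup_sigcontext(struct sigcontext __user *sc,
				    struct task_struct *tsk, int signr,
				    sigset_t *set, unsigned long handler,
				    int ctx_has_vsx_region)
{
	if (!user_write_access_begin(sc, sizeof(*sc)))
		return -EFAULT;

	/* KUAP window is open: everything inside uses the unsafe variants */
	unsafe_setup_sigcontext(sc, tsk, signr, set, handler,
				ctx_has_vsx_region, efault_out);

	user_write_access_end();
	return 0;

efault_out:
	user_write_access_end();
	return -EFAULT;
}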

> Signed-off-by: Christopher M. Riedl 
> ---
>  arch/powerpc/kernel/signal_64.c | 70 -
>  1 file changed, 43 insertions(+), 27 deletions(-)
>
> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> index 8e1d804ce552..4248e4489ff1 100644
> --- a/arch/powerpc/kernel/signal_64.c
> +++ b/arch/powerpc/kernel/signal_64.c
> @@ -101,9 +101,13 @@ static void prepare_setup_sigcontext(struct task_struct 
> *tsk, int ctx_has_vsx_re
>   * Set up the sigcontext for the signal frame.
>   */
>  
> -static long setup_sigcontext(struct sigcontext __user *sc,
> - struct task_struct *tsk, int signr, sigset_t *set,
> - unsigned long handler, int ctx_has_vsx_region)
> +#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler,		\
> + ctx_has_vsx_region, e)  \
> + unsafe_op_wrap(__unsafe_setup_sigcontext(sc, tsk, signr, set,   \
> + handler, ctx_has_vsx_region), e)
> +static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
> +		struct task_struct *tsk, int signr, sigset_t *set,
> +		unsigned long handler, int ctx_has_vsx_region)
>  {
>   /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
>* process never used altivec yet (MSR_VEC is zero in pt_regs of
> @@ -118,20 +122,19 @@ static long setup_sigcontext(struct sigcontext __user 
> *sc,
>  #endif
>   struct pt_regs *regs = tsk->thread.regs;
>   unsigned long msr = regs->msr;
> - long err = 0;
>   /* Force usr to alway see softe as 1 (interrupts enabled) */
>   unsigned long softe = 0x1;
>  
>   BUG_ON(tsk != current);
>  
>  #ifdef CONFIG_ALTIVEC
> - err |= __put_user(v_regs, &sc->v_regs);
> + unsafe_put_user(v_regs, &sc->v_regs, efault_out);
>  
>   /* save altivec registers */
>   if (tsk->thread.used_vr) {
>   /* Copy 33 vec registers (vr0..31 and vscr) to the stack */
> - err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
> -   33 * sizeof(vector128));
> + unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
> + 33 * sizeof(vector128), efault_out);
>   /* set MSR_VEC in the MSR value in the frame to indicate that 
> sc->v_reg)
>* contains valid data.
>*/
> @@ -140,12 +143,12 @@ static long setup_sigcontext(struct sigcontext __user 
> *sc,
>   /* We always copy to/from vrsave, it's 0 if we don't have or don't
>* use altivec.
>*/
> - err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
> + unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], 
> efault_out);
>  #else /* CONFIG_ALTIVEC */
> - err |= __put_user(0, &sc->v_regs);
> + unsafe_put_user(0, &sc->v_regs, efault_out);
>  #endif /* CONFIG_ALTIVEC */
>   /* copy fpr regs and fpscr */
> - err |= copy_fpr_to_user(&sc->fp_regs, tsk);
> + unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
>  
>   /*
>* Clear the MSR VSX bit to indicate there is no valid state attached
> @@ -160,24 +163,27 @@ static long setup_sigcontext(struct sigcontext __user 
> *sc,
>*/
>   if (tsk->thread.used_vsr && ctx_has_vsx_region) {
>   v_regs += ELF_NVRREG;
> - err |= copy_vsx_to_user(v_regs, tsk);
> + unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
>   /* set MSR_VSX in the MSR value in the frame to
>* indicate that sc->vs_reg) contains valid data.
>*/
>   msr |= MSR_VSX;
>   }
>  #endif /* CONFIG_VSX */
> - err |= __put_user(&sc->gp_regs, &sc->regs);
> + unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
>   WARN_ON(!FULL_REGS(regs));
> - err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
> - err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
> - err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
> - err |= __put_user(signr, &sc->signal);
> - err |= __put_user(handler, &sc->handler);
> + unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
> + unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
> + unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
> + unsafe_put_user(signr, &sc->signal, efault_out);
> + 

Re: [PATCH v5 05/10] powerpc/signal64: Remove TM ifdefery in middle of if/else block

2021-02-11 Thread Daniel Axtens
Hi Chris,

> Rework the messy ifdef breaking up the if-else for TM similar to
> commit f1cf4f93de2f ("powerpc/signal32: Remove ifdefery in middle of 
> if/else").

I'm not sure what 'the messy ifdef' and 'the if-else for TM' is (yet):
perhaps you could start the commit message with a tiny bit of
background.

> Unlike that commit for ppc32, the ifdef can't be removed entirely since
> uc_transact in sigframe depends on CONFIG_PPC_TRANSACTIONAL_MEM.
>
> Signed-off-by: Christopher M. Riedl 
> ---
>  arch/powerpc/kernel/signal_64.c | 16 +++-
>  1 file changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> index b211a8ea4f6e..8e1d804ce552 100644
> --- a/arch/powerpc/kernel/signal_64.c
> +++ b/arch/powerpc/kernel/signal_64.c
> @@ -710,9 +710,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
>   struct pt_regs *regs = current_pt_regs();
>   struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
>   sigset_t set;
> -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>   unsigned long msr;
> -#endif
>  
>   /* Always make any pending restarted system calls return -EINTR */
>   current->restart_block.fn = do_no_restart_syscall;
> @@ -765,7 +763,10 @@ SYSCALL_DEFINE0(rt_sigreturn)
>  
>   if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
>   goto badframe;
> +#endif
> +
>   if (MSR_TM_ACTIVE(msr)) {
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>   /* We recheckpoint on return. */
>   struct ucontext __user *uc_transact;
>  
> @@ -778,9 +779,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
>   if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
>  &uc_transact->uc_mcontext))
>   goto badframe;
> - } else
>  #endif
> - {
> + } else {
>   /*
>* Fall through, for non-TM restore
>*

I think you can get rid of all the ifdefs in _this function_ by
providing a couple of stubs:

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a66f435dabbf..19059a4b798f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1120,6 +1120,7 @@ void restore_tm_state(struct pt_regs *regs)
 #else
 #define tm_recheckpoint_new_task(new)
 #define __switch_to_tm(prev, new)
+void tm_reclaim_current(uint8_t cause) {}
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 static inline void save_sprs(struct thread_struct *t)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 8e1d804ce552..ed58ca019ad9 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -594,6 +594,13 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
 
return err;
 }
+#else
+static long restore_tm_sigcontexts(struct task_struct *tsk,
+  struct sigcontext __user *sc,
+  struct sigcontext __user *tm_sc)
+{
+   return -EINVAL;
+}
 #endif
 
 /*
@@ -722,7 +729,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
goto badframe;
set_current_blocked();
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * If there is a transactional state then throw it away.
 * The purpose of a sigreturn is to destroy all traces of the
@@ -763,10 +769,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
 
	if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
-#endif
 
if (MSR_TM_ACTIVE(msr)) {
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
 
@@ -779,7 +783,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
	if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
				   &uc_transact->uc_mcontext))
goto badframe;
-#endif
} else {
/*
 * Fall through, for non-TM restore

My only concern here was whether it was valid to access
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
if CONFIG_PPC_TRANSACTIONAL_MEM was not defined, but I didn't think of
any obvious reason why it wouldn't be...

> @@ -818,10 +818,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t 
> *set,
>   unsigned long newsp = 0;
>   long err = 0;
>   struct pt_regs *regs = tsk->thread.regs;
> -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>   /* Save the thread's msr before get_tm_stackpointer() changes it */
>   unsigned long msr = regs->msr;
> -#endif

I wondered if that comment still makes sense in the absence of the
ifdef. It is preserved in commit f1cf4f93de2f ("powerpc/signal32: Remove
ifdefery in middle of if/else"), and I can't figure out how you would
improve it, so I guess it's probably good as it is.

>   frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
>   if (!access_ok(frame, sizeof(*frame)))
> @@ -836,8 +834,9 @@ int 

Re: [PATCH v5 04/10] powerpc: Reference param in MSR_TM_ACTIVE() macro

2021-02-11 Thread Daniel Axtens
Hi Chris,

(totally bikeshedding, but I'd use the full word parameter in the
subject if you have enough spare characters.)

> Unlike the other MSR_TM_* macros, MSR_TM_ACTIVE does not reference or
> use its parameter unless CONFIG_PPC_TRANSACTIONAL_MEM is defined. This
> causes an 'unused variable' compile warning unless the variable is also
> guarded with CONFIG_PPC_TRANSACTIONAL_MEM.
>
> Reference but do nothing with the argument in the macro to avoid a
> potential compile warning.

Andrew asked why we weren't seeing these warnings already.

I was able to reproduce them with CONFIG_PPC_TRANSACTIONAL_MEM off, but
only if I compiled with W=1:

arch/powerpc/kernel/process.c: In function ‘enable_kernel_fp’:
arch/powerpc/kernel/process.c:210:16: error: variable ‘cpumsr’ set but not used 
[-Werror=unused-but-set-variable]
  210 |  unsigned long cpumsr;
      |                ^~~~~~

Having said that, this change seems like a good idea; squashing warnings
at W=1 is still valuable.
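
For reference, a standalone illustration (not kernel code) of why the
((void)(x), 0) form keeps the compiler quiet: the comma expression
evaluates and discards x, so the variable counts as used even when the
TM path is compiled out, while the macro still yields the constant 0.

#define MSR_TM_ACTIVE(x)	((void)(x), 0)

static int tm_check_example(unsigned long msr)
{
	if (MSR_TM_ACTIVE(msr))		/* expands to ((void)(msr), 0) */
		return 1;		/* dead code with the stub macro */
	return 0;
}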

Reviewed-by: Daniel Axtens 

Kind regards,
Daniel

> Signed-off-by: Christopher M. Riedl 
> ---
>  arch/powerpc/include/asm/reg.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index e40a921d78f9..c5a3e856191c 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -124,7 +124,7 @@
>  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>  #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
>  #else
> -#define MSR_TM_ACTIVE(x) 0
> +#define MSR_TM_ACTIVE(x) ((void)(x), 0)
>  #endif
>  
>  #if defined(CONFIG_PPC_BOOK3S_64)
> -- 
> 2.26.1


linux-next: manual merge of the spi tree with the powerpc tree

2021-02-11 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the spi tree got a conflict in:

  drivers/spi/spi-mpc52xx.c

between commit:

  e10656114d32 ("spi: mpc52xx: Avoid using get_tbl()")

from the powerpc tree and commit:

  258ea99fe25a ("spi: spi-mpc52xx: Use new structure for SPI transfer delays")

from the spi tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

BTW Mark: the author's address in 258ea99fe25a uses a non existent domain :-(
-- 
Cheers,
Stephen Rothwell

diff --cc drivers/spi/spi-mpc52xx.c
index e6a30f232370,36f941500676..
--- a/drivers/spi/spi-mpc52xx.c
+++ b/drivers/spi/spi-mpc52xx.c
@@@ -247,8 -247,10 +247,10 @@@ static int mpc52xx_spi_fsmstate_transfe
/* Is the transfer complete? */
ms->len--;
if (ms->len == 0) {
 -  ms->timestamp = get_tbl();
 +  ms->timestamp = mftb();
-   ms->timestamp += ms->transfer->delay_usecs * tb_ticks_per_usec;
+   if (ms->transfer->delay.unit == SPI_DELAY_UNIT_USECS)
+   ms->timestamp += ms->transfer->delay.value *
+tb_ticks_per_usec;
ms->state = mpc52xx_spi_fsmstate_wait;
return FSM_CONTINUE;
}


pgp5F9M5sQSJb.pgp
Description: OpenPGP digital signature


Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Thiago Jung Bauermann


Lakshmi Ramasubramanian  writes:

> On 2/11/21 6:11 PM, Thiago Jung Bauermann wrote:
>> Lakshmi Ramasubramanian  writes:
>> 
>>> On 2/11/21 3:59 PM, Thiago Jung Bauermann wrote:
 Lakshmi Ramasubramanian  writes:

> On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote:
>> Hi Rob,
>> [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem
>> This change causes build problem for x86_64 architecture (please see the
>> mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses
>> "elf_load_addr" for the ELF header buffer address and not
>> "elf_headers_mem".
>> struct kimage_arch {
>>...
>>/* Core ELF header buffer */
>>void *elf_headers;
>>unsigned long elf_headers_sz;
>>unsigned long elf_load_addr;
>> };
>> I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and
>> PPC64 since they are the only ones using this function now.
>> #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64)
> Sorry - I meant to say
> #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64)
>
 Does it build correctly if you rename elf_headers_mem to elf_load_addr?
 Or the other way around, renaming x86's elf_load_addr to
 elf_headers_mem. I don't really have a preference.
>>>
>>> Yes - changing arm64 and ppc from "elf_headers_mem" to "elf_load_addr" 
>>> builds
>>> fine.
>>>
>>> But I am concerned about a few other architectures that also define "struct
>>> kimage_arch" such as "parisc", "arm" which do not have any ELF related 
>>> fields.
>>> They would not build if the config defines CONFIG_KEXEC_FILE and
>>> CONFIG_OF_FLATTREE.
>>>
>>> Do you think that could be an issue?
>> That's a good point. But in practice, arm doesn't support
>> CONFIG_KEXEC_FILE. And while parisc does support CONFIG_KEXEC_FILE, as
>> far as I could determine it doesn't support CONFIG_OF.
>> So IMHO we don't need to worry about them. We'll cross that bridge if we
>> get there. If they ever implement KEXEC_FILE or OF_FLATTREE support,
>> then (again, IMHO) the natural solution would be for them to name the
>> ELF header member the same way the other arches do.
>> And since no other architecture defines struct kimage_arch, those are
>> the only ones we need to consider.
>> 
>
> Sounds good Thiago.
>
> I'll rename arm64 and ppc kimage_arch ELF address field to match that defined
> for x86/x64.
>
> Also, will add "fdt_size" param to of_kexec_alloc_and_setup_fdt(). For now, 
> I'll
> use 2*fdt_totalsize(initial_boot_params) for ppc.
>
> Will send the updated patches shortly.

Sounds good. There will be a small conflict with powerpc/next because of
the patch I mentioned, but it's simple to fix by whoever merges the
series.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Lakshmi Ramasubramanian

On 2/11/21 6:11 PM, Thiago Jung Bauermann wrote:


Lakshmi Ramasubramanian  writes:


On 2/11/21 3:59 PM, Thiago Jung Bauermann wrote:

Lakshmi Ramasubramanian  writes:


On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote:

Hi Rob,
[PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem
This change causes build problem for x86_64 architecture (please see the
mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses
"elf_load_addr" for the ELF header buffer address and not
"elf_headers_mem".
struct kimage_arch {
   ...
   /* Core ELF header buffer */
   void *elf_headers;
   unsigned long elf_headers_sz;
   unsigned long elf_load_addr;
};
I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and
PPC64 since they are the only ones using this function now.
#if defined(CONFIG_ARM64) && defined(CONFIG_PPC64)

Sorry - I meant to say
#if defined(CONFIG_ARM64) || defined(CONFIG_PPC64)


Does it build correctly if you rename elf_headers_mem to elf_load_addr?
Or the other way around, renaming x86's elf_load_addr to
elf_headers_mem. I don't really have a preference.


Yes - changing arm64 and ppc from "elf_headers_mem" to "elf_load_addr" builds
fine.

But I am concerned about a few other architectures that also define "struct
kimage_arch" such as "parisc", "arm" which do not have any ELF related fields.
They would not build if the config defines CONFIG_KEXEC_FILE and
CONFIG_OF_FLATTREE.

Do you think that could be an issue?


That's a good point. But in practice, arm doesn't support
CONFIG_KEXEC_FILE. And while parisc does support CONFIG_KEXEC_FILE, as
far as I could determine it doesn't support CONFIG_OF.

So IMHO we don't need to worry about them. We'll cross that bridge if we
get there. If they ever implement KEXEC_FILE or OF_FLATTREE support,
then (again, IMHO) the natural solution would be for them to name the
ELF header member the same way the other arches do.

And since no other architecture defines struct kimage_arch, those are
the only ones we need to consider.



Sounds good Thiago.

I'll rename arm64 and ppc kimage_arch ELF address field to match that 
defined for x86/x64.
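
Roughly, the end state is that all three architectures agree on the
member name, so common code can use image->arch.elf_load_addr
everywhere. A sketch only (not the actual arch headers):

struct kimage_arch {
	/* ... other arch-specific members ... */

	/* Core ELF header buffer */
	void *elf_headers;
	unsigned long elf_headers_sz;
	unsigned long elf_load_addr;	/* was elf_headers_mem on arm64/ppc64 */
};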


Also, will add "fdt_size" param to of_kexec_alloc_and_setup_fdt(). For 
now, I'll use 2*fdt_totalsize(initial_boot_params) for ppc.


Will send the updated patches shortly.

 -lakshmi



Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Thiago Jung Bauermann


Lakshmi Ramasubramanian  writes:

> On 2/11/21 3:59 PM, Thiago Jung Bauermann wrote:
>> Lakshmi Ramasubramanian  writes:
>> 
>>> On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote:
 Hi Rob,
 [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem
 This change causes build problem for x86_64 architecture (please see the
 mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses
 "elf_load_addr" for the ELF header buffer address and not
 "elf_headers_mem".
 struct kimage_arch {
   ...
   /* Core ELF header buffer */
   void *elf_headers;
   unsigned long elf_headers_sz;
   unsigned long elf_load_addr;
 };
 I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and
 PPC64 since they are the only ones using this function now.
 #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64)
>>> Sorry - I meant to say
>>> #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64)
>>>
>> Does it build correctly if you rename elf_headers_mem to elf_load_addr?
>> Or the other way around, renaming x86's elf_load_addr to
>> elf_headers_mem. I don't really have a preference.
>
> Yes - changing arm64 and ppc from "elf_headers_mem" to "elf_load_addr" builds
> fine.
>
> But I am concerned about a few other architectures that also define "struct
> kimage_arch" such as "parisc", "arm" which do not have any ELF related fields.
> They would not build if the config defines CONFIG_KEXEC_FILE and
> CONFIG_OF_FLATTREE.
>
> Do you think that could be an issue?

That's a good point. But in practice, arm doesn't support
CONFIG_KEXEC_FILE. And while parisc does support CONFIG_KEXEC_FILE, as
far as I could determine it doesn't support CONFIG_OF.

So IMHO we don't need to worry about them. We'll cross that bridge if we
get there. If they ever implement KEXEC_FILE or OF_FLATTREE support,
then (again, IMHO) the natural solution would be for them to name the
ELF header member the same way the other arches do.

And since no other architecture defines struct kimage_arch, those are
the only ones we need to consider.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Thiago Jung Bauermann


Lakshmi Ramasubramanian  writes:

> On 2/11/21 5:09 PM, Thiago Jung Bauermann wrote:
>> There's actually a complication that I just noticed and needs to be
>> addressed. More below.
>> 
>
> <...>
>
>>> +
>>> +/*
>>> + * of_kexec_alloc_and_setup_fdt - Alloc and setup a new Flattened Device 
>>> Tree
>>> + *
>>> + * @image: kexec image being loaded.
>>> + * @initrd_load_addr:  Address where the next initrd will be loaded.
>>> + * @initrd_len:Size of the next initrd, or 0 if there will be 
>>> none.
>>> + * @cmdline:   Command line for the next kernel, or NULL if 
>>> there will
>>> + * be none.
>>> + *
>>> + * Return: fdt on success, or NULL errno on error.
>>> + */
>>> +void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
>>> +  unsigned long initrd_load_addr,
>>> +  unsigned long initrd_len,
>>> +  const char *cmdline)
>>> +{
>>> +   void *fdt;
>>> +   int ret, chosen_node;
>>> +   const void *prop;
>>> +   unsigned long fdt_size;
>>> +
>>> +   fdt_size = fdt_totalsize(initial_boot_params) +
>>> +  (cmdline ? strlen(cmdline) : 0) +
>>> +  FDT_EXTRA_SPACE;
>> Just adding 4 KB to initial_boot_params won't be enough for crash
>> kernels on ppc64. The current powerpc code doubles the size of
>> initial_boot_params (which is normally larger than 4 KB) and even that
>> isn't enough. A patch was added to powerpc/next today which uses a more
>> precise (but arch-specific) formula:
>> https://lore.kernel.org/linuxppc-dev/161243826811.119001.14083048209224609814.stgit@hbathini/
>> So I believe we need a hook here where architectures can provide their
>> own specific calculation for the size of the fdt. Perhaps a weakly
>> defined function providing a default implementation which an
>> arch-specific file can override (a la arch_kexec_kernel_image_load())?
>> Then the powerpc specific hook would be the kexec_fdt_totalsize_ppc64()
>> function from the patch I linked above.
>> 
>
> Do you think it'd better to add "fdt_size" parameter to
> of_kexec_alloc_and_setup_fdt() so that the caller can provide the 
> desired FDT buffer size?

Yes, that is actually simpler and better than my idea. :-)

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


[RFC PATCH] powerpc/64s: introduce call_realmode to run C code in real-mode

2021-02-11 Thread Nicholas Piggin
The regular kernel stack can not be accessed in real mode in hash
guest kernels, which prevents the MMU from being disabled in general
C code. Provide a helper that can call a function pointer in real
mode using the emergency stack (accessible in real mode).

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/asm-prototypes.h |  1 +
 arch/powerpc/include/asm/book3s/64/mmu.h  |  2 +
 arch/powerpc/include/asm/thread_info.h| 16 
 arch/powerpc/kernel/irq.c | 16 
 arch/powerpc/kernel/misc_64.S | 22 +++
 arch/powerpc/mm/book3s64/pgtable.c| 48 +++
 6 files changed, 89 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
b/arch/powerpc/include/asm/asm-prototypes.h
index d0b832cbbec8..a973023c390a 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -126,6 +126,7 @@ extern s64 __ashldi3(s64, int);
 extern s64 __ashrdi3(s64, int);
 extern int __cmpdi2(s64, s64);
 extern int __ucmpdi2(u64, u64);
+int __call_realmode(int (*fn)(void *arg), void *arg, void *sp);
 
 /* tracing */
 void _mcount(void);
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h 
b/arch/powerpc/include/asm/book3s/64/mmu.h
index 995bbcdd0ef8..80b0d24415ac 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -274,5 +274,7 @@ static inline unsigned long get_user_vsid(mm_context_t *ctx,
return get_vsid(context, ea, ssize);
 }
 
+int call_realmode(int (*fn)(void *arg), void *arg);
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/thread_info.h 
b/arch/powerpc/include/asm/thread_info.h
index 3d8a47af7a25..9279e472d51e 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -172,6 +172,22 @@ static inline bool test_thread_local_flags(unsigned int 
flags)
 #define is_elf2_task() (0)
 #endif
 
+static inline void check_stack_overflow(void)
+{
+   long sp;
+
+   if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
+   return;
+
+   sp = current_stack_pointer & (THREAD_SIZE - 1);
+
+   /* check for stack overflow: is there less than 2KB free? */
+   if (unlikely(sp < 2048)) {
+   pr_err("do_IRQ: stack overflow: %ld\n", sp);
+   dump_stack();
+   }
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 6b1eca53e36c..193b47b5b6a5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -620,22 +620,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
return sum;
 }
 
-static inline void check_stack_overflow(void)
-{
-   long sp;
-
-   if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
-   return;
-
-   sp = current_stack_pointer & (THREAD_SIZE - 1);
-
-   /* check for stack overflow: is there less than 2KB free? */
-   if (unlikely(sp < 2048)) {
-   pr_err("do_IRQ: stack overflow: %ld\n", sp);
-   dump_stack();
-   }
-}
-
 void __do_irq(struct pt_regs *regs)
 {
unsigned int irq;
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 070465825c21..5e911d0b0b16 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -27,6 +27,28 @@
 
.text
 
+#ifdef CONFIG_PPC_BOOK3S_64
+_GLOBAL(__call_realmode)
+	mflr	r0
+   std r0,16(r1)
+	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
+   mr  r1,r5
+   mtctr   r3
+   mr  r3,r4
+   mfmsr   r4
+	xori	r4,r4,(MSR_IR|MSR_DR)
+   mtmsrd  r4
+   bctrl
+   mfmsr   r4
+	xori	r4,r4,(MSR_IR|MSR_DR)
+   mtmsrd  r4
+   ld  r1,0(r1)
+   ld  r0,16(r1)
+	mtlr	r0
+   blr
+
+#endif
+
 _GLOBAL(call_do_softirq)
	mflr	r0
std r0,16(r1)
diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
b/arch/powerpc/mm/book3s64/pgtable.c
index 5b3a3bae21aa..aad0e2059305 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -474,6 +474,54 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
return true;
 }
 
+/*
+ * Executing C code in real-mode in general Book3S-64 code can only be done
+ * via this function that switches the stack to one inside the real-mode-area,
+ * which may cover only a small first part of real memory on hash guest LPARs.
+ * fn must be NOKPROBES, must not access vmalloc or anything outside the RMA,
+ * probably shouldn't enable the MMU or interrupts, etc, and be very careful
+ * about calling other generic kernel or powerpc functions.
+ */
+int call_realmode(int (*fn)(void *arg), void *arg)
+{
+   unsigned long flags;
+   void *cursp, *emsp;
+   int ret;
+
+   if (WARN_ON_ONCE(!(mfmsr() & MSR_DR)))
+   return -EINVAL;
+   
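
A usage sketch (hypothetical caller, not part of the patch): the
callback must be notrace/NOKPROBE-safe and, on hash guests, must not
touch the regular kernel stack or anything outside the RMA.

static int notrace real_mode_example(void *arg)
{
	/* keep this minimal: no vmalloc, nothing outside the RMA */
	return (mfmsr() & MSR_DR) ? -EIO : 0;
}

static int example_caller(void)
{
	/* returns 0 only if the callback really ran with the MMU off */
	return call_realmode(real_mode_example, NULL);
}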

Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Lakshmi Ramasubramanian

On 2/11/21 5:09 PM, Thiago Jung Bauermann wrote:


There's actually a complication that I just noticed and needs to be
addressed. More below.



<...>


+
+/*
+ * of_kexec_alloc_and_setup_fdt - Alloc and setup a new Flattened Device Tree
+ *
+ * @image: kexec image being loaded.
+ * @initrd_load_addr:  Address where the next initrd will be loaded.
+ * @initrd_len:Size of the next initrd, or 0 if there will be 
none.
+ * @cmdline:   Command line for the next kernel, or NULL if there will
+ * be none.
+ *
+ * Return: fdt on success, or NULL errno on error.
+ */
+void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
+  unsigned long initrd_load_addr,
+  unsigned long initrd_len,
+  const char *cmdline)
+{
+   void *fdt;
+   int ret, chosen_node;
+   const void *prop;
+   unsigned long fdt_size;
+
+   fdt_size = fdt_totalsize(initial_boot_params) +
+  (cmdline ? strlen(cmdline) : 0) +
+  FDT_EXTRA_SPACE;


Just adding 4 KB to initial_boot_params won't be enough for crash
kernels on ppc64. The current powerpc code doubles the size of
initial_boot_params (which is normally larger than 4 KB) and even that
isn't enough. A patch was added to powerpc/next today which uses a more
precise (but arch-specific) formula:

https://lore.kernel.org/linuxppc-dev/161243826811.119001.14083048209224609814.stgit@hbathini/

So I believe we need a hook here where architectures can provide their
own specific calculation for the size of the fdt. Perhaps a weakly
defined function providing a default implementation which an
arch-specific file can override (a la arch_kexec_kernel_image_load())?

Then the powerpc specific hook would be the kexec_fdt_totalsize_ppc64()
function from the patch I linked above.
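
A rough sketch of that hook idea (the function name below is
hypothetical, not something in the kernel or this thread):

unsigned long __weak arch_kexec_fdt_totalsize(const struct kimage *image,
					      const char *cmdline)
{
	/* generic default: current FDT plus command line plus some slack */
	return fdt_totalsize(initial_boot_params) +
	       (cmdline ? strlen(cmdline) : 0) + FDT_EXTRA_SPACE;
}

powerpc could then supply a strong definition built around the more
precise kexec_fdt_totalsize_ppc64() calculation from the patch linked
above.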



Do you think it'd better to add "fdt_size" parameter to 
of_kexec_alloc_and_setup_fdt() so that the caller can provide the 
desired FDT buffer size?


thanks,
 -lakshmi


Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Thiago Jung Bauermann


There's actually a complication that I just noticed and needs to be
addressed. More below.

Lakshmi Ramasubramanian  writes:

> From: Rob Herring 
>
> Both arm64 and powerpc do essentially the same FDT /chosen setup for
> kexec.  The differences are either omissions that arm64 should have
> or additional properties that will be ignored.  The setup code can be
> combined and shared by both powerpc and arm64.
>
> The differences relative to the arm64 version:
>  - If /chosen doesn't exist, it will be created (should never happen).
>  - Any old dtb and initrd reserved memory will be released.
>  - The new initrd and elfcorehdr are marked reserved.
>  - "linux,booted-from-kexec" is set.
>
> The differences relative to the powerpc version:
>  - "kaslr-seed" and "rng-seed" may be set.
>  - "linux,elfcorehdr" is set.
>  - Any existing "linux,usable-memory-range" is removed.
>
> Combine the code for setting up the /chosen node in the FDT and updating
> the memory reservation for kexec, for powerpc and arm64, in
> of_kexec_alloc_and_setup_fdt() and move it to "drivers/of/kexec.c".
>
> Signed-off-by: Rob Herring 
> Signed-off-by: Lakshmi Ramasubramanian 
> ---
>  drivers/of/Makefile |   6 ++
>  drivers/of/kexec.c  | 258 
>  include/linux/of.h  |  13 +++
>  3 files changed, 277 insertions(+)
>  create mode 100644 drivers/of/kexec.c
>
> diff --git a/drivers/of/Makefile b/drivers/of/Makefile
> index 6e1e5212f058..c13b982084a3 100644
> --- a/drivers/of/Makefile
> +++ b/drivers/of/Makefile
> @@ -14,4 +14,10 @@ obj-$(CONFIG_OF_RESOLVE)  += resolver.o
>  obj-$(CONFIG_OF_OVERLAY) += overlay.o
>  obj-$(CONFIG_OF_NUMA) += of_numa.o
>  
> +ifdef CONFIG_KEXEC_FILE
> +ifdef CONFIG_OF_FLATTREE
> +obj-y+= kexec.o
> +endif
> +endif
> +
>  obj-$(CONFIG_OF_UNITTEST) += unittest-data/
> diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
> new file mode 100644
> index ..469e09613cdd
> --- /dev/null
> +++ b/drivers/of/kexec.c
> @@ -0,0 +1,258 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2020 Arm Limited
> + *
> + * Based on arch/arm64/kernel/machine_kexec_file.c:
> + *  Copyright (C) 2018 Linaro Limited
> + *
> + * And arch/powerpc/kexec/file_load.c:
> + *  Copyright (C) 2016  IBM Corporation
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* relevant device tree properties */
> +#define FDT_PROP_KEXEC_ELFHDR	"linux,elfcorehdr"
> +#define FDT_PROP_MEM_RANGE	"linux,usable-memory-range"
> +#define FDT_PROP_INITRD_START	"linux,initrd-start"
> +#define FDT_PROP_INITRD_END	"linux,initrd-end"
> +#define FDT_PROP_BOOTARGS	"bootargs"
> +#define FDT_PROP_KASLR_SEED	"kaslr-seed"
> +#define FDT_PROP_RNG_SEED	"rng-seed"
> +#define RNG_SEED_SIZE		128
> +
> +/**
> + * fdt_find_and_del_mem_rsv - delete memory reservation with given address 
> and size
> + *
> + * @fdt: Flattened device tree for the current kernel.
> + * @start:   Starting address of the reserved memory.
> + * @size:Size of the reserved memory.
> + *
> + * Return: 0 on success, or negative errno on error.
> + */
> +static int fdt_find_and_del_mem_rsv(void *fdt, unsigned long start, unsigned 
> long size)
> +{
> + int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
> +
> + for (i = 0; i < num_rsvs; i++) {
> + u64 rsv_start, rsv_size;
> +
> + ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
> + if (ret) {
> + pr_err("Malformed device tree.\n");
> + return -EINVAL;
> + }
> +
> + if (rsv_start == start && rsv_size == size) {
> + ret = fdt_del_mem_rsv(fdt, i);
> + if (ret) {
> + pr_err("Error deleting device tree 
> reservation.\n");
> + return -EINVAL;
> + }
> +
> + return 0;
> + }
> + }
> +
> + return -ENOENT;
> +}
> +
> +/*
> + * of_kexec_alloc_and_setup_fdt - Alloc and setup a new Flattened Device Tree
> + *
> + * @image:   kexec image being loaded.
> + * @initrd_load_addr:Address where the next initrd will be loaded.
> + * @initrd_len:  Size of the next initrd, or 0 if there will be 
> none.
> + * @cmdline: Command line for the next kernel, or NULL if there will
> + *   be none.
> + *
> + * Return: fdt on success, or NULL errno on error.
> + */
> +void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
> +unsigned long initrd_load_addr,
> +unsigned long initrd_len,
> +const char *cmdline)
> +{
> + void *fdt;
> + int ret, chosen_node;
> + const void *prop;
> + unsigned long fdt_size;
> +
> + fdt_size = fdt_totalsize(initial_boot_params) +
> +(cmdline 

Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Lakshmi Ramasubramanian

On 2/11/21 3:59 PM, Thiago Jung Bauermann wrote:


Lakshmi Ramasubramanian  writes:


On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote:

Hi Rob,
[PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem
This change causes build problem for x86_64 architecture (please see the
mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses
"elf_load_addr" for the ELF header buffer address and not
"elf_headers_mem".
struct kimage_arch {
  ...
  /* Core ELF header buffer */
  void *elf_headers;
  unsigned long elf_headers_sz;
  unsigned long elf_load_addr;
};
I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and
PPC64 since they are the only ones using this function now.
#if defined(CONFIG_ARM64) && defined(CONFIG_PPC64)

Sorry - I meant to say
#if defined(CONFIG_ARM64) || defined(CONFIG_PPC64)



Does it build correctly if you rename elf_headers_mem to elf_load_addr?
Or the other way around, renaming x86's elf_load_addr to
elf_headers_mem. I don't really have a preference.


Yes - changing arm64 and ppc from "elf_headers_mem" to "elf_load_addr" 
builds fine.


But I am concerned about a few other architectures that also define 
"struct kimage_arch" such as "parisc", "arm" which do not have any ELF 
related fields. They would not build if the config defines 
CONFIG_KEXEC_FILE and CONFIG_OF_FLATTREE.


Do you think that could be an issue?

thanks,
 -lakshmi



That would be better than adding an #if condition.


void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
 unsigned long initrd_load_addr,
 unsigned long initrd_len,
 const char *cmdline)
{
  ...
}
#endif /* defined(CONFIG_ARM64) && defined(CONFIG_PPC64) */
Please let me know if you have any concerns.
thanks,
   -lakshmi






Re: [PATCH 5/6] powerpc/mm/64s/hash: Add real-mode change_memory_range() for hash LPAR

2021-02-11 Thread Nicholas Piggin
Excerpts from Michael Ellerman's message of February 11, 2021 11:51 pm:
> When we enabled STRICT_KERNEL_RWX we received some reports of boot
> failures when using the Hash MMU and running under phyp. The crashes
> are intermittent, and often exhibit as a completely unresponsive
> system, or possibly an oops.
> 
> One example, which was caught in xmon:
> 
>   [   14.068327][T1] devtmpfs: mounted
>   [   14.069302][T1] Freeing unused kernel memory: 5568K
>   [   14.142060][  T347] BUG: Unable to handle kernel instruction fetch
>   [   14.142063][T1] Run /sbin/init as init process
>   [   14.142074][  T347] Faulting instruction address: 0xc0004400
>   cpu 0x2: Vector: 400 (Instruction Access) at [cc7475e0]
>   pc: c0004400: exc_virt_0x4400_instruction_access+0x0/0x80
>   lr: c01862d4: update_rq_clock+0x44/0x110
>   sp: cc747880
>  msr: 800040001031
> current = 0xcc60d380
> paca= 0xc0001ec9de80   irqmask: 0x03   irq_happened: 0x01
>   pid   = 347, comm = kworker/2:1
>   ...
>   enter ? for help
>   [cc747880] c01862d4 update_rq_clock+0x44/0x110 (unreliable)
>   [cc7478f0] c0198794 update_blocked_averages+0xb4/0x6d0
>   [cc7479f0] c0198e40 update_nohz_stats+0x90/0xd0
>   [cc747a20] c01a13b4 _nohz_idle_balance+0x164/0x390
>   [cc747b10] c01a1af8 newidle_balance+0x478/0x610
>   [cc747be0] c01a1d48 pick_next_task_fair+0x58/0x480
>   [cc747c40] c0eaab5c __schedule+0x12c/0x950
>   [cc747cd0] c0eab3e8 schedule+0x68/0x120
>   [cc747d00] c016b730 worker_thread+0x130/0x640
>   [cc747da0] c0174d50 kthread+0x1a0/0x1b0
>   [cc747e10] c000e0f0 ret_from_kernel_thread+0x5c/0x6c
> 
> This shows that CPU 2, which was idle, woke up and then appears to
> randomly take an instruction fault on a completely valid area of
> kernel text.
> 
> The cause turns out to be the call to hash__mark_rodata_ro(), late in
> boot. Due to the way we layout text and rodata, that function actually
> changes the permissions for all of text and rodata to read-only plus
> execute.
> 
> To do the permission change we use a hypervisor call, H_PROTECT. On
> phyp that appears to be implemented by briefly removing the mapping of
> the kernel text, before putting it back with the updated permissions.
> If any other CPU is executing during that window, it will see spurious
> faults on the kernel text and/or data, leading to crashes.
> 
> To fix it we use stop machine to collect all other CPUs, and then have
> them drop into real mode (MMU off), while we change the mapping. That
> way they are unaffected by the mapping temporarily disappearing.
> 
> Signed-off-by: Michael Ellerman 
> ---
>  arch/powerpc/mm/book3s64/hash_pgtable.c | 105 +++-
>  1 file changed, 104 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c 
> b/arch/powerpc/mm/book3s64/hash_pgtable.c
> index 3663d3cdffac..01de985df2c4 100644
> --- a/arch/powerpc/mm/book3s64/hash_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -400,6 +401,19 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>  
>  #ifdef CONFIG_STRICT_KERNEL_RWX
> +
> +struct change_memory_parms {
> + unsigned long start, end, newpp;
> + unsigned int step, nr_cpus, master_cpu;
> + atomic_t cpu_counter;
> +};
> +
> +// We'd rather this was on the stack but it has to be in the RMO
> +static struct change_memory_parms chmem_parms;
> +
> +// And therefore we need a lock to protect it from concurrent use
> +static DEFINE_MUTEX(chmem_lock);
> +
>  static void change_memory_range(unsigned long start, unsigned long end,
>   unsigned int step, unsigned long newpp)
>  {
> @@ -414,6 +428,73 @@ static void change_memory_range(unsigned long start, 
> unsigned long end,
>   mmu_kernel_ssize);
>  }
>  
> +static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
> +{
> + unsigned long msr, tmp, flags;
> + int *p;
> +
> + p = &parms->cpu_counter.counter;
> +
> + local_irq_save(flags);
> + __hard_EE_RI_disable();
> +
> + asm volatile (
> + // Switch to real mode and leave interrupts off
> + "mfmsr  %[msr]  ;"
> + "li %[tmp], %[MSR_IR_DR];"
> + "andc   %[tmp], %[msr], %[tmp]  ;"
> + "mtmsrd %[tmp]  ;"
> +
> + // Tell the master we are in real mode
> + "1: "
> + "lwarx  %[tmp], 0, %[p] ;"
> + "addic  %[tmp], %[tmp], -1  ;"
> + "stwcx. %[tmp], 0, %[p] ;"
> + "bne-   1b  ;"
> +
> + // Spin until 
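
At a high level, the stop_machine() flow the patch implements looks
roughly like this (a sketch built on the structures quoted above, not
the actual patch body; it assumes cpu_counter is pre-initialised to the
number of secondary CPUs):

static int change_memory_range_fn(void *data)
{
	struct change_memory_parms *parms = data;

	/* secondaries drop to real mode and spin on cpu_counter */
	if (parms->master_cpu != smp_processor_id())
		return chmem_secondary_loop(parms);

	/* wait until every other CPU has switched to real mode */
	while (atomic_read(&parms->cpu_counter) != 0)
		barrier();

	change_memory_range(parms->start, parms->end, parms->step, parms->newpp);

	/* a negative counter releases the secondaries back to virtual mode */
	atomic_set(&parms->cpu_counter, -1);
	return 0;
}

with the caller invoking it, under chmem_lock, along the lines of
stop_machine(change_memory_range_fn, &chmem_parms, cpu_online_mask).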

Re: [PATCH] powerpc/pci: Remove unimplemented prototypes

2021-02-11 Thread Michael Ellerman
On Wed, 2 Sep 2020 13:51:38 +1000, Oliver O'Halloran wrote:
> The corresponding definitions were deleted in commit 3d5134ee8341
> ("[POWERPC] Rewrite IO allocation & mapping on powerpc64") which
> was merged a mere 13 years ago.

Applied to powerpc/next.

[1/1] powerpc/pci: Remove unimplemented prototypes
  https://git.kernel.org/powerpc/c/b3abe590c80e0ba55b6fce48762232d90dbc37a5

cheers


Re: [PATCH] powerpc: remove interrupt handler functions from the noinstr section

2021-02-11 Thread Michael Ellerman
On Thu, 11 Feb 2021 16:36:36 +1000, Nicholas Piggin wrote:
> The allyesconfig ppc64 kernel fails to link with relocations unable to
> fit after commit 3a96570ffceb ("powerpc: convert interrupt handlers to
> use wrappers"), which is due to the interrupt handler functions being
> put into the .noinstr.text section, which the linker script places on
> the opposite side of the main .text section from the interrupt entry
> asm code which calls the handlers.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: remove interrupt handler functions from the noinstr section
  https://git.kernel.org/powerpc/c/e4bb64c7a42e61bcb6f8b70279fc1f7805eaad3f

cheers


Re: [PATCH] powerpc/64s: syscall real mode entry use mtmsrd rather than rfid

2021-02-11 Thread Michael Ellerman
On Mon, 8 Feb 2021 16:33:26 +1000, Nicholas Piggin wrote:
> Have the real mode system call entry handler branch to the kernel
> 0xc000... address and then use mtmsrd to enable the MMU, rather than use
> SRRs and rfid.
> 
> Commit 8729c26e675c ("powerpc/64s/exception: Move real to virt switch
> into the common handler") implemented this style of real mode entry for
> other interrupt handlers, so this brings system calls into line with
> them, which is the main motivation for the change.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/64s: syscall real mode entry use mtmsrd rather than rfid
  https://git.kernel.org/powerpc/c/14ad0e7d04f46865775fb010ccd96fb1cc83433a

cheers


Re: [PATCH] powerpc/64s: Remove EXSLB interrupt save area

2021-02-11 Thread Michael Ellerman
On Mon, 8 Feb 2021 16:34:06 +1000, Nicholas Piggin wrote:
> SLB faults should not be taken while the PACA save areas are live, all
> memory accesses should be fetches from the kernel text, and access to
> PACA and the current stack, before C code is called or any other
> accesses are made.
> 
> All of these have pinned SLBs so will not take a SLB fault. Therefore
> EXSLB is not required.

Applied to powerpc/next.

[1/1] powerpc/64s: Remove EXSLB interrupt save area
  https://git.kernel.org/powerpc/c/ac7c5e9b08acdb54ef3525abcad24bdb3ed05551

cheers


Re: [PATCH] powerpc/64s: interrupt exit improve bounding of interrupt recursion

2021-02-11 Thread Michael Ellerman
On Sat, 23 Jan 2021 16:15:04 +1000, Nicholas Piggin wrote:
> When replaying pending soft-masked interrupts when an interrupt returns
> to an irqs-enabled context, there is a special case required if this was
> an asynchronous interrupt to avoid unbounded interrupt recursion.
> 
> This case was not tested for in the case the asynchronous interrupt hit
> in user context, because a subsequent nested interrupt would by definition
> hit in kernel mode, which then exits via the kernel path which does test
> this case.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/64s: interrupt exit improve bounding of interrupt recursion
  https://git.kernel.org/powerpc/c/c0ef717305f51e29b5ce0c78a6bfe566b3283415

cheers


Re: [PATCH 1/3] powerpc/83xx: Fix build error when CONFIG_PCI=n

2021-02-11 Thread Michael Ellerman
On Thu, 11 Feb 2021 00:08:02 +1100, Michael Ellerman wrote:
> As reported by lkp:
> 
>   arch/powerpc/platforms/83xx/km83xx.c:183:19: error: 'mpc83xx_setup_pci' 
> undeclared here (not in a function)
>  183 |  .discover_phbs = mpc83xx_setup_pci,
>|   ^
>|   mpc83xx_setup_arch
> 
> [...]

Applied to powerpc/next.

[1/3] powerpc/83xx: Fix build error when CONFIG_PCI=n
  https://git.kernel.org/powerpc/c/5c47c44f157f408c862b144bbd1d1e161a521aa2
[2/3] powerpc/mm/64s: Fix no previous prototype warning
  https://git.kernel.org/powerpc/c/2bb421a3d93601aa81bc39af7aac7280303e0761
[3/3] powerpc/amigaone: Make amigaone_discover_phbs() static
  https://git.kernel.org/powerpc/c/f30520c64f290589e91461d7326b497c23e7f5fd

cheers


Re: [PATCH V3] powerpc/perf: Adds support for programming of Thresholding in P10

2021-02-11 Thread Michael Ellerman
On Tue, 9 Feb 2021 15:22:34 +0530, Kajol Jain wrote:
> Thresholding, a performance monitoring unit feature, can be
> used to identify marked instructions which take more than
> expected cycles between start event and end event.
> Threshold compare (thresh_cmp) bits are programmed in MMCRA
> register. In Power9, thresh_cmp bits were part of the
> event code. But in case of P10, thresh_cmp are not part of
> event code due to inclusion of MMCR3 bits.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/perf: Adds support for programming of Thresholding in P10
  https://git.kernel.org/powerpc/c/82d2c16b350f72aa21ac2a6860c542aa4b43a51e

cheers


Re: [PATCH] powerpc/xive: Assign boolean values to a bool variable

2021-02-11 Thread Michael Ellerman
On Sun, 7 Feb 2021 14:43:12 +0800, Jiapeng Chong wrote:
> Fix the following coccicheck warnings:
> 
> ./arch/powerpc/kvm/book3s_xive.c:1856:2-17: WARNING: Assignment of 0/1
> to bool variable.
> 
> ./arch/powerpc/kvm/book3s_xive.c:1854:2-17: WARNING: Assignment of 0/1
> to bool variable.

Applied to powerpc/next.

[1/1] powerpc/xive: Assign boolean values to a bool variable
  https://git.kernel.org/powerpc/c/c9df3f809cc98b196548864f52d3c4e280dd1970

cheers


Re: [PATCH] powerpc/kexec_file: fix FDT size estimation for kdump kernel

2021-02-11 Thread Michael Ellerman
On Thu, 04 Feb 2021 17:01:10 +0530, Hari Bathini wrote:
> On systems with large amount of memory, loading kdump kernel through
> kexec_file_load syscall may fail with the below error:
> 
> "Failed to update fdt with linux,drconf-usable-memory property"
> 
> This happens because the size estimation for kdump kernel's FDT does
> not account for the additional space needed to setup usable memory
> properties. Fix it by accounting for the space needed to include
> linux,usable-memory & linux,drconf-usable-memory properties while
> estimating kdump kernel's FDT size.

Applied to powerpc/next.

[1/1] powerpc/kexec_file: fix FDT size estimation for kdump kernel
  https://git.kernel.org/powerpc/c/2377c92e37fe97bc5b365f55cf60f56dfc4849f5

cheers


Re: [PATCH v6 0/2] powerpc/32: Implement C syscall entry/exit (complement)

2021-02-11 Thread Michael Ellerman
On Tue, 9 Feb 2021 19:29:26 + (UTC), Christophe Leroy wrote:
> This series implements C syscall entry/exit for PPC32. It reuses
> the work already done for PPC64.
> 
> This series is based on today's next-test (f538b53fd47a) where main patchs 
> from v5 are merged in.
> 
> The first patch is important for performance.
> 
> [...]

Applied to powerpc/next.

[1/3] powerpc/syscall: Do not check unsupported scv vector on PPC32
  https://git.kernel.org/powerpc/c/b966f2279048ee9f30d83ef8568b99fa40917c54
[2/3] powerpc/32: Handle bookE debugging in C in syscall entry/exit
  https://git.kernel.org/powerpc/c/d524dda719f06967db4d3ba519edf9267f84c155
[3/3] powerpc/syscall: Avoid storing 'current' in another pointer
  https://git.kernel.org/powerpc/c/5b90b9661a3396e00f6e8bcbb617a0787fb683d0

cheers


Re: [PATCH v2 1/3] powerpc/uaccess: get rid of small constant size cases in raw_copy_{to, from}_user()

2021-02-11 Thread Michael Ellerman
On Tue, 9 Feb 2021 14:02:12 + (UTC), Christophe Leroy wrote:
> Copied from commit 4b842e4e25b1 ("x86: get rid of small
> constant size cases in raw_copy_{to,from}_user()")
> 
> Very few call sites where that would be triggered remain, and none
> of those is anywhere near hot enough to bother.

Applied to powerpc/next.

[1/3] powerpc/uaccess: get rid of small constant size cases in 
raw_copy_{to,from}_user()
  https://git.kernel.org/powerpc/c/6b385d1d7c0a346758e35b128815afa25d4709ee
[2/3] powerpc/uaccess: Merge __put_user_size_allowed() into __put_user_size()
  https://git.kernel.org/powerpc/c/95d019e0f9225954e33b6efcad315be9d548a4d7
[3/3] powerpc/uaccess: Merge raw_copy_to_user_allowed() into raw_copy_to_user()
  https://git.kernel.org/powerpc/c/052f9d206f6c4b5b512b8c201d375f2dd194be35

cheers


Re: [PATCH v5 00/22] powerpc/32: Implement C syscall entry/exit

2021-02-11 Thread Michael Ellerman
On Mon, 8 Feb 2021 15:10:19 + (UTC), Christophe Leroy wrote:
> This series implements C syscall entry/exit for PPC32. It reuses
> the work already done for PPC64.
> 
> This series is based on today's merge-test 
> (b6f72fc05389e3fc694bf5a5fa1bbd33f61879e0)
> 
> In terms on performance we have the following number of cycles on an
> 8xx running null_syscall benchmark:
> - mainline: 296 cycles
> - after patch 4: 283 cycles
> - after patch 16: 304 cycles
> - after patch 17: 348 cycles
> - at the end of the series: 320 cycles
> 
> [...]

Patches 1-15 and 21 applied to powerpc/next.

[01/22] powerpc/32s: Add missing call to kuep_lock on syscall entry

https://git.kernel.org/powerpc/c/57fdfbce89137ae85cd5cef48be168040a47dd13
[02/22] powerpc/32: Always enable data translation on syscall entry

https://git.kernel.org/powerpc/c/eca2411040c1ee15b8882c6427fb4eb5a48ada69
[03/22] powerpc/32: On syscall entry, enable instruction translation at the 
same time as data

https://git.kernel.org/powerpc/c/76249ddc27080b6b835a89cedcc4185b3b5a6b23
[04/22] powerpc/32: Reorder instructions to avoid using CTR in syscall entry

https://git.kernel.org/powerpc/c/2c59e5104821c5720e88bafa9e522f8bea9ce8fa
[05/22] powerpc/irq: Add helper to set regs->softe

https://git.kernel.org/powerpc/c/fb5608fd117a8b48752d2b5a7e70847c1ed33d33
[06/22] powerpc/irq: Rework helpers that manipulate MSR[EE/RI]

https://git.kernel.org/powerpc/c/08353779f2889305f64e04de3e46ed59ed60f859
[07/22] powerpc/irq: Add stub irq_soft_mask_return() for PPC32

https://git.kernel.org/powerpc/c/6650c4782d5788346a25a4f698880d124f2699a0
[08/22] powerpc/syscall: Rename syscall_64.c into interrupt.c

https://git.kernel.org/powerpc/c/ab1a517d55b01b54ba70f5d54f926f5ab4b18339
[09/22] powerpc/syscall: Make interrupt.c buildable on PPC32

https://git.kernel.org/powerpc/c/344bb20b159dd0996e521c0d4c131a6ae10c322a
[10/22] powerpc/syscall: Use is_compat_task()

https://git.kernel.org/powerpc/c/72b7a9e56b25babfe4c90bf3ce88285c7fb62ab9
[11/22] powerpc/syscall: Save r3 in regs->orig_r3

https://git.kernel.org/powerpc/c/8875f47b7681aa4e4484a9b612577b044725f839
[12/22] powerpc/syscall: Change condition to check MSR_RI

https://git.kernel.org/powerpc/c/c01b916658150e98f00a4981750c37a3224c8735
[13/22] powerpc/32: Always save non volatile GPRs at syscall entry

https://git.kernel.org/powerpc/c/fbcee2ebe8edbb6a93316f0a189ae7fcfaa7094f
[14/22] powerpc/syscall: implement system call entry/exit logic in C for PPC32

https://git.kernel.org/powerpc/c/6f76a01173ccaa363739f913394d4e138d92d718
[15/22] powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry

https://git.kernel.org/powerpc/c/4d67facbcbdb3d9e3c9cb82e4ec47fc63d298dd8
[21/22] powerpc/32: Remove the counter in global_dbcr0

https://git.kernel.org/powerpc/c/eb595eca74067b78d36fb188b555e30f28686fc7

cheers


Re: [PATCH] powerpc/32: Preserve cr1 in exception prolog stack check to fix build error

2021-02-11 Thread Michael Ellerman
On Mon, 8 Feb 2021 07:17:40 + (UTC), Christophe Leroy wrote:
> THREAD_ALIGN_SHIFT = THREAD_SHIFT + 1 = PAGE_SHIFT + 1
> Maximum PAGE_SHIFT is 18 for 256k pages so
> THREAD_ALIGN_SHIFT is 19 at the maximum.
> 
> No need to clobber cr1, it can be preserved when moving r1
> into CR when we check stack overflow.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/32: Preserve cr1 in exception prolog stack check to fix build 
error
  https://git.kernel.org/powerpc/c/3642eb21256a317ac14e9ed560242c6d20cf06d9

cheers


Re: [PATCH 1/3] spi: mpc52xx: Avoid using get_tbl()

2021-02-11 Thread Michael Ellerman
On Tue, 9 Feb 2021 10:26:21 + (UTC), Christophe Leroy wrote:
> get_tbl() is confusing as it returns the content of the TBL register
> on PPC32 but the concatenation of TBL and TBU on PPC64.
> 
> Use mftb() instead.
> 
> This will allow the removal of get_tbl() in a following patch.

Applied to powerpc/next.

[1/3] spi: mpc52xx: Avoid using get_tbl()
  https://git.kernel.org/powerpc/c/e10656114d32c659768e7ca8aebaaa6ac6e959ab
[2/3] powerpc/time: Avoid using get_tbl()
  https://git.kernel.org/powerpc/c/55d68df623eb679cc91f61137f14751e7f369662
[3/3] powerpc/time: Remove get_tbl()
  https://git.kernel.org/powerpc/c/132f94f133961d18af615cb3503368e59529e9a8

cheers


Re: [PATCH 1/3] powerpc/mm: Enable compound page check for both THP and HugeTLB

2021-02-11 Thread Michael Ellerman
On Wed, 3 Feb 2021 10:28:10 +0530, Aneesh Kumar K.V wrote:
> THP config results in compound pages. Make sure the kernel enables
> the PageCompound() check with CONFIG_HUGETLB_PAGE disabled and
> CONFIG_TRANSPARENT_HUGEPAGE enabled.
> 
> This makes sure we correctly flush the icache with THP pages.
> flush_dcache_icache_page only matter for platforms that don't support
> COHERENT_ICACHE.

Applied to powerpc/next.

[1/3] powerpc/mm: Enable compound page check for both THP and HugeTLB
  https://git.kernel.org/powerpc/c/c7ba2d636342093cfb842f47640e5b62192adfed
[2/3] powerpc/mm: Add PG_dcache_clean to indicate dcache clean state
  https://git.kernel.org/powerpc/c/ec94b9b23d620d40ab2ced094a30c22bb8d69b9f
[3/3] powerpc/mm: Remove dcache flush from memory remove.
  https://git.kernel.org/powerpc/c/2ac02e5ecec0cc2484d60a73b1bc6394aa2fad28

cheers


Re: [PATCH kernel v3] powerpc/uaccess: Skip might_fault() when user access is enabled

2021-02-11 Thread Michael Ellerman
On Thu, 4 Feb 2021 23:16:12 +1100, Alexey Kardashevskiy wrote:
> The amount of code executed with enabled user space access (unlocked KUAP)
> should be minimal. However with CONFIG_PROVE_LOCKING or
> CONFIG_DEBUG_ATOMIC_SLEEP enabled, might_fault() may end up replaying
> interrupts which in turn may access the user space and forget to restore
> the KUAP state.
> 
> The problem places are:
> 1. strncpy_from_user (and similar) which unlock KUAP and call
> unsafe_get_user -> __get_user_allowed -> __get_user_nocheck()
> with do_allow=false to skip KUAP as the caller took care of it.
> 2. __put_user_nocheck_goto() which is called with unlocked KUAP.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/uaccess: Avoid might_fault() when user access is enabled
  https://git.kernel.org/powerpc/c/7d506ca97b665b95e698a53697dad99fae813c1a

cheers


Re: [PATCH kernel] powerpc/kuap: Restore AMR after replaying soft interrupts

2021-02-11 Thread Michael Ellerman
On Tue, 2 Feb 2021 20:15:41 +1100, Alexey Kardashevskiy wrote:
> Since de78a9c "powerpc: Add a framework for Kernel Userspace Access
> Protection", user access helpers call user_{read|write}_access_{begin|end}
> when user space access is allowed.
> 
> 890274c "powerpc/64s: Implement KUAP for Radix MMU" made the mentioned
> helpers program a AMR special register to allow such access for a short
> period of time, most of the time AMR is expected to block user memory
> access by the kernel.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/kuap: Restore AMR after replaying soft interrupts
  https://git.kernel.org/powerpc/c/60a707d0c99aff4eadb7fd334c5fd21df386723e

cheers


Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Thiago Jung Bauermann


Lakshmi Ramasubramanian  writes:

> On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote:
>> Hi Rob,
>> [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem
>> This change causes build problem for x86_64 architecture (please see the 
>> mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses
>> "elf_load_addr" for the ELF header buffer address and not 
>> "elf_headers_mem".
>> struct kimage_arch {
>>  ...
>>  /* Core ELF header buffer */
>>  void *elf_headers;
>>  unsigned long elf_headers_sz;
>>  unsigned long elf_load_addr;
>> };
>> I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and 
>> PPC64 since they are the only ones using this function now.
>> #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64)
> Sorry - I meant to say
> #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64)
>

Does it build correctly if you rename elf_headers_mem to elf_load_addr?
Or the other way around, renaming x86's elf_load_addr to
elf_headers_mem. I don't really have a preference.

That would be better than adding an #if condition.

>> void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
>> unsigned long initrd_load_addr,
>> unsigned long initrd_len,
>> const char *cmdline)
>> {
>>  ...
>> }
>> #endif /* defined(CONFIG_ARM64) && defined(CONFIG_PPC64) */
>> Please let me know if you have any concerns.
>> thanks,
>>   -lakshmi

-- 
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.11-8 tag

2021-02-11 Thread pr-tracker-bot
The pull request you sent on Fri, 12 Feb 2021 10:15:59 +1100:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
> tags/powerpc-5.11-8

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/dcc0b49040c70ad827a7f3d58a21b01fdb14e749

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


[PATCH v2 2/2] driver core: platform: Drop of_device_node_put() wrapper

2021-02-11 Thread Rob Herring
of_device_node_put() is just a wrapper for of_node_put(). The platform
driver core is already polluted with of_node pointers and the only 'get'
already uses of_node_get() (though typically the get would happen in
of_device_alloc()).

Cc: Greg Kroah-Hartman 
Cc: "Rafael J. Wysocki" 
Cc: Frank Rowand 
Signed-off-by: Rob Herring 
---
v2:
 - Fix build
---
 drivers/base/platform.c   | 2 +-
 include/linux/of_device.h | 7 ---
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 95fd1549f87d..9d5171e5f967 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -571,7 +571,7 @@ static void platform_device_release(struct device *dev)
struct platform_object *pa = container_of(dev, struct platform_object,
  pdev.dev);
 
-   of_device_node_put(&pa->pdev.dev);
+   of_node_put(pa->pdev.dev.of_node);
kfree(pa->pdev.dev.platform_data);
kfree(pa->pdev.mfd_cell);
kfree(pa->pdev.resource);
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index d7a407dfeecb..1d7992a02e36 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -38,11 +38,6 @@ extern int of_device_request_module(struct device *dev);
 extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env);
 extern int of_device_uevent_modalias(struct device *dev, struct 
kobj_uevent_env *env);
 
-static inline void of_device_node_put(struct device *dev)
-{
-   of_node_put(dev->of_node);
-}
-
 static inline struct device_node *of_cpu_device_node_get(int cpu)
 {
struct device *cpu_dev;
@@ -94,8 +89,6 @@ static inline int of_device_uevent_modalias(struct device 
*dev,
return -ENODEV;
 }
 
-static inline void of_device_node_put(struct device *dev) { }
-
 static inline const struct of_device_id *of_match_device(
const struct of_device_id *matches, const struct device *dev)
 {
-- 
2.27.0



[PATCH v2 1/2] of: Remove of_dev_{get,put}()

2021-02-11 Thread Rob Herring
of_dev_get() and of_dev_put() are just wrappers for get_device()/put_device()
on a platform_device. There are already platform_device_{get,put}()
wrappers for this purpose. Let's update the few users and remove
of_dev_{get,put}().

Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Frank Rowand 
Cc: Patrice Chotard 
Cc: Felipe Balbi 
Cc: Greg Kroah-Hartman 
Cc: Julia Lawall 
Cc: Gilles Muller 
Cc: Nicolas Palix 
Cc: Michal Marek 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: net...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-...@vger.kernel.org
Cc: co...@systeme.lip6.fr
Signed-off-by: Rob Herring 
---
v2:
 - Fix build
---
 arch/powerpc/platforms/pseries/ibmebus.c |  4 ++--
 drivers/net/ethernet/ibm/emac/core.c | 15 ---
 drivers/of/device.c  | 21 -
 drivers/of/platform.c|  4 ++--
 drivers/of/unittest.c|  2 +-
 drivers/usb/dwc3/dwc3-st.c   |  2 +-
 include/linux/of_device.h|  3 ---
 scripts/coccinelle/free/put_device.cocci |  1 -
 8 files changed, 14 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/ibmebus.c 
b/arch/powerpc/platforms/pseries/ibmebus.c
index 8c6e509f6967..a15ab33646b3 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -355,12 +355,12 @@ static int ibmebus_bus_device_probe(struct device *dev)
if (!drv->probe)
return error;
 
-   of_dev_get(of_dev);
+   get_device(dev);
 
if (of_driver_match_device(dev, dev->driver))
error = drv->probe(of_dev);
if (error)
-   of_dev_put(of_dev);
+   put_device(dev);
 
return error;
 }
diff --git a/drivers/net/ethernet/ibm/emac/core.c 
b/drivers/net/ethernet/ibm/emac/core.c
index c00b9097eeea..471be6ec7e8a 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -2390,11 +2391,11 @@ static int emac_check_deps(struct emac_instance *dev,
 
 static void emac_put_deps(struct emac_instance *dev)
 {
-   of_dev_put(dev->mal_dev);
-   of_dev_put(dev->zmii_dev);
-   of_dev_put(dev->rgmii_dev);
-   of_dev_put(dev->mdio_dev);
-   of_dev_put(dev->tah_dev);
+   platform_device_put(dev->mal_dev);
+   platform_device_put(dev->zmii_dev);
+   platform_device_put(dev->rgmii_dev);
+   platform_device_put(dev->mdio_dev);
+   platform_device_put(dev->tah_dev);
 }
 
 static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action,
@@ -2435,7 +2436,7 @@ static int emac_wait_deps(struct emac_instance *dev)
for (i = 0; i < EMAC_DEP_COUNT; i++) {
of_node_put(deps[i].node);
if (err)
-   of_dev_put(deps[i].ofdev);
+   platform_device_put(deps[i].ofdev);
}
if (err == 0) {
dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev;
@@ -2444,7 +2445,7 @@ static int emac_wait_deps(struct emac_instance *dev)
dev->tah_dev = deps[EMAC_DEP_TAH_IDX].ofdev;
dev->mdio_dev = deps[EMAC_DEP_MDIO_IDX].ofdev;
}
-   of_dev_put(deps[EMAC_DEP_PREV_IDX].ofdev);
+   platform_device_put(deps[EMAC_DEP_PREV_IDX].ofdev);
return err;
 }
 
diff --git a/drivers/of/device.c b/drivers/of/device.c
index aedfaaafd3e7..9a748855b39d 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -33,27 +33,6 @@ const struct of_device_id *of_match_device(const struct 
of_device_id *matches,
 }
 EXPORT_SYMBOL(of_match_device);
 
-struct platform_device *of_dev_get(struct platform_device *dev)
-{
-   struct device *tmp;
-
-   if (!dev)
-   return NULL;
-   tmp = get_device(&dev->dev);
-   if (tmp)
-   return to_platform_device(tmp);
-   else
-   return NULL;
-}
-EXPORT_SYMBOL(of_dev_get);
-
-void of_dev_put(struct platform_device *dev)
-{
-   if (dev)
-   put_device(&dev->dev);
-}
-EXPORT_SYMBOL(of_dev_put);
-
 int of_device_add(struct platform_device *ofdev)
 {
BUG_ON(ofdev->dev.of_node == NULL);
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 79bd5f5a1bf1..020bf860c72c 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -687,7 +687,7 @@ static int of_platform_notify(struct notifier_block *nb,
pdev_parent = of_find_device_by_node(rd->dn->parent);
pdev = of_platform_device_create(rd->dn, NULL,
pdev_parent ? &pdev_parent->dev : NULL);
-   of_dev_put(pdev_parent);
+   platform_device_put(pdev_parent);
 
if (pdev == NULL) {
pr_err("%s: failed to create for '%pOF'\n",
@@ -712,7 +712,7 @@ static int 

[PATCH v2 0/2] of: of_device.h cleanups

2021-02-11 Thread Rob Herring
This is a couple of cleanups for of_device.h. They fell out from my
attempt at decoupling of_device.h and of_platform.h which is a mess
and I haven't finished, but there's no reason to wait on these.

Rob

Rob Herring (2):
  of: Remove of_dev_{get,put}()
  driver core: platform: Drop of_device_node_put() wrapper

 arch/powerpc/platforms/pseries/ibmebus.c |  4 ++--
 drivers/base/platform.c  |  2 +-
 drivers/net/ethernet/ibm/emac/core.c | 15 ---
 drivers/of/device.c  | 21 -
 drivers/of/platform.c|  4 ++--
 drivers/of/unittest.c|  2 +-
 drivers/usb/dwc3/dwc3-st.c   |  2 +-
 include/linux/of_device.h| 10 --
 scripts/coccinelle/free/put_device.cocci |  1 -
 9 files changed, 15 insertions(+), 46 deletions(-)

-- 
2.27.0



Re: [PATCH 5/6] powerpc/mm/64s/hash: Add real-mode change_memory_range() for hash LPAR

2021-02-11 Thread Nicholas Piggin
Excerpts from Michael Ellerman's message of February 11, 2021 11:51 pm:
> When we enabled STRICT_KERNEL_RWX we received some reports of boot
> failures when using the Hash MMU and running under phyp. The crashes
> are intermittent, and often exhibit as a completely unresponsive
> system, or possibly an oops.
> 
> One example, which was caught in xmon:
> 
>   [   14.068327][T1] devtmpfs: mounted
>   [   14.069302][T1] Freeing unused kernel memory: 5568K
>   [   14.142060][  T347] BUG: Unable to handle kernel instruction fetch
>   [   14.142063][T1] Run /sbin/init as init process
>   [   14.142074][  T347] Faulting instruction address: 0xc0004400
>   cpu 0x2: Vector: 400 (Instruction Access) at [cc7475e0]
>   pc: c0004400: exc_virt_0x4400_instruction_access+0x0/0x80
>   lr: c01862d4: update_rq_clock+0x44/0x110
>   sp: cc747880
>  msr: 800040001031
> current = 0xcc60d380
> paca= 0xc0001ec9de80   irqmask: 0x03   irq_happened: 0x01
>   pid   = 347, comm = kworker/2:1
>   ...
>   enter ? for help
>   [cc747880] c01862d4 update_rq_clock+0x44/0x110 (unreliable)
>   [cc7478f0] c0198794 update_blocked_averages+0xb4/0x6d0
>   [cc7479f0] c0198e40 update_nohz_stats+0x90/0xd0
>   [cc747a20] c01a13b4 _nohz_idle_balance+0x164/0x390
>   [cc747b10] c01a1af8 newidle_balance+0x478/0x610
>   [cc747be0] c01a1d48 pick_next_task_fair+0x58/0x480
>   [cc747c40] c0eaab5c __schedule+0x12c/0x950
>   [cc747cd0] c0eab3e8 schedule+0x68/0x120
>   [cc747d00] c016b730 worker_thread+0x130/0x640
>   [cc747da0] c0174d50 kthread+0x1a0/0x1b0
>   [cc747e10] c000e0f0 ret_from_kernel_thread+0x5c/0x6c
> 
> This shows that CPU 2, which was idle, woke up and then appears to
> randomly take an instruction fault on a completely valid area of
> kernel text.
> 
> The cause turns out to be the call to hash__mark_rodata_ro(), late in
> boot. Due to the way we layout text and rodata, that function actually
> changes the permissions for all of text and rodata to read-only plus
> execute.
> 
> To do the permission change we use a hypervisor call, H_PROTECT. On
> phyp that appears to be implemented by briefly removing the mapping of
> the kernel text, before putting it back with the updated permissions.
> If any other CPU is executing during that window, it will see spurious
> faults on the kernel text and/or data, leading to crashes.
> 
> To fix it we use stop machine to collect all other CPUs, and then have
> them drop into real mode (MMU off), while we change the mapping. That
> way they are unaffected by the mapping temporarily disappearing.
> 
> Signed-off-by: Michael Ellerman 
> ---
>  arch/powerpc/mm/book3s64/hash_pgtable.c | 105 +++-
>  1 file changed, 104 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c 
> b/arch/powerpc/mm/book3s64/hash_pgtable.c
> index 3663d3cdffac..01de985df2c4 100644
> --- a/arch/powerpc/mm/book3s64/hash_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -400,6 +401,19 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>  
>  #ifdef CONFIG_STRICT_KERNEL_RWX
> +
> +struct change_memory_parms {
> + unsigned long start, end, newpp;
> + unsigned int step, nr_cpus, master_cpu;
> + atomic_t cpu_counter;
> +};
> +
> +// We'd rather this was on the stack but it has to be in the RMO
> +static struct change_memory_parms chmem_parms;
> +
> +// And therefore we need a lock to protect it from concurrent use
> +static DEFINE_MUTEX(chmem_lock);
> +
>  static void change_memory_range(unsigned long start, unsigned long end,
>   unsigned int step, unsigned long newpp)
>  {
> @@ -414,6 +428,73 @@ static void change_memory_range(unsigned long start, 
> unsigned long end,
>   mmu_kernel_ssize);
>  }
>  
> +static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
> +{
> + unsigned long msr, tmp, flags;
> + int *p;
> +
> + p = &parms->cpu_counter.counter;
> +
> + local_irq_save(flags);
> + __hard_EE_RI_disable();
> +
> + asm volatile (
> + // Switch to real mode and leave interrupts off
> + "mfmsr  %[msr]  ;"
> + "li %[tmp], %[MSR_IR_DR];"
> + "andc   %[tmp], %[msr], %[tmp]  ;"
> + "mtmsrd %[tmp]  ;"
> +
> + // Tell the master we are in real mode
> + "1: "
> + "lwarx  %[tmp], 0, %[p] ;"
> + "addic  %[tmp], %[tmp], -1  ;"
> + "stwcx. %[tmp], 0, %[p] ;"
> + "bne-   1b  ;"
> +
> + // Spin until 

[GIT PULL] Please pull powerpc/linux.git powerpc-5.11-8 tag

2021-02-11 Thread Michael Ellerman
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA256

Hi Linus,

Please pull one final powerpc fix for 5.11:

The following changes since commit 24321ac668e452a4942598533d267805f291fdc9:

  powerpc/64/signal: Fix regression in __kernel_sigtramp_rt64() semantics 
(2021-02-02 22:14:41 +1100)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
tags/powerpc-5.11-8

for you to fetch changes up to 8c511eff1827239f24ded212b1bcda7ca5b16203:

  powerpc/kuap: Allow kernel thread to access userspace after kthread_use_mm 
(2021-02-06 23:13:04 +1100)

- --
powerpc fixes for 5.11 #8

One fix for a regression seen in io_uring, introduced by our support for KUAP
(Kernel User Access Prevention) with the Hash MMU.

Thanks to: Aneesh Kumar K.V, Zorro Lang.

- --
Aneesh Kumar K.V (1):
  powerpc/kuap: Allow kernel thread to access userspace after kthread_use_mm


 arch/powerpc/include/asm/book3s/64/kup.h   | 16 +++-
 arch/powerpc/include/asm/book3s/64/pkeys.h |  4 
 arch/powerpc/mm/book3s64/pkeys.c   |  1 +
 3 files changed, 12 insertions(+), 9 deletions(-)
-BEGIN PGP SIGNATURE-

iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmAlumMACgkQUevqPMjh
pYBb9BAAqpyhvEjDiyP0xeHUTQ9UsNMnNUW7Hp03jJYH+EKWC9nT7kb3TY1eNyk5
KyMBOtXiQsggzWDE31bR32BpH/JChSeRPaCO4HHdDS9t+3ZIo16RTFksdiIhCN6m
nI4WhnfrdZszstMRsKWzfoLRVDGyNi0hyyQaMS3ypuKvRmozZk6u9K/YNXMa97Wf
ihhB0lYRdfMNgxMm66uaqEtzYt3Z4dRj9Y24LQirJnp32xK+sNgoleHl4gvvKG3m
r7CogqHlcExbkD3dl/PPe/SVEesfXpmTrQPCvJmi0qWm9NzkduQWrSEkUkUp1YQD
T0pBHnCxtI7GAAQpCphBA3gjrz03Og4/RAXmfESgI0JHyh7Vihx91XOwuonuJndn
5ThY2D9+nkZ2vnlis2/AoLx6ClbNgZysr0iAOsRyd2SYR9Er2CcPZ4OuNHXWTHlz
G4SmVYiZj9gnSrzqlEGIqBOVWdykV+x+xkLLQx86HUAI+7f1mFV1+dJ4E5NLGKzS
jB+XwwG2y6q6SnJt2iiybqDu9K1lPWnKFLNeb4at7GfpJ4riKNcRSYrkY9xYxmkR
kgKyXfW+rNt1RcIoy65kNa3hSnXi4p9wc5Lph7joS7zTkZIKR2E3QUJjGQao85Qa
rNxccSz3X7ZAL9OEJP/4nE72Zf3VXkzuKbB79D3dhFJ8SJmbPqQ=
=GDL5
-END PGP SIGNATURE-


Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Nicholas Piggin
Excerpts from Segher Boessenkool's message of February 11, 2021 9:50 pm:
> On Thu, Feb 11, 2021 at 08:04:55PM +1000, Nicholas Piggin wrote:
>> It would be nice if we could have a __builtin_trap_if that gcc would use 
>> conditional traps with, (and which never assumes following code is 
>> unreachable even for constant true, so we can use it with WARN and put 
>> explicit unreachable for BUG).
> 
> It automatically does that with just __builtin_trap, see my other mail :-)

If that is generated without branches (or at least with no more
branches than the existing asm implementation), then it could be usable
without trashing CFAR.

Unfortunately I don't think we will be parsing the DWARF information
to get line numbers from it any time soon, so it's not a drop-in
replacement, but maybe one day someone will find a solution.

Thanks,
Nick


[PATCH 2/2] driver core: platform: Drop of_device_node_put() wrapper

2021-02-11 Thread Rob Herring
of_device_node_put() is just a wrapper for of_node_put(). The platform
driver core is already polluted with of_node pointers and the only 'get'
already uses of_node_get() (though typically the get would happen in
of_device_alloc()).

Cc: Greg Kroah-Hartman 
Cc: "Rafael J. Wysocki" 
Cc: Frank Rowand 
Signed-off-by: Rob Herring 
---
 drivers/base/platform.c   | 2 +-
 include/linux/of_device.h | 7 ---
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 95fd1549f87d..c31bc9e92dd1 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -571,7 +571,7 @@ static void platform_device_release(struct device *dev)
struct platform_object *pa = container_of(dev, struct platform_object,
  pdev.dev);
 
-   of_device_node_put(&pa->pdev.dev);
+   of_node_put(&pa->pdev.dev->of_node);
kfree(pa->pdev.dev.platform_data);
kfree(pa->pdev.mfd_cell);
kfree(pa->pdev.resource);
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index d7a407dfeecb..1d7992a02e36 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -38,11 +38,6 @@ extern int of_device_request_module(struct device *dev);
 extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env);
 extern int of_device_uevent_modalias(struct device *dev, struct 
kobj_uevent_env *env);
 
-static inline void of_device_node_put(struct device *dev)
-{
-   of_node_put(dev->of_node);
-}
-
 static inline struct device_node *of_cpu_device_node_get(int cpu)
 {
struct device *cpu_dev;
@@ -94,8 +89,6 @@ static inline int of_device_uevent_modalias(struct device 
*dev,
return -ENODEV;
 }
 
-static inline void of_device_node_put(struct device *dev) { }
-
 static inline const struct of_device_id *of_match_device(
const struct of_device_id *matches, const struct device *dev)
 {
-- 
2.27.0



[PATCH 0/2] of: of_device.h cleanups

2021-02-11 Thread Rob Herring
This is a couple of cleanups for of_device.h. They fell out from my
attempt at decoupling of_device.h and of_platform.h which is a mess
and I haven't finished, but there's no reason to wait on these.

Rob

Rob Herring (2):
  of: Remove of_dev_{get,put}()
  driver core: platform: Drop of_device_node_put() wrapper

 arch/powerpc/platforms/pseries/ibmebus.c |  4 ++--
 drivers/base/platform.c  |  2 +-
 drivers/net/ethernet/ibm/emac/core.c | 15 ---
 drivers/of/device.c  | 21 -
 drivers/of/platform.c|  4 ++--
 drivers/of/unittest.c|  2 +-
 drivers/usb/dwc3/dwc3-st.c   |  2 +-
 include/linux/of_device.h| 10 --
 scripts/coccinelle/free/put_device.cocci |  1 -
 9 files changed, 15 insertions(+), 46 deletions(-)

-- 
2.27.0



[PATCH 1/2] of: Remove of_dev_{get,put}()

2021-02-11 Thread Rob Herring
of_dev_get() and of_dev_put() are just wrappers for get_device()/put_device()
on a platform_device. There are already platform_device_{get,put}()
wrappers for this purpose. Let's update the few users and remove
of_dev_{get,put}().

Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: "David S. Miller" 
Cc: Jakub Kicinski 
Cc: Frank Rowand 
Cc: Patrice Chotard 
Cc: Felipe Balbi 
Cc: Greg Kroah-Hartman 
Cc: Julia Lawall 
Cc: Gilles Muller 
Cc: Nicolas Palix 
Cc: Michal Marek 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: net...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-...@vger.kernel.org
Cc: co...@systeme.lip6.fr
Signed-off-by: Rob Herring 
---
 arch/powerpc/platforms/pseries/ibmebus.c |  4 ++--
 drivers/net/ethernet/ibm/emac/core.c | 15 ---
 drivers/of/device.c  | 21 -
 drivers/of/platform.c|  4 ++--
 drivers/of/unittest.c|  2 +-
 drivers/usb/dwc3/dwc3-st.c   |  2 +-
 include/linux/of_device.h|  3 ---
 scripts/coccinelle/free/put_device.cocci |  1 -
 8 files changed, 14 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/ibmebus.c 
b/arch/powerpc/platforms/pseries/ibmebus.c
index 8c6e509f6967..c29328fe94e8 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -355,12 +355,12 @@ static int ibmebus_bus_device_probe(struct device *dev)
if (!drv->probe)
return error;
 
-   of_dev_get(of_dev);
+   get_device(dev);
 
if (of_driver_match_device(dev, dev->driver))
error = drv->probe(of_dev);
if (error)
-   of_dev_put(of_dev);
+   put_device(of_dev);
 
return error;
 }
diff --git a/drivers/net/ethernet/ibm/emac/core.c 
b/drivers/net/ethernet/ibm/emac/core.c
index c00b9097eeea..471be6ec7e8a 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -2390,11 +2391,11 @@ static int emac_check_deps(struct emac_instance *dev,
 
 static void emac_put_deps(struct emac_instance *dev)
 {
-   of_dev_put(dev->mal_dev);
-   of_dev_put(dev->zmii_dev);
-   of_dev_put(dev->rgmii_dev);
-   of_dev_put(dev->mdio_dev);
-   of_dev_put(dev->tah_dev);
+   platform_device_put(dev->mal_dev);
+   platform_device_put(dev->zmii_dev);
+   platform_device_put(dev->rgmii_dev);
+   platform_device_put(dev->mdio_dev);
+   platform_device_put(dev->tah_dev);
 }
 
 static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action,
@@ -2435,7 +2436,7 @@ static int emac_wait_deps(struct emac_instance *dev)
for (i = 0; i < EMAC_DEP_COUNT; i++) {
of_node_put(deps[i].node);
if (err)
-   of_dev_put(deps[i].ofdev);
+   platform_device_put(deps[i].ofdev);
}
if (err == 0) {
dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev;
@@ -2444,7 +2445,7 @@ static int emac_wait_deps(struct emac_instance *dev)
dev->tah_dev = deps[EMAC_DEP_TAH_IDX].ofdev;
dev->mdio_dev = deps[EMAC_DEP_MDIO_IDX].ofdev;
}
-   of_dev_put(deps[EMAC_DEP_PREV_IDX].ofdev);
+   platform_device_put(deps[EMAC_DEP_PREV_IDX].ofdev);
return err;
 }
 
diff --git a/drivers/of/device.c b/drivers/of/device.c
index aedfaaafd3e7..9a748855b39d 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -33,27 +33,6 @@ const struct of_device_id *of_match_device(const struct 
of_device_id *matches,
 }
 EXPORT_SYMBOL(of_match_device);
 
-struct platform_device *of_dev_get(struct platform_device *dev)
-{
-   struct device *tmp;
-
-   if (!dev)
-   return NULL;
-   tmp = get_device(&dev->dev);
-   if (tmp)
-   return to_platform_device(tmp);
-   else
-   return NULL;
-}
-EXPORT_SYMBOL(of_dev_get);
-
-void of_dev_put(struct platform_device *dev)
-{
-   if (dev)
-   put_device(&dev->dev);
-}
-EXPORT_SYMBOL(of_dev_put);
-
 int of_device_add(struct platform_device *ofdev)
 {
BUG_ON(ofdev->dev.of_node == NULL);
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 79bd5f5a1bf1..020bf860c72c 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -687,7 +687,7 @@ static int of_platform_notify(struct notifier_block *nb,
pdev_parent = of_find_device_by_node(rd->dn->parent);
pdev = of_platform_device_create(rd->dn, NULL,
pdev_parent ? &pdev_parent->dev : NULL);
-   of_dev_put(pdev_parent);
+   platform_device_put(pdev_parent);
 
if (pdev == NULL) {
pr_err("%s: failed to create for '%pOF'\n",
@@ -712,7 +712,7 @@ static int of_platform_notify(struct 

[PATCH 3/4] ibmvfc: treat H_CLOSED as success during sub-CRQ registration

2021-02-11 Thread Tyrel Datwyler
A non-zero return code from H_REG_SUB_CRQ is currently treated as a
failure, aborting sub-CRQ setup. The case of H_CLOSED should
not be treated as a failure. This return code translates to a successful
sub-CRQ registration by the hypervisor, and is meant to communicate back
that there is currently no partner VIOS CRQ connection established as of
yet. This is a common occurrence during a disconnect where the client
adapter can possibly come back up prior to the partner adapter.

For a non-zero return code from H_REG_SUB_CRQ, treat H_CLOSED as success
so that the sub-CRQs are still set up successfully.

Fixes: faacf8c5f1d5 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ 
Channels")
Signed-off-by: Tyrel Datwyler 
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 3dd20f383453..ba6fcf9cbc57 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5636,7 +5636,8 @@ static int ibmvfc_register_scsi_channel(struct 
ibmvfc_host *vhost,
rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
   &scrq->cookie, &scrq->hw_irq);
 
-   if (rc) {
+   /* H_CLOSED indicates successful register, but no CRQ partner */
+   if (rc && rc != H_CLOSED) {
dev_warn(dev, "Error registering sub-crq: %d\n", rc);
if (rc == H_PARAMETER)
dev_warn_once(dev, "Firmware may not support MQ\n");
-- 
2.27.0



[PATCH 2/4] ibmvfc: fix invalid sub-CRQ handles after hard reset

2021-02-11 Thread Tyrel Datwyler
A hard reset results in a complete transport disconnect such that the
CRQ connection with the partner VIOS is broken. This has the side effect
of also invalidating the associated sub-CRQs. The current code assumes
that the sub-CRQs are preserved, resulting in a protocol violation after
trying to reconnect them with the VIOS. This introduces an infinite loop
such that the VIOS forces a disconnect after each subsequent attempt to
re-register with invalid handles.

Avoid the aforementioned issue by releasing the sub-CRQs prior to CRQ
disconnect, and driving a reinitialization of the sub-CRQs once a new
CRQ is registered with the hypervisor.

Fixes: faacf8c5f1d5 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ 
Channels")
Signed-off-by: Tyrel Datwyler 
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 384960036f8b..3dd20f383453 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -158,6 +158,9 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *);
 static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
 static void ibmvfc_tgt_move_login(struct ibmvfc_target *);
 
+static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
+static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
+
 static const char *unknown_error = "unknown error";
 
 static long h_reg_sub_crq(unsigned long unit_address, unsigned long ioba,
@@ -926,8 +929,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
unsigned long flags;
struct vio_dev *vdev = to_vio_dev(vhost->dev);
struct ibmvfc_queue *crq = &vhost->crq;
-   struct ibmvfc_queue *scrq;
-   int i;
+
+   ibmvfc_release_sub_crqs(vhost);
 
/* Close the CRQ */
do {
@@ -947,16 +950,6 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
memset(crq->msgs.crq, 0, PAGE_SIZE);
crq->cur = 0;
 
-   if (vhost->scsi_scrqs.scrqs) {
-   for (i = 0; i < nr_scsi_hw_queues; i++) {
-   scrq = &vhost->scsi_scrqs.scrqs[i];
-   spin_lock(scrq->q_lock);
-   memset(scrq->msgs.scrq, 0, PAGE_SIZE);
-   scrq->cur = 0;
-   spin_unlock(scrq->q_lock);
-   }
-   }
-
/* And re-open it again */
rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
crq->msg_token, PAGE_SIZE);
@@ -966,6 +959,9 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
dev_warn(vhost->dev, "Partner adapter not ready\n");
else if (rc != 0)
dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc);
+
+   ibmvfc_init_sub_crqs(vhost);
+
spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
 
@@ -5692,6 +5688,7 @@ static void ibmvfc_deregister_scsi_channel(struct 
ibmvfc_host *vhost, int index)
 
free_irq(scrq->irq, scrq);
irq_dispose_mapping(scrq->irq);
+   scrq->irq = 0;
 
do {
rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address,
-- 
2.27.0



[PATCH 0/4] ibmvfc: hard reset fixes

2021-02-11 Thread Tyrel Datwyler
This series contains a minor simplification of ibmvfc_init_sub_crqs() followed
by a couple of fixes for sub-CRQ handling that affect hard reset of the
client/host adapter CRQ pair.



[PATCH 1/4] ibmvfc: simplify handling of sub-CRQ initialization

2021-02-11 Thread Tyrel Datwyler
If ibmvfc_init_sub_crqs() fails, ibmvfc_probe() simply parrots the
registration failure already reported elsewhere, and further,
vhost->scsi_scrqs.scrqs == NULL is indication enough to the driver that
it has no sub-CRQs available. The mq_enabled check can also be moved
into ibmvfc_init_sub_crqs() so that each caller doesn't have to gate the
call with it. Finally, in the case of sub-CRQ setup failure, do_enquiry
can be turned off, putting the driver into single-queue fallback mode.

The aforementioned changes also simplify the next patch in the series
that fixes a hard reset issue, by tying a sub-CRQ setup failure and
do_enquiry logic into ibmvfc_init_sub_crqs().

Signed-off-by: Tyrel Datwyler 
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 7097028d4cb6..384960036f8b 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5705,17 +5705,21 @@ static void ibmvfc_deregister_scsi_channel(struct 
ibmvfc_host *vhost, int index)
LEAVE;
 }
 
-static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
+static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
 {
int i, j;
 
ENTER;
+   if (!vhost->mq_enabled)
+   return;
 
vhost->scsi_scrqs.scrqs = kcalloc(nr_scsi_hw_queues,
  sizeof(*vhost->scsi_scrqs.scrqs),
  GFP_KERNEL);
-   if (!vhost->scsi_scrqs.scrqs)
-   return -1;
+   if (!vhost->scsi_scrqs.scrqs) {
+   vhost->do_enquiry = 0;
+   return;
+   }
 
for (i = 0; i < nr_scsi_hw_queues; i++) {
if (ibmvfc_register_scsi_channel(vhost, i)) {
@@ -5724,13 +5728,12 @@ static int ibmvfc_init_sub_crqs(struct ibmvfc_host 
*vhost)
kfree(vhost->scsi_scrqs.scrqs);
vhost->scsi_scrqs.scrqs = NULL;
vhost->scsi_scrqs.active_queues = 0;
-   LEAVE;
-   return -1;
+   vhost->do_enquiry = 0;
+   break;
}
}
 
LEAVE;
-   return 0;
 }
 
 static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
@@ -5997,11 +6000,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const 
struct vio_device_id *id)
goto remove_shost;
}
 
-   if (vhost->mq_enabled) {
-   rc = ibmvfc_init_sub_crqs(vhost);
-   if (rc)
-   dev_warn(dev, "Failed to allocate Sub-CRQs. rc=%d\n", 
rc);
-   }
+   ibmvfc_init_sub_crqs(vhost);
 
if (shost_to_fc_host(shost)->rqst_q)
blk_queue_max_segments(shost_to_fc_host(shost)->rqst_q, 1);
-- 
2.27.0



[PATCH 4/4] ibmvfc: store return code of H_FREE_SUB_CRQ during cleanup

2021-02-11 Thread Tyrel Datwyler
The H_FREE_SUB_CRQ hypercall can return a retry delay return code that
indicates the call needs to be retried after a specific time delay. The
error path that frees a sub-CRQ after a failure during channel
registration fails to capture the return code of H_FREE_SUB_CRQ, which
results in the retry delay loop being skipped whenever such a return
code is received.

Store the return code result of the H_FREE_SUB_CRQ call such that the
return code check in the delay loop evaluates a meaningful value.

Fixes: 9288d35d70b5 ("ibmvfc: map/request irq and register Sub-CRQ interrupt 
handler")
Signed-off-by: Tyrel Datwyler 
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index ba6fcf9cbc57..23b803ac4a13 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5670,7 +5670,7 @@ static int ibmvfc_register_scsi_channel(struct 
ibmvfc_host *vhost,
 
 irq_failed:
do {
-   plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, 
scrq->cookie);
+   rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, 
scrq->cookie);
} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 reg_failed:
ibmvfc_free_queue(vhost, scrq);
-- 
2.27.0



[PATCH] powerpc/pseries: extract host bridge from pci_bus prior to bus removal

2021-02-11 Thread Tyrel Datwyler
The pci_bus->bridge reference may no longer be valid after
pci_remove_bus(), resulting in a bad value being passed to
device_unregister() for the associated bridge device.

Store the host_bridge reference in a separate variable prior to calling
pci_remove_bus().

Fixes: 7340056567e3 ("powerpc/pci: Reorder pci bus/bridge unregistration during 
PHB removal")
Signed-off-by: Tyrel Datwyler 
---
 arch/powerpc/platforms/pseries/pci_dlpar.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c 
b/arch/powerpc/platforms/pseries/pci_dlpar.c
index f9ae17e8a0f4..a8f9140a24fa 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic);
 int remove_phb_dynamic(struct pci_controller *phb)
 {
struct pci_bus *b = phb->bus;
+   struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge);
struct resource *res;
int rc, i;
 
@@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb)
/* Remove the PCI bus and unregister the bridge device from sysfs */
phb->bus = NULL;
pci_remove_bus(b);
-   device_unregister(b->bridge);
+   host_bridge->bus = NULL;
+   device_unregister(&host_bridge->dev);
 
/* Now release the IO resource */
if (res->flags & IORESOURCE_IO)
-- 
2.27.0



Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Lakshmi Ramasubramanian

On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote:

Hi Rob,

[PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem

This change causes a build problem for the x86_64 architecture (please see
the mail from the kernel test robot below) since arch/x86/include/asm/kexec.h
uses "elf_load_addr" for the ELF header buffer address and not
"elf_headers_mem".


struct kimage_arch {
 ...

 /* Core ELF header buffer */
 void *elf_headers;
 unsigned long elf_headers_sz;
 unsigned long elf_load_addr;
};

I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and 
PPC64 since they are the only ones using this function now.


#if defined(CONFIG_ARM64) && defined(CONFIG_PPC64)

Sorry - I meant to say
#if defined(CONFIG_ARM64) || defined(CONFIG_PPC64)


void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
    unsigned long initrd_load_addr,
    unsigned long initrd_len,
    const char *cmdline)
{
 ...
}
#endif /* defined(CONFIG_ARM64) && defined(CONFIG_PPC64) */

Please let me know if you have any concerns.

thanks,
  -lakshmi

 Forwarded Message 
Subject: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
Date: Fri, 12 Feb 2021 00:50:20 +0800
From: kernel test robot 
To: Lakshmi Ramasubramanian 
CC: kbuild-...@lists.01.org

Hi Lakshmi,

I love your patch! Yet something to improve:

[auto build test ERROR on integrity/next-integrity]
[also build test ERROR on v5.11-rc7 next-20210211]
[cannot apply to powerpc/next robh/for-next arm64/for-next/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: 
https://github.com/0day-ci/linux/commits/Lakshmi-Ramasubramanian/Carry-forward-IMA-measurement-log-on-kexec-on-ARM64/20210211-071924 

base: 
https://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git next-integrity 


config: x86_64-randconfig-m001-20210211 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
     # 
https://github.com/0day-ci/linux/commit/12ae86067d115b84092353109e8798693d102f0d 


     git remote add linux-review https://github.com/0day-ci/linux
     git fetch --no-tags linux-review 
Lakshmi-Ramasubramanian/Carry-forward-IMA-measurement-log-on-kexec-on-ARM64/20210211-071924 


     git checkout 12ae86067d115b84092353109e8798693d102f0d
     # save the attached .config to linux build tree
     make W=1 ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

    drivers/of/kexec.c: In function 'of_kexec_alloc_and_setup_fdt':
drivers/of/kexec.c:183:17: error: 'const struct kimage_arch' has no 
member named 'elf_headers_mem'; did you mean 'elf_headers_sz'?

  183 | image->arch.elf_headers_mem,
  | ^~~
  | elf_headers_sz
    drivers/of/kexec.c:192:42: error: 'const struct kimage_arch' has no 
member named 'elf_headers_mem'; did you mean 'elf_headers_sz'?

  192 |   ret = fdt_add_mem_rsv(fdt, image->arch.elf_headers_mem,
  |  ^~~
  |  elf_headers_sz


vim +183 drivers/of/kexec.c

     65
     66    /*
     67 * of_kexec_alloc_and_setup_fdt - Alloc and setup a new 
Flattened Device Tree

     68 *
     69 * @image:    kexec image being loaded.
     70 * @initrd_load_addr:    Address where the next initrd will 
be loaded.
     71 * @initrd_len:    Size of the next initrd, or 0 if there 
will be none.
     72 * @cmdline:    Command line for the next kernel, or NULL 
if there will

     73 *    be none.
     74 *
     75 * Return: fdt on success, or NULL errno on error.
     76 */
     77    void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
     78   unsigned long initrd_load_addr,
     79   unsigned long initrd_len,
     80   const char *cmdline)
     81    {
     82    void *fdt;
     83    int ret, chosen_node;
     84    const void *prop;
     85    unsigned long fdt_size;
     86
     87    fdt_size = fdt_totalsize(initial_boot_params) +
     88   (cmdline ? strlen(cmdline) : 0) +
     89   FDT_EXTRA_SPACE;
     90
     91    fdt = kvmalloc(fdt_size, GFP_KERNEL);
     92    if (!fdt)
     93    return NULL;
     94
     95    ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
     96    if (ret < 0) {
     97    pr_err("Error %d setting up the new device tree.\n", 
ret);

     98    goto out;
     99    }
    100
    101   

Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function

2021-02-11 Thread Lakshmi Ramasubramanian

Hi Rob,

[PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem

This change causes a build problem for the x86_64 architecture (please see
the mail from the kernel test robot below) since arch/x86/include/asm/kexec.h
uses "elf_load_addr" for the ELF header buffer address and not "elf_headers_mem".


struct kimage_arch {
...

/* Core ELF header buffer */
void *elf_headers;
unsigned long elf_headers_sz;
unsigned long elf_load_addr;
};

I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and 
PPC64 since they are the only ones using this function now.


#if defined(CONFIG_ARM64) && defined(CONFIG_PPC64)
void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
   unsigned long initrd_load_addr,
   unsigned long initrd_len,
   const char *cmdline)
{
...
}
#endif /* defined(CONFIG_ARM64) && defined(CONFIG_PPC64) */

Please let me know if you have any concerns.

thanks,
 -lakshmi

 Forwarded Message 
Subject: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
Date: Fri, 12 Feb 2021 00:50:20 +0800
From: kernel test robot 
To: Lakshmi Ramasubramanian 
CC: kbuild-...@lists.01.org

Hi Lakshmi,

I love your patch! Yet something to improve:

[auto build test ERROR on integrity/next-integrity]
[also build test ERROR on v5.11-rc7 next-20210211]
[cannot apply to powerpc/next robh/for-next arm64/for-next/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: 
https://github.com/0day-ci/linux/commits/Lakshmi-Ramasubramanian/Carry-forward-IMA-measurement-log-on-kexec-on-ARM64/20210211-071924
base: 
https://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git 
next-integrity

config: x86_64-randconfig-m001-20210211 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
# 
https://github.com/0day-ci/linux/commit/12ae86067d115b84092353109e8798693d102f0d

git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Lakshmi-Ramasubramanian/Carry-forward-IMA-measurement-log-on-kexec-on-ARM64/20210211-071924

git checkout 12ae86067d115b84092353109e8798693d102f0d
# save the attached .config to linux build tree
make W=1 ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   drivers/of/kexec.c: In function 'of_kexec_alloc_and_setup_fdt':

drivers/of/kexec.c:183:17: error: 'const struct kimage_arch' has no member 
named 'elf_headers_mem'; did you mean 'elf_headers_sz'?

 183 | image->arch.elf_headers_mem,
 | ^~~
 | elf_headers_sz
   drivers/of/kexec.c:192:42: error: 'const struct kimage_arch' has no 
member named 'elf_headers_mem'; did you mean 'elf_headers_sz'?

 192 |   ret = fdt_add_mem_rsv(fdt, image->arch.elf_headers_mem,
 |  ^~~
 |  elf_headers_sz


vim +183 drivers/of/kexec.c

65  
66  /*
67	 * of_kexec_alloc_and_setup_fdt - Alloc and setup a new 
Flattened Device Tree

68   *
69   * @image:  kexec image being loaded.
70   * @initrd_load_addr:   Address where the next initrd will be loaded.
71	 * @initrd_len:		Size of the next initrd, or 0 if there will be 
none.
72	 * @cmdline:		Command line for the next kernel, or NULL if there 
will

73   *  be none.
74   *
75   * Return: fdt on success, or NULL errno on error.
76   */
77  void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
78 unsigned long initrd_load_addr,
79 unsigned long initrd_len,
80 const char *cmdline)
81  {
82  void *fdt;
83  int ret, chosen_node;
84  const void *prop;
85  unsigned long fdt_size;
86  
87  fdt_size = fdt_totalsize(initial_boot_params) +
88 (cmdline ? strlen(cmdline) : 0) +
89 FDT_EXTRA_SPACE;
90  
91  fdt = kvmalloc(fdt_size, GFP_KERNEL);
92  if (!fdt)
93  return NULL;
94  
95  ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
96  if (ret < 0) {
97  pr_err("Error %d setting up the new device tree.\n", 
ret);
98  goto out;
99  }
   100  
   101  /* Remove memory reservation for the current d

Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Christophe Leroy




Le 11/02/2021 à 15:30, Segher Boessenkool a écrit :

On Thu, Feb 11, 2021 at 03:09:43PM +0100, Christophe Leroy wrote:

Le 11/02/2021 à 12:49, Segher Boessenkool a écrit :

On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote:

powerpc BUG_ON() is based on using twnei or tdnei instruction,
which obliges gcc to format the condition into a 0 or 1 value
in a register.


Huh?  Why is that?

Will it work better if this used __builtin_trap?  Or does the kernel only
detect very specific forms of trap instructions?


We already made a try with __builtin_trap() 1,5 year ago, see
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20510ce03cc9463f1c9e743c1d93b939de501b53.1566219503.git.christophe.le...@c-s.fr/

The main problems encountered are:
- It is only possible to use it for BUG_ON, not for WARN_ON because GCC
considers it as noreturn. Is there any workaround ?


A trap is noreturn by definition:

  -- Built-in Function: void __builtin_trap (void)
  This function causes the program to exit abnormally.


- The kernel (With CONFIG_DEBUG_BUGVERBOSE) needs to be able to identify
the source file and line corresponding to the trap. How can that be done
with __builtin_trap() ?


The DWARF debug info should be sufficient.  Perhaps you can post-process
some way?

You can create a trap that falls through yourself (by having a trap-on
condition with a condition that is always true, but make the compiler
not see that).  This isn't efficient though.

Could you file a feature request (in bugzilla)?  It is probably useful
for generic code as well, but we could implement this for powerpc only
if needed.



Ok I will.


Using __builtin_trap() would certainly be the best.

unsigned long test1(unsigned long msr)
{
WARN_ON((msr & (MSR_DR | MSR_IR | MSR_RI)) != (MSR_DR | MSR_IR | 
MSR_RI));

return msr;
}

unsigned long test2(unsigned long msr)
{
if ((msr & (MSR_DR | MSR_IR | MSR_RI)) != (MSR_DR | MSR_IR | MSR_RI))
__builtin_trap();

return msr;
}

03c0 :
 3c0:   70 69 00 32 andi.   r9,r3,50
 3c4:   69 29 00 32 xorir9,r9,50
 3c8:   31 49 ff ff addic   r10,r9,-1
 3cc:   7d 2a 49 10 subfe   r9,r10,r9
 3d0:   0f 09 00 00 twnei   r9,0
 3d4:   4e 80 00 20 blr

03d8 :
 3d8:   70 69 00 32 andi.   r9,r3,50
 3dc:   0f 09 00 32 twnei   r9,50
 3e0:   4e 80 00 20 blr

Christophe


Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Segher Boessenkool
On Thu, Feb 11, 2021 at 03:09:43PM +0100, Christophe Leroy wrote:
> Le 11/02/2021 à 12:49, Segher Boessenkool a écrit :
> >On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote:
> >>powerpc BUG_ON() is based on using twnei or tdnei instruction,
> >>which obliges gcc to format the condition into a 0 or 1 value
> >>in a register.
> >
> >Huh?  Why is that?
> >
> >Will it work better if this used __builtin_trap?  Or does the kernel only
> >detect very specific forms of trap instructions?
> 
> We already made a try with __builtin_trap() 1,5 year ago, see 
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20510ce03cc9463f1c9e743c1d93b939de501b53.1566219503.git.christophe.le...@c-s.fr/
> 
> The main problems encountered are:
> - It is only possible to use it for BUG_ON, not for WARN_ON because GCC 
> considers it as noreturn. Is there any workaround ?

A trap is noreturn by definition:

 -- Built-in Function: void __builtin_trap (void)
 This function causes the program to exit abnormally.

> - The kernel (With CONFIG_DEBUG_BUGVERBOSE) needs to be able to identify 
> the source file and line corresponding to the trap. How can that be done 
> with __builtin_trap() ?

The DWARF debug info should be sufficient.  Perhaps you can post-process
it in some way?

You can create a trap that falls through yourself (by having a trap-on
condition with a condition that is always true, but make the compiler
not see that).  This isn't efficient though.
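
A rough sketch of that trick (illustration only, assuming GCC inline asm,
not something the kernel does today): launder the always-true value
through an empty asm so the optimizer cannot prove the trap is taken, and
the code after it stays reachable:

static inline void warn_trap(void)
{
	unsigned long one = 1;

	/* Hide the constant from GCC so the trap looks conditional */
	asm volatile("" : "+r" (one));

	if (one)
		__builtin_trap();

	/* As far as GCC knows, execution can continue here, so this
	 * shape would also work for WARN_ON-style uses. */
}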

Could you file a feature request (in bugzilla)?  It is probably useful
for generic code as well, but we could implement this for powerpc only
if needed.


Segher


Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Christophe Leroy




Le 11/02/2021 à 12:49, Segher Boessenkool a écrit :

On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote:

powerpc BUG_ON() is based on using twnei or tdnei instruction,
which obliges gcc to format the condition into a 0 or 1 value
in a register.


Huh?  Why is that?

Will it work better if this used __builtin_trap?  Or does the kernel only
detect very specific forms of trap instructions?


We already made a try with __builtin_trap() 1.5 years ago, see 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20510ce03cc9463f1c9e743c1d93b939de501b53.1566219503.git.christophe.le...@c-s.fr/


The main problems encountered are:
- It is only possible to use it for BUG_ON, not for WARN_ON, because GCC considers it noreturn. Is
there any workaround?
- The kernel (with CONFIG_DEBUG_BUGVERBOSE) needs to be able to identify the source file and line
corresponding to the trap. How can that be done with __builtin_trap()?


Christophe


[PATCH 6/6] powerpc/mm/64s: Allow STRICT_KERNEL_RWX again

2021-02-11 Thread Michael Ellerman
We have now fixed the known bugs in STRICT_KERNEL_RWX for Book3S
64-bit Hash and Radix MMUs, see preceding commits, so allow the
option to be selected again.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 107bb4319e0e..7d6080afbad6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,7 +135,7 @@ config PPC
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_SCALED_CPUTIME  if VIRT_CPU_ACCOUNTING_NATIVE 
&& PPC_BOOK3S_64
-   select ARCH_HAS_STRICT_KERNEL_RWX   if (PPC32 && !HIBERNATION)
+   select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
!HIBERNATION)
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_COPY_MC if PPC64
-- 
2.25.1



[PATCH 5/6] powerpc/mm/64s/hash: Add real-mode change_memory_range() for hash LPAR

2021-02-11 Thread Michael Ellerman
When we enabled STRICT_KERNEL_RWX we received some reports of boot
failures when using the Hash MMU and running under phyp. The crashes
are intermittent, and often exhibit as a completely unresponsive
system, or possibly an oops.

One example, which was caught in xmon:

  [   14.068327][T1] devtmpfs: mounted
  [   14.069302][T1] Freeing unused kernel memory: 5568K
  [   14.142060][  T347] BUG: Unable to handle kernel instruction fetch
  [   14.142063][T1] Run /sbin/init as init process
  [   14.142074][  T347] Faulting instruction address: 0xc0004400
  cpu 0x2: Vector: 400 (Instruction Access) at [cc7475e0]
  pc: c0004400: exc_virt_0x4400_instruction_access+0x0/0x80
  lr: c01862d4: update_rq_clock+0x44/0x110
  sp: cc747880
 msr: 800040001031
current = 0xcc60d380
paca= 0xc0001ec9de80   irqmask: 0x03   irq_happened: 0x01
  pid   = 347, comm = kworker/2:1
  ...
  enter ? for help
  [cc747880] c01862d4 update_rq_clock+0x44/0x110 (unreliable)
  [cc7478f0] c0198794 update_blocked_averages+0xb4/0x6d0
  [cc7479f0] c0198e40 update_nohz_stats+0x90/0xd0
  [cc747a20] c01a13b4 _nohz_idle_balance+0x164/0x390
  [cc747b10] c01a1af8 newidle_balance+0x478/0x610
  [cc747be0] c01a1d48 pick_next_task_fair+0x58/0x480
  [cc747c40] c0eaab5c __schedule+0x12c/0x950
  [cc747cd0] c0eab3e8 schedule+0x68/0x120
  [cc747d00] c016b730 worker_thread+0x130/0x640
  [cc747da0] c0174d50 kthread+0x1a0/0x1b0
  [cc747e10] c000e0f0 ret_from_kernel_thread+0x5c/0x6c

This shows that CPU 2, which was idle, woke up and then appears to
randomly take an instruction fault on a completely valid area of
kernel text.

The cause turns out to be the call to hash__mark_rodata_ro(), late in
boot. Due to the way we lay out text and rodata, that function actually
changes the permissions for all of text and rodata to read-only plus
execute.

To do the permission change we use a hypervisor call, H_PROTECT. On
phyp that appears to be implemented by briefly removing the mapping of
the kernel text, before putting it back with the updated permissions.
If any other CPU is executing during that window, it will see spurious
faults on the kernel text and/or data, leading to crashes.

To fix it we use stop machine to collect all other CPUs, and then have
them drop into real mode (MMU off), while we change the mapping. That
way they are unaffected by the mapping temporarily disappearing.
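
As a rough illustration of the mechanism (a generic stop_machine() sketch,
not the code from this patch; the real implementation, including the
real-mode secondary loop, is in the diff below):

#include <linux/cpumask.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

struct demo_parms {
	unsigned long start, end, newpp;
};

/* Runs on every online CPU with interrupts disabled. */
static int demo_change_range_fn(void *data)
{
	struct demo_parms *parms = data;

	/* One CPU performs the update; the others return and wait inside
	 * stop_machine() until everyone is finished. */
	if (smp_processor_id() != cpumask_first(cpu_online_mask))
		return 0;

	pr_info("changing 0x%lx-0x%lx to 0x%lx\n",
		parms->start, parms->end, parms->newpp);

	return 0;
}

static void demo_change_range(struct demo_parms *parms)
{
	/* Passing cpu_online_mask runs the callback on all online CPUs. */
	stop_machine(demo_change_range_fn, parms, cpu_online_mask);
}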

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/mm/book3s64/hash_pgtable.c | 105 +++-
 1 file changed, 104 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c 
b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 3663d3cdffac..01de985df2c4 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -400,6 +401,19 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
+
+struct change_memory_parms {
+   unsigned long start, end, newpp;
+   unsigned int step, nr_cpus, master_cpu;
+   atomic_t cpu_counter;
+};
+
+// We'd rather this was on the stack but it has to be in the RMO
+static struct change_memory_parms chmem_parms;
+
+// And therefore we need a lock to protect it from concurrent use
+static DEFINE_MUTEX(chmem_lock);
+
 static void change_memory_range(unsigned long start, unsigned long end,
unsigned int step, unsigned long newpp)
 {
@@ -414,6 +428,73 @@ static void change_memory_range(unsigned long start, 
unsigned long end,
mmu_kernel_ssize);
 }
 
+static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
+{
+   unsigned long msr, tmp, flags;
+   int *p;
+
+   p = &parms->cpu_counter.counter;
+
+   local_irq_save(flags);
+   __hard_EE_RI_disable();
+
+   asm volatile (
+   // Switch to real mode and leave interrupts off
+   "mfmsr  %[msr]  ;"
+   "li %[tmp], %[MSR_IR_DR];"
+   "andc   %[tmp], %[msr], %[tmp]  ;"
+   "mtmsrd %[tmp]  ;"
+
+   // Tell the master we are in real mode
+   "1: "
+   "lwarx  %[tmp], 0, %[p] ;"
+   "addic  %[tmp], %[tmp], -1  ;"
+   "stwcx. %[tmp], 0, %[p] ;"
+   "bne-   1b  ;"
+
+   // Spin until the counter goes to zero
+   "2: ;"
+   "lwz%[tmp], 0(%[p]) ;"
+   "cmpwi  %[tmp], 0   ;"
+   "bne-   2b  ;"
+
+   // Switch back to virtual mode
+   "mtmsrd %[msr]

[PATCH 4/6] powerpc/mm/64s/hash: Factor out change_memory_range()

2021-02-11 Thread Michael Ellerman
Pull the loop calling hpte_updateboltedpp() out of
hash__change_memory_range() into a helper function. We need it to be a
separate function for the next patch.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/mm/book3s64/hash_pgtable.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c 
b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 03819c259f0a..3663d3cdffac 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -400,10 +400,23 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
+static void change_memory_range(unsigned long start, unsigned long end,
+   unsigned int step, unsigned long newpp)
+{
+   unsigned long idx;
+
+   pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 
0x%x\n",
+start, end, newpp, step);
+
+   for (idx = start; idx < end; idx += step)
+   /* Not sure if we can do much with the return value */
+   mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
+   mmu_kernel_ssize);
+}
+
 static bool hash__change_memory_range(unsigned long start, unsigned long end,
  unsigned long newpp)
 {
-   unsigned long idx;
unsigned int step, shift;
 
shift = mmu_psize_defs[mmu_linear_psize].shift;
@@ -415,13 +428,7 @@ static bool hash__change_memory_range(unsigned long start, 
unsigned long end,
if (start >= end)
return false;
 
-   pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 
0x%x\n",
-start, end, newpp, step);
-
-   for (idx = start; idx < end; idx += step)
-   /* Not sure if we can do much with the return value */
-   mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
-   mmu_kernel_ssize);
+   change_memory_range(start, end, step, newpp);
 
return true;
 }
-- 
2.25.1



[PATCH 2/6] powerpc/pseries: Add key to flags in pSeries_lpar_hpte_updateboltedpp()

2021-02-11 Thread Michael Ellerman
The flags argument to plpar_pte_protect() (aka. H_PROTECT) includes
the key in bits 9-13, but currently we always set those bits to zero.

In the past that hasn't been a problem because we always used key 0
for the kernel, and updateboltedpp() is only used for kernel mappings.

However since commit d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3
for kernel mapping with hash translation") we are now inadvertently
changing the key (to zero) when we call plpar_pte_protect().

That hasn't broken anything because updateboltedpp() is only used for
STRICT_KERNEL_RWX, which is currently disabled on 64s due to other
bugs.

But we want to fix that, so first we need to pass the key correctly to
plpar_pte_protect(). In the `newpp` value the low 3 bits of the key
are already in the correct spot, but the high 2 bits of the key need
to be shifted down.

Fixes: d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with 
hash translation")
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/platforms/pseries/lpar.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c 
b/arch/powerpc/platforms/pseries/lpar.c
index 764170fdb0f7..8bbbddff7226 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -976,11 +976,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned 
long newpp,
slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
BUG_ON(slot == -1);
 
-   flags = newpp & 7;
+   flags = newpp & (HPTE_R_PP | HPTE_R_N);
if (mmu_has_feature(MMU_FTR_KERNEL_RO))
/* Move pp0 into bit 8 (IBM 55) */
flags |= (newpp & HPTE_R_PP0) >> 55;
 
+   flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO);
+
lpar_rc = plpar_pte_protect(flags, slot, 0);
 
BUG_ON(lpar_rc != H_SUCCESS);
-- 
2.25.1



[PATCH 1/6] powerpc/mm/64s: Add _PAGE_KERNEL_ROX

2021-02-11 Thread Michael Ellerman
In the past we had a fallback definition for _PAGE_KERNEL_ROX, but we
removed that in commit d82fd29c5a8c ("powerpc/mm: Distribute platform
specific PAGE and PMD flags and definitions") and added definitions
for each MMU family.

However we missed adding a definition for 64s, which was not really a
bug because it's currently not used.

But we'd like to use PAGE_KERNEL_ROX in a future patch so add a
definition now.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a39886681629..1358fae01d04 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -116,6 +116,7 @@
  */
 #define _PAGE_KERNEL_RW(_PAGE_PRIVILEGED | _PAGE_RW | 
_PAGE_DIRTY)
 #define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ)
+#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
 #define _PAGE_KERNEL_RWX   (_PAGE_PRIVILEGED | _PAGE_DIRTY |   \
 _PAGE_RW | _PAGE_EXEC)
 /*
-- 
2.25.1



[PATCH 3/6] powerpc/64s: Use htab_convert_pte_flags() in hash__mark_rodata_ro()

2021-02-11 Thread Michael Ellerman
In hash__mark_rodata_ro() we pass the raw PP_RXXX value to
hash__change_memory_range(). That has the effect of setting the key to
zero, because PP_RXXX contains no key value.

Fix it by using htab_convert_pte_flags(), which knows how to convert a
pgprot into a pp value, including the key.

Fixes: d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation")
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/mm/book3s64/hash_pgtable.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 567e0c6b3978..03819c259f0a 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -428,12 +428,14 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
 
 void hash__mark_rodata_ro(void)
 {
-   unsigned long start, end;
+   unsigned long start, end, pp;
 
start = (unsigned long)_stext;
end = (unsigned long)__init_begin;
 
-   WARN_ON(!hash__change_memory_range(start, end, PP_RXXX));
+   pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);
+
+   WARN_ON(!hash__change_memory_range(start, end, pp));
 }
 
 void hash__mark_initmem_nx(void)
-- 
2.25.1



Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Segher Boessenkool
On Thu, Feb 11, 2021 at 01:26:12PM +0100, Christophe Leroy wrote:
> >What PowerPC cpus implement branch folding?  I know none.
> 
> Extract from powerpc mpc8323 reference manual:

> — Zero-cycle branch capability (branch folding)

Yeah, this is not what is traditionally called branch folding (which
stores the instruction being branched to in some cache, originally the
instruction cache itself; somewhat similar (but different) to the BTIC
on 750).  Overloaded terminology :-)

6xx/7xx CPUs had the branch execution unit in the frontend, and it would
not issue a branch at all if it could be resolved then already.  Power4
and later predict all branches, and most are not issued at all (those
that do need to be executed, like bdnz, are).  At completion time it is
checked if the prediction was correct (and corrective action is taken if
not).


Segher


Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Christophe Leroy




On 11/02/2021 at 12:49, Segher Boessenkool wrote:

On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote:

powerpc BUG_ON() is based on using twnei or tdnei instruction,
which obliges gcc to format the condition into a 0 or 1 value
in a register.


Huh?  Why is that?

Will it work better if this used __builtin_trap?  Or does the kernel only
detect very specific forms of trap instructions?


By using a generic implementation, gcc will generate a branch
to the unconditional trap generated by BUG().


That is many more instructions than ideal.


As modern powerpc implement branch folding, that's even more efficient.


What PowerPC cpus implement branch folding?  I know none.


Extract from powerpc mpc8323 reference manual:

High instruction and data throughput
— Zero-cycle branch capability (branch folding)
— Programmable static branch prediction on unresolved conditional branches
— Two integer units with enhanced multipliers in the e300c2 for increased integer instruction throughput and a maximum two-cycle latency for multiply instructions
— Instruction fetch unit capable of fetching two instructions per clock from the instruction cache
— A six-entry instruction queue (IQ) that provides lookahead capability
— Independent pipelines with feed-forwarding that reduces data dependencies in hardware
— 16-Kbyte, four-way set-associative instruction and data caches on the e300c2.
— Cache write-back or write-through operation programmable on a per-page or per-block basis
— Features for instruction and data cache locking and protection
— BPU that performs CR lookahead operations
— Address translation facilities for 4-Kbyte page size, variable block size, and 256-Mbyte segment size
— A 64-entry, two-way, set-associative ITLB and DTLB
— Eight-entry data and instruction BAT arrays providing 128-Kbyte to 256-Mbyte blocks
— Software table search operations and updates supported through fast trap mechanism
— 52-bit virtual address; 32-bit physical address

Christophe


Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Segher Boessenkool
On Thu, Feb 11, 2021 at 08:04:55PM +1000, Nicholas Piggin wrote:
> Excerpts from Christophe Leroy's message of February 11, 2021 5:41 pm:
> > As modern powerpc implement branch folding, that's even more efficient.

Ah, it seems you mean what Arm calls branch folding.  Sure, power4
already did that, and this has not changed.

> I think POWER will speculate conditional traps as non faulting always
> so it should be just as good if not better than the branch.

Right, these are not branch instructions, so are not branch predicted;
all trap instructions are assumed to fall through, like all other
non-branch instructions.


Segher


Re: [PATCH 1/3] powerpc/perf: Adds support for programming of Thresholding in P10

2021-02-11 Thread Michael Ellerman
Michael Ellerman  writes:
> From: Kajol Jain 
>
> Thresholding, a performance monitoring unit feature, can be
> used to identify marked instructions which take more than
> expected cycles between start event and end event.
> Threshold compare (thresh_cmp) bits are programmed in MMCRA
> register. In Power9, thresh_cmp bits were part of the
> event code. But in case of P10, thresh_cmp are not part of
> event code due to inclusion of MMCR3 bits.

Accidental resend, ignore.

cheers


Re: [PATCH] tools/perf: Fix powerpc gap between kernel end and module start

2021-02-11 Thread Athira Rajeev



> On 09-Feb-2021, at 6:17 PM, Arnaldo Carvalho de Melo  wrote:
> 
>> On Wed, Feb 03, 2021 at 12:31:48PM -0300, Arnaldo Carvalho de Melo wrote:
>>> On Tue, Feb 02, 2021 at 04:02:36PM +0530, Athira Rajeev wrote:
>>> 
>>> 
>>>On 18-Jan-2021, at 3:51 PM, kajoljain  wrote:
>>> 
>>> 
>>> 
>>>On 1/12/21 3:08 PM, Jiri Olsa wrote:
>>> 
>>>On Mon, Dec 28, 2020 at 09:14:14PM -0500, Athira Rajeev wrote:
>>> 
>>>SNIP
>>> 
>>> 
>>>c2799370 b backtrace_flag
>>>c2799378 B radix_tree_node_cachep
>>>c2799380 B __bss_stop
>>>c27a B _end
>>>c0080389 t icmp_checkentry  [ip_tables]
>>>c00803890038 t ipt_alloc_initial_table  [ip_tables]
>>>c00803890468 T ipt_do_table [ip_tables]
>>>c00803890de8 T ipt_unregister_table_pre_exit [ip_tables]
>>>...
>>> 
>>>Perf calls function symbols__fixup_end() which sets the end of symbol
>>>to 0xc0080389, which is the next address and this is the start
>>>address of first module (icmp_checkentry in above) which will make
>>>the huge symbol size of 0x8010f.
>>> 
>>>After symbols__fixup_end:
>>>symbols__fixup_end: sym->name: _end, sym->start: 0xc27a,
>>>sym->end: 0xc0080389
>>> 
>>>On powerpc, kernel text segment is located at 0xc000
>>>whereas the modules are located at very high memory addresses,
>>>0xc0080xxx. Since the gap between end of kernel text segment
>>>and beginning of first module's address is high, histogram allocation
>>>using calloc fails.
>>> 
>>>Fix this by detecting the kernel's last symbol and limiting
>>>the range of last kernel symbol to pagesize.
>>> 
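
A rough sketch of the approach described in the quoted commit message, with
cut-down types and an assumed helper name (not the exact hunk from the patch):

	#include <stdint.h>
	#include <string.h>

	/* Cut-down stand-in for perf's symbol type. */
	struct symbol {
		uint64_t start, end;
		char name[64];
	};

	static unsigned int page_size = 65536;	/* 64K pages, common on powerpc */

	/*
	 * Module symbols carry a "[module]" suffix.  If the current symbol is a
	 * kernel symbol and the next one belongs to a module, clamp the kernel
	 * symbol to one page instead of stretching it up to the module address.
	 */
	static void fixup_last_kernel_symbol(struct symbol *p, struct symbol *c)
	{
		if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
			p->end = p->start + page_size;
		else
			p->end = c->start;
	}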
>>> 
>>>Patch looks good to me.
>>> 
>>>Tested-By: Kajol Jain
>>> 
>>>Thanks,
>>>Kajol Jain
>>> 
>>> 
>>>Signed-off-by: Athira Rajeev
>>> 
>>> 
>>>I can't test, but since the same approach works for arm and s390,
>>>this also looks ok
>>> 
>>>Acked-by: Jiri Olsa 
>>> 
>>>thanks,
>>>jirka
>>> 
>>> 
>>> Hi Arnaldo,
>>> 
>>> Can you please help review this patch and merge if this looks good..
>> 
>> Thanks, collected the Tested-by from Kajol and the Acked-by from Jiri
>> and applied to my local tree for testing, then up to my perf/core
>> branch.
> 
> Had to apply this on top.
> 
> - Arnaldo
> 
> commit 0f000f9c89182950cd3500226729977251529364
> Author: Arnaldo Carvalho de Melo 
> Date:   Tue Feb 9 09:41:21 2021 -0300
> 
>perf powerpc: Fix printf conversion specifier for IP addresses
> 
>We need to use "%#" PRIx64 for u64 values, not "%lx", fixing this build
>problem on powerpc 32-bit:
> 
>  7213.69 ubuntu:18.04-x-powerpc: FAIL powerpc-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
>arch/powerpc/util/machine.c: In function 'arch__symbols__fixup_end':
>arch/powerpc/util/machine.c:23:12: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'u64 {aka long long unsigned int}' [-Werror=format=]
>  pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
>^
>/git/linux/tools/perf/util/debug.h:18:21: note: in definition of macro 'pr_fmt'
> #define pr_fmt(fmt) fmt
> ^~~
>/git/linux/tools/perf/util/debug.h:33:29: note: in expansion of macro 'pr_debugN'
> #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
> ^
>/git/linux/tools/perf/util/debug.h:33:42: note: in expansion of macro 'pr_fmt'
> #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
>  ^~
>arch/powerpc/util/machine.c:23:2: note: in expansion of macro 'pr_debug4'
>  pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
>  ^
>cc1: all warnings being treated as errors
>/git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
>make[5]: *** [util] Error 2
>/git/linux/tools/build/Makefile.build:139: recipe for target 'powerpc' failed
>make[4]: *** [powerpc] Error 2
>/git/linux/tools/build/Makefile.build:139: recipe for target 'arch' failed
>make[3]: *** [arch] Error 2
>  7330.47 ubuntu:18.04-x-powerpc64  : Ok   powerpc64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
> 
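
For reference, a minimal sketch of the portable format the fix relies on
(illustrative only, not the actual hunk):

	#include <inttypes.h>
	#include <stdio.h>

	/* A u64 must be printed with PRIx64 rather than "%lx" so the format is
	 * correct on both 32-bit and 64-bit builds. */
	static void print_sym_end(const char *name, uint64_t end)
	{
		printf("%s end:%#" PRIx64 "\n", name, end);
	}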
>Fixes: 557c3eadb7712741 ("perf powerpc: Fix gap between kernel end and module start")
>Cc: Athira Rajeev 
>Cc: Jiri Olsa 
>Cc: Kajol 

Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Segher Boessenkool
On Thu, Feb 11, 2021 at 08:04:55PM +1000, Nicholas Piggin wrote:
> It would be nice if we could have a __builtin_trap_if that gcc would use 
> conditional traps with, (and which never assumes following code is 
> unreachable even for constant true, so we can use it with WARN and put 
> explicit unreachable for BUG).

It automatically does that with just __builtin_trap, see my other mail :-)


Segher


Re: [PATCH] arm64: defconfig: enable modern virtio pci device

2021-02-11 Thread Arnd Bergmann
On Wed, Feb 10, 2021 at 8:05 PM Anders Roxell  wrote:
>
> Since the patch ("virtio-pci: introduce modern device module") was added, it
> is no longer possible to boot a defconfig kernel in qemu with a virtio pci
> device.  Adding a CONFIG_VIRTIO_PCI_MODERN=y fragment makes the kernel able
> to boot.
>
> Signed-off-by: Anders Roxell 
> ---
>  arch/arm/configs/multi_v7_defconfig | 1 +
>  arch/arm64/configs/defconfig| 1 +

Acked-by: Arnd Bergmann 

Michael, can you pick this up in the vhost tree that introduces the regression?

 Arnd


Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Segher Boessenkool
On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote:
> powerpc BUG_ON() is based on using twnei or tdnei instruction,
> which obliges gcc to format the condition into a 0 or 1 value
> in a register.

Huh?  Why is that?

Will it work better if this used __builtin_trap?  Or does the kernel only
detect very specific forms of trap instructions?

> By using a generic implementation, gcc will generate a branch
> to the unconditional trap generated by BUG().

That is many more instructions than ideal.

> As modern powerpc implement branch folding, that's even more efficient.

What PowerPC cpus implement branch folding?  I know none.

Some example code generated via __builtin_trap:

void trap(void) { __builtin_trap(); }
void trap_if_0(int x) { if (x == 0) __builtin_trap(); }
void trap_if_not_0(int x) { if (x != 0) __builtin_trap(); }

-m64:

trap:
trap
trap_if_0:
tdeqi 3,0
blr
trap_if_not_0:
tdnei 3,0
blr

-m32:

trap:
trap
trap_if_0:
tweqi 3,0
blr
trap_if_not_0:
twnei 3,0
blr


Segher


[PATCH] powerpc/powernv/pci: Use kzalloc() for phb related allocations

2021-02-11 Thread Michael Ellerman
As part of commit fbbefb320214 ("powerpc/pci: Move PHB discovery for
PCI_DN using platforms"), I switched some allocations from
memblock_alloc() to kmalloc(), otherwise memblock would warn that it
was being called after slab init.

However I missed that the code relied on the allocations being zeroed,
without which we could end up crashing:

  pci_bus :00: busn_res: [bus 00-ff] end is updated to ff
  BUG: Unable to handle kernel data access on read at 0x6b6b6b6b6b6b6af7
  Faulting instruction address: 0xc00dbc90
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
  ...
  NIP  pnv_ioda_get_pe_state+0xe0/0x1d0
  LR   pnv_ioda_get_pe_state+0xb4/0x1d0
  Call Trace:
pnv_ioda_get_pe_state+0xb4/0x1d0 (unreliable)
pnv_pci_config_check_eeh.isra.9+0x78/0x270
pnv_pci_read_config+0xf8/0x160
pci_bus_read_config_dword+0xa4/0x120
pci_bus_generic_read_dev_vendor_id+0x54/0x270
pci_scan_single_device+0xb8/0x140
pci_scan_slot+0x80/0x1b0
pci_scan_child_bus_extend+0x94/0x490
pcibios_scan_phb+0x1f8/0x3c0
pcibios_init+0x8c/0x12c
do_one_initcall+0x94/0x510
kernel_init_freeable+0x35c/0x3fc
kernel_init+0x2c/0x168
ret_from_kernel_thread+0x5c/0x70

Switch them to kzalloc().
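
As a quick illustration of why this is sufficient (sketch only, with a made-up
structure and helper name, not part of the patch): kzalloc() is kmalloc() with
__GFP_ZERO, so the allocation comes back fully zeroed.

	#include <linux/slab.h>

	struct foo {			/* stand-in for the phb structures above */
		int state;
		void *diag_data;
	};

	/* Returns zero-initialised memory, so no field is left uninitialised. */
	static struct foo *alloc_foo_zeroed(void)
	{
		return kzalloc(sizeof(struct foo), GFP_KERNEL);
		/* equivalent to kmalloc(sizeof(struct foo), GFP_KERNEL | __GFP_ZERO) */
	}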

Fixes: fbbefb320214 ("powerpc/pci: Move PHB discovery for PCI_DN using platforms")
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7ee14ac275bd..f0f901683a2f 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2921,7 +2921,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb_id = be64_to_cpup(prop64);
pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);
 
-   phb = kmalloc(sizeof(*phb), GFP_KERNEL);
+   phb = kzalloc(sizeof(*phb), GFP_KERNEL);
if (!phb)
panic("%s: Failed to allocate %zu bytes\n", __func__,
  sizeof(*phb));
@@ -2970,7 +2970,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
else
phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
 
-   phb->diag_data = kmalloc(phb->diag_data_size, GFP_KERNEL);
+   phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL);
if (!phb->diag_data)
panic("%s: Failed to allocate %u bytes\n", __func__,
  phb->diag_data_size);
@@ -3032,7 +3032,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
}
pemap_off = size;
size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
-   aux = kmalloc(size, GFP_KERNEL);
+   aux = kzalloc(size, GFP_KERNEL);
if (!aux)
panic("%s: Failed to allocate %lu bytes\n", __func__, size);
 
-- 
2.25.1



[PATCH 1/3] powerpc/perf: Adds support for programming of Thresholding in P10

2021-02-11 Thread Michael Ellerman
From: Kajol Jain 

Thresholding, a performance monitoring unit feature, can be
used to identify marked instructions which take more than
expected cycles between start event and end event.
Threshold compare (thresh_cmp) bits are programmed in MMCRA
register. In Power9, thresh_cmp bits were part of the
event code. But in case of P10, thresh_cmp are not part of
event code due to inclusion of MMCR3 bits.

This patch adds an option to use the attr.config1 variable
to pass the thresh_cmp value to be programmed in the
MMCRA register. A new ppmu flag called PPMU_HAS_ATTR_CONFIG1
has been added and this flag is used to notify the use of
attr.config1 variable.

Patch has extended the parameter list of 'compute_mmcr',
to include power_pmu's 'flags' element and parameter list of
get_constraint to include the attr.config1 value. It also extends the
parameter list of power_check_constraints in order to pass the
perf_event list.

As stated by commit ef0e3b650f8d ("powerpc/perf: Fix Threshold
Event Counter Multiplier width for P10"), constraint bits for
thresh_cmp is also needed to be increased to 11 bits, which is
handled as part of this patch. We added bit number 53 as part
of constraint bits of thresh_cmp for power10 to make it an
11 bit field.

Updated layout for p10:

/*
 * Layout of constraint bits:
 *
 *        60        56        52        48        44        40        36        32
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   [   fab_match   ] [   thresh_cmp  ] [   thresh_ctl   ] [                     ]
 *                             |                                |
 *   [ thresh_cmp bits for p10 ]                   thresh_sel -*
 *
 *        28        24        20        16        12         8         4         0
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   [ ] |   [ ]   |  [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1]
 *    |  |         |                |        |
 *  BHRB IFM -*    |  |*radix_scope |        Count of events for each PMC.
 *  EBB -*         |                |        p1, p2, p3, p4, p5, p6.
 *  L1 I/D qualifier -*             |
 *  nc - number of counters -------*
 *
 * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
 * we want the low bit of each field to be added to any existing value.
 *
 * Everything else is a value field.
 */

Result:
command#: cat /sys/devices/cpu/format/thresh_cmp
config1:0-17

ex. usage:

command#: perf record -I --weight -d -e cpu/event=0x67340101EC,thresh_cmp=500/ ./ebizzy -S 2 -t 1 -s 4096
1826636 records/s
real  2.00 s
user  2.00 s
sys   0.00 s
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.038 MB perf.data (61 samples) ]
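
To show where config1 ends up from user space, a minimal perf_event_open()
sketch (event code and threshold are example values, not taken from the patch):

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Open a raw PMU event and pass thresh_cmp through attr.config1. */
	static int open_threshold_event(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = 0x67340101ECULL;	/* raw event code, as in the example above */
		attr.config1 = 500;		/* thresh_cmp, programmed into MMCRA */

		/* monitor the calling thread on any CPU */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}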

Signed-off-by: Kajol Jain 
Signed-off-by: Michael Ellerman 
Link: https://lore.kernel.org/r/20210209095234.837356-1-kj...@linux.ibm.com
---
 arch/powerpc/include/asm/perf_event_server.h |  5 +-
 arch/powerpc/perf/core-book3s.c  | 15 +++--
 arch/powerpc/perf/isa207-common.c| 67 +---
 arch/powerpc/perf/isa207-common.h| 15 +++--
 arch/powerpc/perf/mpc7450-pmu.c  |  5 +-
 arch/powerpc/perf/power10-pmu.c  |  4 +-
 arch/powerpc/perf/power5+-pmu.c  |  5 +-
 arch/powerpc/perf/power5-pmu.c   |  5 +-
 arch/powerpc/perf/power6-pmu.c   |  5 +-
 arch/powerpc/perf/power7-pmu.c   |  5 +-
 arch/powerpc/perf/ppc970-pmu.c   |  5 +-
 11 files changed, 102 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 3b7baba01c92..00e7e671bb4b 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -36,9 +36,9 @@ struct power_pmu {
unsigned long   test_adder;
int (*compute_mmcr)(u64 events[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
-   struct perf_event *pevents[]);
+   struct perf_event *pevents[], u32 flags);
int (*get_constraint)(u64 event_id, unsigned long *mskp,
-   unsigned long *valp);
+   unsigned long *valp, u64 event_config1);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
void(*get_mem_data_src)(union perf_mem_data_src *dsrc,
@@ -83,6 +83,7 @@ struct power_pmu {
 #define PPMU_NO_SIAR   0x0100 /* Do not use SIAR */
 #define PPMU_ARCH_31   0x0200 /* Has MMCR3, SIER2 and SIER3 */
 #define PPMU_P10_DD1   0x0400 /* Is power10 DD1 processor version */
+#define PPMU_HAS_ATTR_CONFIG1  0x0800 /* Using config1 

Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()

2021-02-11 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of February 11, 2021 5:41 pm:
> powerpc BUG_ON() is based on using twnei or tdnei instruction,
> which obliges gcc to format the condition into a 0 or 1 value
> in a register.
> 
> By using a generic implementation, gcc will generate a branch
> to the unconditional trap generated by BUG().

We don't want to do this on 64s because that will lose the useful CFAR
contents.

Unfortunately the code generation is not great and the registers that 
give some useful information about the condition are often mangled :(

It would be nice if we could have a __builtin_trap_if that gcc would use 
conditional traps with, (and which never assumes following code is 
unreachable even for constant true, so we can use it with WARN and put 
explicit unreachable for BUG).

> 
> As modern powerpc implement branch folding, that's even more efficient.

I think POWER will speculate conditional traps as non faulting always
so it should be just as good if not better than the branch.

Thanks,
Nick


Re: [PATCH] powerpc/traps: Declare unrecoverable_exception() as __noreturn

2021-02-11 Thread Christophe Leroy




On 11/02/2021 at 08:47, Gabriel Paubert wrote:

On Thu, Feb 11, 2021 at 06:34:43AM +, Christophe Leroy wrote:

unrecoverable_exception() is never expected to return, most callers
have an infinite loop in case it returns.

Ensure it really never returns by terminating it with a BUG(), and
declare it __noreturn.

It always GCC to really simplify functions calling it. In the example below,


s/always/allows ?


Yes



(Otherwise I can't parse it.)


it avoids the stack frame in the likely fast path and avoids code duplication
for the exit.


Indeed, nice code generation improvement.
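
As a generic illustration of the attribute in question (not the kernel's exact
macros, just a sketch of what the compiler is allowed to do with it):

	#include <stdlib.h>

	/* The compiler may drop the caller's epilogue on this path, because it
	 * knows die() never returns. */
	__attribute__((noreturn)) static void die(const char *msg)
	{
		(void)msg;
		abort();
	}

	int check(int cond)
	{
		if (cond)
			die("unrecoverable");
		return 0;
	}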



With this patch:

0348 :
 348:   81 43 00 84 lwz r10,132(r3)
 34c:   71 48 00 02 andi.   r8,r10,2
 350:   41 82 00 2c beq 37c 
 354:   71 4a 40 00 andi.   r10,r10,16384
 358:   40 82 00 20 bne 378 
 35c:   80 62 00 70 lwz r3,112(r2)
 360:   74 63 00 01 andis.  r3,r3,1
 364:   40 82 00 28 bne 38c 
 368:   7d 40 00 a6 mfmsr   r10
 36c:   7c 11 13 a6 mtspr   81,r0
 370:   7c 12 13 a6 mtspr   82,r0
 374:   4e 80 00 20 blr
 378:   48 00 00 00 b   378 


Infinite loop (seems to be on test of MSR_PR)?


Yes, that's what you get when CONFIG_BUG is not selected.

/include/asm-generic/bug.h:

#ifndef HAVE_ARCH_BUG
#define BUG() do {} while (1)
#endif




Gabriel


 37c:   94 21 ff f0 stwu r1,-16(r1)
 380:   7c 08 02 a6 mflr r0
 384:   90 01 00 14 stw r0,20(r1)
 388:   48 00 00 01 bl  388 
388: R_PPC_REL24unrecoverable_exception
 38c:   38 e2 00 70 addi r7,r2,112
 390:   3d 00 00 01 lis r8,1
 394:   7c c0 38 28 lwarx   r6,0,r7
 398:   7c c6 40 78 andc r6,r6,r8
 39c:   7c c0 39 2d stwcx.  r6,0,r7
 3a0:   40 a2 ff f4 bne 394 
 3a4:   38 60 00 01 li  r3,1
 3a8:   4b ff ff c0 b   368 

Without this patch:

0348 :
 348:   94 21 ff f0 stwu r1,-16(r1)
 34c:   93 e1 00 0c stw r31,12(r1)
 350:   7c 7f 1b 78 mr  r31,r3
 354:   81 23 00 84 lwz r9,132(r3)
 358:   71 2a 00 02 andi.   r10,r9,2
 35c:   41 82 00 34 beq 390 
 360:   71 29 40 00 andi.   r9,r9,16384
 364:   40 82 00 28 bne 38c 
 368:   80 62 00 70 lwz r3,112(r2)
 36c:   74 63 00 01 andis.  r3,r3,1
 370:   40 82 00 3c bne 3ac 
 374:   7d 20 00 a6 mfmsr   r9
 378:   7c 11 13 a6 mtspr   81,r0
 37c:   7c 12 13 a6 mtspr   82,r0
 380:   83 e1 00 0c lwz r31,12(r1)
 384:   38 21 00 10 addi r1,r1,16
 388:   4e 80 00 20 blr
 38c:   48 00 00 00 b   38c 
 390:   7c 08 02 a6 mflr r0
 394:   90 01 00 14 stw r0,20(r1)
 398:   48 00 00 01 bl  398 
398: R_PPC_REL24unrecoverable_exception
 39c:   80 01 00 14 lwz r0,20(r1)
 3a0:   81 3f 00 84 lwz r9,132(r31)
 3a4:   7c 08 03 a6 mtlr r0
 3a8:   4b ff ff b8 b   360 
 3ac:   39 02 00 70 addi r8,r2,112
 3b0:   3d 40 00 01 lis r10,1
 3b4:   7c e0 40 28 lwarx   r7,0,r8
 3b8:   7c e7 50 78 andc r7,r7,r10
 3bc:   7c e0 41 2d stwcx.  r7,0,r8
 3c0:   40 a2 ff f4 bne 3b4 
 3c4:   38 60 00 01 li  r3,1
 3c8:   4b ff ff ac b   374 

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/include/asm/interrupt.h | 2 +-
  arch/powerpc/kernel/interrupt.c  | 1 -
  arch/powerpc/kernel/traps.c  | 2 ++
  3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index dcff30e3919b..fa8bfb91f8df 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -411,7 +411,7 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
  DECLARE_INTERRUPT_HANDLER(CacheLockingException);
  DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
  DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
-DECLARE_INTERRUPT_HANDLER(unrecoverable_exception);
+DECLARE_INTERRUPT_HANDLER(unrecoverable_exception) __noreturn;
  DECLARE_INTERRUPT_HANDLER(WatchdogException);
  DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
  
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index eca3be36c18c..7e7106641ca9 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -440,7 +440,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
return