Re: [PATCH v2 0/2] of: of_device.h cleanups
On Thu, Feb 11, 2021 at 05:27:43PM -0600, Rob Herring wrote: > This is a couple of cleanups for of_device.h. They fell out from my > attempt at decoupling of_device.h and of_platform.h which is a mess > and I haven't finished, but there's no reason to wait on these. Reviewed-by: Greg Kroah-Hartman
Re: [PATCH v5 06/10] powerpc/signal64: Replace setup_sigcontext() w/ unsafe_setup_sigcontext()
Hi Chris, > Previously setup_sigcontext() performed a costly KUAP switch on every > uaccess operation. These repeated uaccess switches cause a significant > drop in signal handling performance. > > Rewrite setup_sigcontext() to assume that a userspace write access window > is open. Replace all uaccess functions with their 'unsafe' versions > which avoid the repeated uaccess switches. Just to clarify the commit message a bit: you're also changing the callers of the old safe versions to first open the window, then call the unsafe variants, then close the window again. > Signed-off-by: Christopher M. Riedl > --- > arch/powerpc/kernel/signal_64.c | 70 - > 1 file changed, 43 insertions(+), 27 deletions(-) > > diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c > index 8e1d804ce552..4248e4489ff1 100644 > --- a/arch/powerpc/kernel/signal_64.c > +++ b/arch/powerpc/kernel/signal_64.c > @@ -101,9 +101,13 @@ static void prepare_setup_sigcontext(struct task_struct > *tsk, int ctx_has_vsx_re > * Set up the sigcontext for the signal frame. 
> */ > > -static long setup_sigcontext(struct sigcontext __user *sc, > - struct task_struct *tsk, int signr, sigset_t *set, > - unsigned long handler, int ctx_has_vsx_region) > +#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, > \ > + ctx_has_vsx_region, e) \ > + unsafe_op_wrap(__unsafe_setup_sigcontext(sc, tsk, signr, set, \ > + handler, ctx_has_vsx_region), e) > +static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc, > + struct task_struct *tsk, int signr, > sigset_t *set, > + unsigned long handler, int > ctx_has_vsx_region) > { > /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the >* process never used altivec yet (MSR_VEC is zero in pt_regs of > @@ -118,20 +122,19 @@ static long setup_sigcontext(struct sigcontext __user > *sc, > #endif > struct pt_regs *regs = tsk->thread.regs; > unsigned long msr = regs->msr; > - long err = 0; > /* Force usr to alway see softe as 1 (interrupts enabled) */ > unsigned long softe = 0x1; > > BUG_ON(tsk != current); > > #ifdef CONFIG_ALTIVEC > - err |= __put_user(v_regs, &sc->v_regs); > + unsafe_put_user(v_regs, &sc->v_regs, efault_out); > > /* save altivec registers */ > if (tsk->thread.used_vr) { > /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ > - err |= __copy_to_user(v_regs, &tsk->thread.vr_state, > - 33 * sizeof(vector128)); > + unsafe_copy_to_user(v_regs, &tsk->thread.vr_state, > + 33 * sizeof(vector128), efault_out); > /* set MSR_VEC in the MSR value in the frame to indicate that > sc->v_reg) >* contains valid data. >*/ > @@ -140,12 +143,12 @@ static long setup_sigcontext(struct sigcontext __user > *sc, > /* We always copy to/from vrsave, it's 0 if we don't have or don't >* use altivec. 
>*/ > - err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]); > + unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], > efault_out); > #else /* CONFIG_ALTIVEC */ > - err |= __put_user(0, &sc->v_regs); > + unsafe_put_user(0, &sc->v_regs, efault_out); > #endif /* CONFIG_ALTIVEC */ > /* copy fpr regs and fpscr */ > - err |= copy_fpr_to_user(&sc->fp_regs, tsk); > + unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out); > > /* >* Clear the MSR VSX bit to indicate there is no valid state attached > @@ -160,24 +163,27 @@ static long setup_sigcontext(struct sigcontext __user > *sc, >*/ > if (tsk->thread.used_vsr && ctx_has_vsx_region) { > v_regs += ELF_NVRREG; > - err |= copy_vsx_to_user(v_regs, tsk); > + unsafe_copy_vsx_to_user(v_regs, tsk, efault_out); > /* set MSR_VSX in the MSR value in the frame to >* indicate that sc->vs_reg) contains valid data. >*/ > msr |= MSR_VSX; > } > #endif /* CONFIG_VSX */ > - err |= __put_user(&sc->gp_regs, &sc->regs); > + unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out); > WARN_ON(!FULL_REGS(regs)); > - err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); > - err |= __put_user(msr, &sc->gp_regs[PT_MSR]); > - err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]); > - err |= __put_user(signr, &sc->signal); > - err |= __put_user(handler, &sc->handler); > + unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out); > + unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out); > + unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out
Re: [PATCH v5 05/10] powerpc/signal64: Remove TM ifdefery in middle of if/else block
Hi Chris, > Rework the messy ifdef breaking up the if-else for TM similar to > commit f1cf4f93de2f ("powerpc/signal32: Remove ifdefery in middle of > if/else"). I'm not sure what 'the messy ifdef' and 'the if-else for TM' is (yet): perhaps you could start the commit message with a tiny bit of background. > Unlike that commit for ppc32, the ifdef can't be removed entirely since > uc_transact in sigframe depends on CONFIG_PPC_TRANSACTIONAL_MEM. > > Signed-off-by: Christopher M. Riedl > --- > arch/powerpc/kernel/signal_64.c | 16 +++- > 1 file changed, 7 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c > index b211a8ea4f6e..8e1d804ce552 100644 > --- a/arch/powerpc/kernel/signal_64.c > +++ b/arch/powerpc/kernel/signal_64.c > @@ -710,9 +710,7 @@ SYSCALL_DEFINE0(rt_sigreturn) > struct pt_regs *regs = current_pt_regs(); > struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; > sigset_t set; > -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > unsigned long msr; > -#endif > > /* Always make any pending restarted system calls return -EINTR */ > current->restart_block.fn = do_no_restart_syscall; > @@ -765,7 +763,10 @@ SYSCALL_DEFINE0(rt_sigreturn) > > if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) > goto badframe; > +#endif > + > if (MSR_TM_ACTIVE(msr)) { > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > /* We recheckpoint on return. 
*/ > struct ucontext __user *uc_transact; > > @@ -778,9 +779,8 @@ SYSCALL_DEFINE0(rt_sigreturn) > if (restore_tm_sigcontexts(current, &uc->uc_mcontext, > &uc_transact->uc_mcontext)) > goto badframe; > - } else > #endif > - { > + } else { > /* >* Fall through, for non-TM restore >* I think you can get rid of all the ifdefs in _this function_ by providing a couple of stubs: diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a66f435dabbf..19059a4b798f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1120,6 +1120,7 @@ void restore_tm_state(struct pt_regs *regs) #else #define tm_recheckpoint_new_task(new) #define __switch_to_tm(prev, new) +void tm_reclaim_current(uint8_t cause) {} #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ static inline void save_sprs(struct thread_struct *t) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 8e1d804ce552..ed58ca019ad9 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -594,6 +594,13 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, return err; } +#else +static long restore_tm_sigcontexts(struct task_struct *tsk, + struct sigcontext __user *sc, + struct sigcontext __user *tm_sc) +{ + return -EINVAL; +} #endif /* @@ -722,7 +729,6 @@ SYSCALL_DEFINE0(rt_sigreturn) goto badframe; set_current_blocked(&set); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * If there is a transactional state then throw it away. * The purpose of a sigreturn is to destroy all traces of the @@ -763,10 +769,8 @@ SYSCALL_DEFINE0(rt_sigreturn) if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; -#endif if (MSR_TM_ACTIVE(msr)) { -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* We recheckpoint on return. 
*/ struct ucontext __user *uc_transact; @@ -779,7 +783,6 @@ SYSCALL_DEFINE0(rt_sigreturn) if (restore_tm_sigcontexts(current, &uc->uc_mcontext, &uc_transact->uc_mcontext)) goto badframe; -#endif } else { /* * Fall through, for non-TM restore My only concern here was whether it was valid to access if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) if CONFIG_PPC_TRANSACTIONAL_MEM was not defined, but I didn't think of any obvious reason why it wouldn't be... > @@ -818,10 +818,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t > *set, > unsigned long newsp = 0; > long err = 0; > struct pt_regs *regs = tsk->thread.regs; > -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > /* Save the thread's msr before get_tm_stackpointer() changes it */ > unsigned long msr = regs->msr; > -#endif I wondered if that comment still makes sense in the absence of the ifdef. It is preserved in commit f1cf4f93de2f ("powerpc/signal32: Remove ifdefery in middle of if/else"), and I can't figure out how you would improve it, so I guess it's probably good as it is. > frame = get_sigframe(ksig, tsk, sizeof(*frame), 0); > if (!access_ok(frame, sizeof(*frame))) > @@ -836,8 +834,9 @
Re: [PATCH v5 04/10] powerpc: Reference param in MSR_TM_ACTIVE() macro
Hi Chris, (totally bikeshedding, but I'd use the full word parameter in the subject if you have enough spare characters.) > Unlike the other MSR_TM_* macros, MSR_TM_ACTIVE does not reference or > use its parameter unless CONFIG_PPC_TRANSACTIONAL_MEM is defined. This > causes an 'unused variable' compile warning unless the variable is also > guarded with CONFIG_PPC_TRANSACTIONAL_MEM. > > Reference but do nothing with the argument in the macro to avoid a > potential compile warning. Andrew asked why we weren't seeing these warnings already. I was able to reproduce them with CONFIG_PPC_TRANSACTIONAL_MEM off, but only if I compiled with W=1: arch/powerpc/kernel/process.c: In function ‘enable_kernel_fp’: arch/powerpc/kernel/process.c:210:16: error: variable ‘cpumsr’ set but not used [-Werror=unused-but-set-variable] 210 | unsigned long cpumsr; |^~ Having said that, this change seems like a good idea, squashing warnings at W=1 is still valuable. Reviewed-by: Daniel Axtens Kind regards, Daniel > Signed-off-by: Christopher M. Riedl > --- > arch/powerpc/include/asm/reg.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h > index e40a921d78f9..c5a3e856191c 100644 > --- a/arch/powerpc/include/asm/reg.h > +++ b/arch/powerpc/include/asm/reg.h > @@ -124,7 +124,7 @@ > #ifdef CONFIG_PPC_TRANSACTIONAL_MEM > #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */ > #else > -#define MSR_TM_ACTIVE(x) 0 > +#define MSR_TM_ACTIVE(x) ((void)(x), 0) > #endif > > #if defined(CONFIG_PPC_BOOK3S_64) > -- > 2.26.1
linux-next: manual merge of the spi tree with the powerpc tree
Hi all, Today's linux-next merge of the spi tree got a conflict in: drivers/spi/spi-mpc52xx.c between commit: e10656114d32 ("spi: mpc52xx: Avoid using get_tbl()") from the powerpc tree and commit: 258ea99fe25a ("spi: spi-mpc52xx: Use new structure for SPI transfer delays") from the spi tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non-trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. BTW Mark: the author's address in 258ea99fe25a uses a non-existent domain :-( -- Cheers, Stephen Rothwell diff --cc drivers/spi/spi-mpc52xx.c index e6a30f232370,36f941500676.. --- a/drivers/spi/spi-mpc52xx.c +++ b/drivers/spi/spi-mpc52xx.c @@@ -247,8 -247,10 +247,10 @@@ static int mpc52xx_spi_fsmstate_transfe /* Is the transfer complete? */ ms->len--; if (ms->len == 0) { - ms->timestamp = get_tbl(); + ms->timestamp = mftb(); - ms->timestamp += ms->transfer->delay_usecs * tb_ticks_per_usec; + if (ms->transfer->delay.unit == SPI_DELAY_UNIT_USECS) + ms->timestamp += ms->transfer->delay.value * +tb_ticks_per_usec; ms->state = mpc52xx_spi_fsmstate_wait; return FSM_CONTINUE; } pgp5F9M5sQSJb.pgp Description: OpenPGP digital signature
Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
Lakshmi Ramasubramanian writes: > On 2/11/21 6:11 PM, Thiago Jung Bauermann wrote: >> Lakshmi Ramasubramanian writes: >> >>> On 2/11/21 3:59 PM, Thiago Jung Bauermann wrote: Lakshmi Ramasubramanian writes: > On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote: >> Hi Rob, >> [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem >> This change causes build problem for x86_64 architecture (please see the >> mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses >> "elf_load_addr" for the ELF header buffer address and not >> "elf_headers_mem". >> struct kimage_arch { >>... >>/* Core ELF header buffer */ >>void *elf_headers; >>unsigned long elf_headers_sz; >>unsigned long elf_load_addr; >> }; >> I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and >> PPC64 since they are the only ones using this function now. >> #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64) > Sorry - I meant to say > #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64) > Does it build correctly if you rename elf_headers_mem to elf_load_addr? Or the other way around, renaming x86's elf_load_addr to elf_headers_mem. I don't really have a preference. >>> >>> Yes - changing arm64 and ppc from "elf_headers_mem" to "elf_load_addr" >>> builds >>> fine. >>> >>> But I am concerned about a few other architectures that also define "struct >>> kimage_arch" such as "parisc", "arm" which do not have any ELF related >>> fields. >>> They would not build if the config defines CONFIG_KEXEC_FILE and >>> CONFIG_OF_FLATTREE. >>> >>> Do you think that could be an issue? >> That's a good point. But in practice, arm doesn't support >> CONFIG_KEXEC_FILE. And while parisc does support CONFIG_KEXEC_FILE, as >> far as I could determine it doesn't support CONFIG_OF. >> So IMHO we don't need to worry about them. We'll cross that bridge if we >> get there. 
If they ever implement KEXEC_FILE or OF_FLATTREE support, >> then (again, IMHO) the natural solution would be for them to name the >> ELF header member the same way the other arches do. >> And since no other architecture defines struct kimage_arch, those are >> the only ones we need to consider. >> > > Sounds good Thiago. > > I'll rename arm64 and ppc kimage_arch ELF address field to match that defined > for x86/x64. > > Also, will add "fdt_size" param to of_kexec_alloc_and_setup_fdt(). For now, > I'll > use 2*fdt_totalsize(initial_boot_params) for ppc. > > Will send the updated patches shortly. Sounds good. There will be a small conflict with powerpc/next because of the patch I mentioned, but it's simple to fix by whoever merges the series. -- Thiago Jung Bauermann IBM Linux Technology Center
Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
On 2/11/21 6:11 PM, Thiago Jung Bauermann wrote: Lakshmi Ramasubramanian writes: On 2/11/21 3:59 PM, Thiago Jung Bauermann wrote: Lakshmi Ramasubramanian writes: On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote: Hi Rob, [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem This change causes build problem for x86_64 architecture (please see the mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses "elf_load_addr" for the ELF header buffer address and not "elf_headers_mem". struct kimage_arch { ... /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; }; I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and PPC64 since they are the only ones using this function now. #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64) Sorry - I meant to say #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64) Does it build correctly if you rename elf_headers_mem to elf_load_addr? Or the other way around, renaming x86's elf_load_addr to elf_headers_mem. I don't really have a preference. Yes - changing arm64 and ppc from "elf_headers_mem" to "elf_load_addr" builds fine. But I am concerned about a few other architectures that also define "struct kimage_arch" such as "parisc", "arm" which do not have any ELF related fields. They would not build if the config defines CONFIG_KEXEC_FILE and CONFIG_OF_FLATTREE. Do you think that could be an issue? That's a good point. But in practice, arm doesn't support CONFIG_KEXEC_FILE. And while parisc does support CONFIG_KEXEC_FILE, as far as I could determine it doesn't support CONFIG_OF. So IMHO we don't need to worry about them. We'll cross that bridge if we get there. If they ever implement KEXEC_FILE or OF_FLATTREE support, then (again, IMHO) the natural solution would be for them to name the ELF header member the same way the other arches do. And since no other architecture defines struct kimage_arch, those are the only ones we need to consider. 
Sounds good Thiago. I'll rename arm64 and ppc kimage_arch ELF address field to match that defined for x86/x64. Also, will add "fdt_size" param to of_kexec_alloc_and_setup_fdt(). For now, I'll use 2*fdt_totalsize(initial_boot_params) for ppc. Will send the updated patches shortly. -lakshmi
Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
Lakshmi Ramasubramanian writes: > On 2/11/21 3:59 PM, Thiago Jung Bauermann wrote: >> Lakshmi Ramasubramanian writes: >> >>> On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote: Hi Rob, [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem This change causes build problem for x86_64 architecture (please see the mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses "elf_load_addr" for the ELF header buffer address and not "elf_headers_mem". struct kimage_arch { ... /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; }; I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and PPC64 since they are the only ones using this function now. #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64) >>> Sorry - I meant to say >>> #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64) >>> >> Does it build correctly if you rename elf_headers_mem to elf_load_addr? >> Or the other way around, renaming x86's elf_load_addr to >> elf_headers_mem. I don't really have a preference. > > Yes - changing arm64 and ppc from "elf_headers_mem" to "elf_load_addr" builds > fine. > > But I am concerned about a few other architectures that also define "struct > kimage_arch" such as "parisc", "arm" which do not have any ELF related fields. > They would not build if the config defines CONFIG_KEXEC_FILE and > CONFIG_OF_FLATTREE. > > Do you think that could be an issue? That's a good point. But in practice, arm doesn't support CONFIG_KEXEC_FILE. And while parisc does support CONFIG_KEXEC_FILE, as far as I could determine it doesn't support CONFIG_OF. So IMHO we don't need to worry about them. We'll cross that bridge if we get there. If they ever implement KEXEC_FILE or OF_FLATTREE support, then (again, IMHO) the natural solution would be for them to name the ELF header member the same way the other arches do. And since no other architecture defines struct kimage_arch, those are the only ones we need to consider. 
-- Thiago Jung Bauermann IBM Linux Technology Center
Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
Lakshmi Ramasubramanian writes: > On 2/11/21 5:09 PM, Thiago Jung Bauermann wrote: >> There's actually a complication that I just noticed and needs to be >> addressed. More below. >> > > <...> > >>> + >>> +/* >>> + * of_kexec_alloc_and_setup_fdt - Alloc and setup a new Flattened Device >>> Tree >>> + * >>> + * @image: kexec image being loaded. >>> + * @initrd_load_addr: Address where the next initrd will be loaded. >>> + * @initrd_len:Size of the next initrd, or 0 if there will be >>> none. >>> + * @cmdline: Command line for the next kernel, or NULL if >>> there will >>> + * be none. >>> + * >>> + * Return: fdt on success, or NULL errno on error. >>> + */ >>> +void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, >>> + unsigned long initrd_load_addr, >>> + unsigned long initrd_len, >>> + const char *cmdline) >>> +{ >>> + void *fdt; >>> + int ret, chosen_node; >>> + const void *prop; >>> + unsigned long fdt_size; >>> + >>> + fdt_size = fdt_totalsize(initial_boot_params) + >>> + (cmdline ? strlen(cmdline) : 0) + >>> + FDT_EXTRA_SPACE; >> Just adding 4 KB to initial_boot_params won't be enough for crash >> kernels on ppc64. The current powerpc code doubles the size of >> initial_boot_params (which is normally larger than 4 KB) and even that >> isn't enough. A patch was added to powerpc/next today which uses a more >> precise (but arch-specific) formula: >> https://lore.kernel.org/linuxppc-dev/161243826811.119001.14083048209224609814.stgit@hbathini/ >> So I believe we need a hook here where architectures can provide their >> own specific calculation for the size of the fdt. Perhaps a weakly >> defined function providing a default implementation which an >> arch-specific file can override (a la arch_kexec_kernel_image_load())? >> Then the powerpc specific hook would be the kexec_fdt_totalsize_ppc64() >> function from the patch I linked above. 
>> > > Do you think it'd better to add "fdt_size" parameter to > of_kexec_alloc_and_setup_fdt() so that the caller can provide the > desired FDT buffer size? Yes, that is actually simpler and better than my idea. :-) -- Thiago Jung Bauermann IBM Linux Technology Center
[RFC PATCH] powerpc/64s: introduce call_realmode to run C code in real-mode
The regular kernel stack can not be accessed in real mode in hash guest kernels, which prevents the MMU from being disabled in general C code. Provide a helper that can call a function pointer in real mode using the emergency stack (accessible in real mode). Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/asm-prototypes.h | 1 + arch/powerpc/include/asm/book3s/64/mmu.h | 2 + arch/powerpc/include/asm/thread_info.h| 16 arch/powerpc/kernel/irq.c | 16 arch/powerpc/kernel/misc_64.S | 22 +++ arch/powerpc/mm/book3s64/pgtable.c| 48 +++ 6 files changed, 89 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d0b832cbbec8..a973023c390a 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -126,6 +126,7 @@ extern s64 __ashldi3(s64, int); extern s64 __ashrdi3(s64, int); extern int __cmpdi2(s64, s64); extern int __ucmpdi2(u64, u64); +int __call_realmode(int (*fn)(void *arg), void *arg, void *sp); /* tracing */ void _mcount(void); diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 995bbcdd0ef8..80b0d24415ac 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -274,5 +274,7 @@ static inline unsigned long get_user_vsid(mm_context_t *ctx, return get_vsid(context, ea, ssize); } +int call_realmode(int (*fn)(void *arg), void *arg); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 3d8a47af7a25..9279e472d51e 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -172,6 +172,22 @@ static inline bool test_thread_local_flags(unsigned int flags) #define is_elf2_task() (0) #endif +static inline void check_stack_overflow(void) +{ + long sp; + + if 
(!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW)) + return; + + sp = current_stack_pointer & (THREAD_SIZE - 1); + + /* check for stack overflow: is there less than 2KB free? */ + if (unlikely(sp < 2048)) { + pr_err("do_IRQ: stack overflow: %ld\n", sp); + dump_stack(); + } +} + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 6b1eca53e36c..193b47b5b6a5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -620,22 +620,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) return sum; } -static inline void check_stack_overflow(void) -{ - long sp; - - if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW)) - return; - - sp = current_stack_pointer & (THREAD_SIZE - 1); - - /* check for stack overflow: is there less than 2KB free? */ - if (unlikely(sp < 2048)) { - pr_err("do_IRQ: stack overflow: %ld\n", sp); - dump_stack(); - } -} - void __do_irq(struct pt_regs *regs) { unsigned int irq; diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 070465825c21..5e911d0b0b16 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -27,6 +27,28 @@ .text +#ifdef CONFIG_PPC_BOOK3S_64 +_GLOBAL(__call_realmode) + mflrr0 + std r0,16(r1) + stdur1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) + mr r1,r5 + mtctr r3 + mr r3,r4 + mfmsr r4 + xorir4,r4,(MSR_IR|MSR_DR) + mtmsrd r4 + bctrl + mfmsr r4 + xorir4,r4,(MSR_IR|MSR_DR) + mtmsrd r4 + ld r1,0(r1) + ld r0,16(r1) + mtlrr0 + blr + +#endif + _GLOBAL(call_do_softirq) mflrr0 std r0,16(r1) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 5b3a3bae21aa..aad0e2059305 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -474,6 +474,54 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, return true; } +/* + * Executing C code in real-mode in general Book3S-64 code can only be done + * via this function that switches the stack to one inside the 
real-mode-area, + * which may cover only a small first part of real memory on hash guest LPARs. + * fn must be NOKPROBES, must not access vmalloc or anything outside the RMA, + * probably shouldn't enable the MMU or interrupts, etc, and be very careful + * about calling other generic kernel or powerpc functions. + */ +int call_realmode(int (*fn)(void *arg), void *arg) +{ + unsigned long flags; + void *cursp, *emsp; + int ret; + + if (WARN_ON_ONCE(!(mfmsr() & MSR_DR))) + return -EINVAL; + i
Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
On 2/11/21 5:09 PM, Thiago Jung Bauermann wrote: There's actually a complication that I just noticed and needs to be addressed. More below. <...> + +/* + * of_kexec_alloc_and_setup_fdt - Alloc and setup a new Flattened Device Tree + * + * @image: kexec image being loaded. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len:Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Return: fdt on success, or NULL errno on error. + */ +void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, + unsigned long initrd_load_addr, + unsigned long initrd_len, + const char *cmdline) +{ + void *fdt; + int ret, chosen_node; + const void *prop; + unsigned long fdt_size; + + fdt_size = fdt_totalsize(initial_boot_params) + + (cmdline ? strlen(cmdline) : 0) + + FDT_EXTRA_SPACE; Just adding 4 KB to initial_boot_params won't be enough for crash kernels on ppc64. The current powerpc code doubles the size of initial_boot_params (which is normally larger than 4 KB) and even that isn't enough. A patch was added to powerpc/next today which uses a more precise (but arch-specific) formula: https://lore.kernel.org/linuxppc-dev/161243826811.119001.14083048209224609814.stgit@hbathini/ So I believe we need a hook here where architectures can provide their own specific calculation for the size of the fdt. Perhaps a weakly defined function providing a default implementation which an arch-specific file can override (a la arch_kexec_kernel_image_load())? Then the powerpc specific hook would be the kexec_fdt_totalsize_ppc64() function from the patch I linked above. Do you think it'd better to add "fdt_size" parameter to of_kexec_alloc_and_setup_fdt() so that the caller can provide the desired FDT buffer size? thanks, -lakshmi
Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
There's actually a complication that I just noticed and needs to be addressed. More below. Lakshmi Ramasubramanian writes: > From: Rob Herring > > Both arm64 and powerpc do essentially the same FDT /chosen setup for > kexec. The differences are either omissions that arm64 should have > or additional properties that will be ignored. The setup code can be > combined and shared by both powerpc and arm64. > > The differences relative to the arm64 version: > - If /chosen doesn't exist, it will be created (should never happen). > - Any old dtb and initrd reserved memory will be released. > - The new initrd and elfcorehdr are marked reserved. > - "linux,booted-from-kexec" is set. > > The differences relative to the powerpc version: > - "kaslr-seed" and "rng-seed" may be set. > - "linux,elfcorehdr" is set. > - Any existing "linux,usable-memory-range" is removed. > > Combine the code for setting up the /chosen node in the FDT and updating > the memory reservation for kexec, for powerpc and arm64, in > of_kexec_alloc_and_setup_fdt() and move it to "drivers/of/kexec.c". 
> > Signed-off-by: Rob Herring > Signed-off-by: Lakshmi Ramasubramanian > --- > drivers/of/Makefile | 6 ++ > drivers/of/kexec.c | 258 > include/linux/of.h | 13 +++ > 3 files changed, 277 insertions(+) > create mode 100644 drivers/of/kexec.c > > diff --git a/drivers/of/Makefile b/drivers/of/Makefile > index 6e1e5212f058..c13b982084a3 100644 > --- a/drivers/of/Makefile > +++ b/drivers/of/Makefile > @@ -14,4 +14,10 @@ obj-$(CONFIG_OF_RESOLVE) += resolver.o > obj-$(CONFIG_OF_OVERLAY) += overlay.o > obj-$(CONFIG_OF_NUMA) += of_numa.o > > +ifdef CONFIG_KEXEC_FILE > +ifdef CONFIG_OF_FLATTREE > +obj-y+= kexec.o > +endif > +endif > + > obj-$(CONFIG_OF_UNITTEST) += unittest-data/ > diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c > new file mode 100644 > index ..469e09613cdd > --- /dev/null > +++ b/drivers/of/kexec.c > @@ -0,0 +1,258 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (C) 2020 Arm Limited > + * > + * Based on arch/arm64/kernel/machine_kexec_file.c: > + * Copyright (C) 2018 Linaro Limited > + * > + * And arch/powerpc/kexec/file_load.c: > + * Copyright (C) 2016 IBM Corporation > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +/* relevant device tree properties */ > +#define FDT_PROP_KEXEC_ELFHDR"linux,elfcorehdr" > +#define FDT_PROP_MEM_RANGE "linux,usable-memory-range" > +#define FDT_PROP_INITRD_START"linux,initrd-start" > +#define FDT_PROP_INITRD_END "linux,initrd-end" > +#define FDT_PROP_BOOTARGS"bootargs" > +#define FDT_PROP_KASLR_SEED "kaslr-seed" > +#define FDT_PROP_RNG_SEED"rng-seed" > +#define RNG_SEED_SIZE128 > + > +/** > + * fdt_find_and_del_mem_rsv - delete memory reservation with given address > and size > + * > + * @fdt: Flattened device tree for the current kernel. > + * @start: Starting address of the reserved memory. > + * @size:Size of the reserved memory. > + * > + * Return: 0 on success, or negative errno on error. 
> + */ > +static int fdt_find_and_del_mem_rsv(void *fdt, unsigned long start, unsigned > long size) > +{ > + int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); > + > + for (i = 0; i < num_rsvs; i++) { > + u64 rsv_start, rsv_size; > + > + ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); > + if (ret) { > + pr_err("Malformed device tree.\n"); > + return -EINVAL; > + } > + > + if (rsv_start == start && rsv_size == size) { > + ret = fdt_del_mem_rsv(fdt, i); > + if (ret) { > + pr_err("Error deleting device tree > reservation.\n"); > + return -EINVAL; > + } > + > + return 0; > + } > + } > + > + return -ENOENT; > +} > + > +/* > + * of_kexec_alloc_and_setup_fdt - Alloc and setup a new Flattened Device Tree > + * > + * @image: kexec image being loaded. > + * @initrd_load_addr:Address where the next initrd will be loaded. > + * @initrd_len: Size of the next initrd, or 0 if there will be > none. > + * @cmdline: Command line for the next kernel, or NULL if there will > + * be none. > + * > + * Return: fdt on success, or NULL errno on error. > + */ > +void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, > +unsigned long initrd_load_addr, > +unsigned long initrd_len, > +const char *cmdline) > +{ > + void *fdt; > + int ret, chosen_node; > + const void *prop; > + unsigned long fdt_size; > + > + fdt_size = fdt_totalsize(initial_boot_params) + > +(
Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
On 2/11/21 3:59 PM, Thiago Jung Bauermann wrote: Lakshmi Ramasubramanian writes: On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote: Hi Rob, [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem This change causes build problem for x86_64 architecture (please see the mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses "elf_load_addr" for the ELF header buffer address and not "elf_headers_mem". struct kimage_arch { ... /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; }; I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and PPC64 since they are the only ones using this function now. #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64) Sorry - I meant to say #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64) Does it build correctly if you rename elf_headers_mem to elf_load_addr? Or the other way around, renaming x86's elf_load_addr to elf_headers_mem. I don't really have a preference. Yes - changing arm64 and ppc from "elf_headers_mem" to "elf_load_addr" builds fine. But I am concerned about a few other architectures that also define "struct kimage_arch" such as "parisc", "arm" which do not have any ELF related fields. They would not build if the config defines CONFIG_KEXEC_FILE and CONFIG_OF_FLATTREE. Do you think that could be an issue? thanks, -lakshmi That would be better than adding an #if condition. void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline) { ... } #endif /* defined(CONFIG_ARM64) && defined(CONFIG_PPC64) */ Please let me know if you have any concerns. thanks, -lakshmi
Re: [PATCH 5/6] powerpc/mm/64s/hash: Add real-mode change_memory_range() for hash LPAR
Excerpts from Michael Ellerman's message of February 11, 2021 11:51 pm: > When we enabled STRICT_KERNEL_RWX we received some reports of boot > failures when using the Hash MMU and running under phyp. The crashes > are intermittent, and often exhibit as a completely unresponsive > system, or possibly an oops. > > One example, which was caught in xmon: > > [ 14.068327][T1] devtmpfs: mounted > [ 14.069302][T1] Freeing unused kernel memory: 5568K > [ 14.142060][ T347] BUG: Unable to handle kernel instruction fetch > [ 14.142063][T1] Run /sbin/init as init process > [ 14.142074][ T347] Faulting instruction address: 0xc0004400 > cpu 0x2: Vector: 400 (Instruction Access) at [cc7475e0] > pc: c0004400: exc_virt_0x4400_instruction_access+0x0/0x80 > lr: c01862d4: update_rq_clock+0x44/0x110 > sp: cc747880 > msr: 800040001031 > current = 0xcc60d380 > paca= 0xc0001ec9de80 irqmask: 0x03 irq_happened: 0x01 > pid = 347, comm = kworker/2:1 > ... > enter ? for help > [cc747880] c01862d4 update_rq_clock+0x44/0x110 (unreliable) > [cc7478f0] c0198794 update_blocked_averages+0xb4/0x6d0 > [cc7479f0] c0198e40 update_nohz_stats+0x90/0xd0 > [cc747a20] c01a13b4 _nohz_idle_balance+0x164/0x390 > [cc747b10] c01a1af8 newidle_balance+0x478/0x610 > [cc747be0] c01a1d48 pick_next_task_fair+0x58/0x480 > [cc747c40] c0eaab5c __schedule+0x12c/0x950 > [cc747cd0] c0eab3e8 schedule+0x68/0x120 > [cc747d00] c016b730 worker_thread+0x130/0x640 > [cc747da0] c0174d50 kthread+0x1a0/0x1b0 > [cc747e10] c000e0f0 ret_from_kernel_thread+0x5c/0x6c > > This shows that CPU 2, which was idle, woke up and then appears to > randomly take an instruction fault on a completely valid area of > kernel text. > > The cause turns out to be the call to hash__mark_rodata_ro(), late in > boot. Due to the way we layout text and rodata, that function actually > changes the permissions for all of text and rodata to read-only plus > execute. > > To do the permission change we use a hypervisor call, H_PROTECT. 
On > phyp that appears to be implemented by briefly removing the mapping of > the kernel text, before putting it back with the updated permissions. > If any other CPU is executing during that window, it will see spurious > faults on the kernel text and/or data, leading to crashes. > > To fix it we use stop machine to collect all other CPUs, and then have > them drop into real mode (MMU off), while we change the mapping. That > way they are unaffected by the mapping temporarily disappearing. > > Signed-off-by: Michael Ellerman > --- > arch/powerpc/mm/book3s64/hash_pgtable.c | 105 +++- > 1 file changed, 104 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c > b/arch/powerpc/mm/book3s64/hash_pgtable.c > index 3663d3cdffac..01de985df2c4 100644 > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > @@ -8,6 +8,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -400,6 +401,19 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ > > #ifdef CONFIG_STRICT_KERNEL_RWX > + > +struct change_memory_parms { > + unsigned long start, end, newpp; > + unsigned int step, nr_cpus, master_cpu; > + atomic_t cpu_counter; > +}; > + > +// We'd rather this was on the stack but it has to be in the RMO > +static struct change_memory_parms chmem_parms; > + > +// And therefore we need a lock to protect it from concurrent use > +static DEFINE_MUTEX(chmem_lock); > + > static void change_memory_range(unsigned long start, unsigned long end, > unsigned int step, unsigned long newpp) > { > @@ -414,6 +428,73 @@ static void change_memory_range(unsigned long start, > unsigned long end, > mmu_kernel_ssize); > } > > +static int notrace chmem_secondary_loop(struct change_memory_parms *parms) > +{ > + unsigned long msr, tmp, flags; > + int *p; > + > + p = &parms->cpu_counter.counter; > + > + local_irq_save(flags); > + __hard_EE_RI_disable(); > + > + asm volatile 
( > + // Switch to real mode and leave interrupts off > + "mfmsr %[msr] ;" > + "li %[tmp], %[MSR_IR_DR];" > + "andc %[tmp], %[msr], %[tmp] ;" > + "mtmsrd %[tmp] ;" > + > + // Tell the master we are in real mode > + "1: " > + "lwarx %[tmp], 0, %[p] ;" > + "addic %[tmp], %[tmp], -1 ;" > + "stwcx. %[tmp], 0, %[p] ;" > + "bne- 1b ;" > + > + // Spin
Re: [PATCH] powerpc/pci: Remove unimplemented prototypes
On Wed, 2 Sep 2020 13:51:38 +1000, Oliver O'Halloran wrote: > The corresponding definitions were deleted in commit 3d5134ee8341 > ("[POWERPC] Rewrite IO allocation & mapping on powerpc64") which > was merged a mere 13 years ago. Applied to powerpc/next. [1/1] powerpc/pci: Remove unimplemented prototypes https://git.kernel.org/powerpc/c/b3abe590c80e0ba55b6fce48762232d90dbc37a5 cheers
Re: [PATCH] powerpc: remove interrupt handler functions from the noinstr section
On Thu, 11 Feb 2021 16:36:36 +1000, Nicholas Piggin wrote: > The allyesconfig ppc64 kernel fails to link with relocations unable to > fit after commit 3a96570ffceb ("powerpc: convert interrupt handlers to > use wrappers"), which is due to the interrupt handler functions being > put into the .noinstr.text section, which the linker script places on > the opposite side of the main .text section from the interrupt entry > asm code which calls the handlers. > > [...] Applied to powerpc/next. [1/1] powerpc: remove interrupt handler functions from the noinstr section https://git.kernel.org/powerpc/c/e4bb64c7a42e61bcb6f8b70279fc1f7805eaad3f cheers
Re: [PATCH] powerpc/64s: syscall real mode entry use mtmsrd rather than rfid
On Mon, 8 Feb 2021 16:33:26 +1000, Nicholas Piggin wrote: > Have the real mode system call entry handler branch to the kernel > 0xc000... address and then use mtmsrd to enable the MMU, rather than use > SRRs and rfid. > > Commit 8729c26e675c ("powerpc/64s/exception: Move real to virt switch > into the common handler") implemented this style of real mode entry for > other interrupt handlers, so this brings system calls into line with > them, which is the main motivation for the change. > > [...] Applied to powerpc/next. [1/1] powerpc/64s: syscall real mode entry use mtmsrd rather than rfid https://git.kernel.org/powerpc/c/14ad0e7d04f46865775fb010ccd96fb1cc83433a cheers
Re: [PATCH] powerpc/64s: Remove EXSLB interrupt save area
On Mon, 8 Feb 2021 16:34:06 +1000, Nicholas Piggin wrote: > SLB faults should not be taken while the PACA save areas are live, all > memory accesses should be fetches from the kernel text, and access to > PACA and the current stack, before C code is called or any other > accesses are made. > > All of these have pinned SLBs so will not take a SLB fault. Therefore > EXSLB is not required. Applied to powerpc/next. [1/1] powerpc/64s: Remove EXSLB interrupt save area https://git.kernel.org/powerpc/c/ac7c5e9b08acdb54ef3525abcad24bdb3ed05551 cheers
Re: [PATCH] powerpc/64s: interrupt exit improve bounding of interrupt recursion
On Sat, 23 Jan 2021 16:15:04 +1000, Nicholas Piggin wrote: > When replaying pending soft-masked interrupts when an interrupt returns > to an irqs-enabled context, there is a special case required if this was > an asynchronous interrupt to avoid unbounded interrupt recursion. > > This case was not tested for in the case the asynchronous interrupt hit > in user context, because a subsequent nested interrupt would by definition > hit in kernel mode, which then exits via the kernel path which does test > this case. > > [...] Applied to powerpc/next. [1/1] powerpc/64s: interrupt exit improve bounding of interrupt recursion https://git.kernel.org/powerpc/c/c0ef717305f51e29b5ce0c78a6bfe566b3283415 cheers
Re: [PATCH 1/3] powerpc/83xx: Fix build error when CONFIG_PCI=n
On Thu, 11 Feb 2021 00:08:02 +1100, Michael Ellerman wrote: > As reported by lkp: > > arch/powerpc/platforms/83xx/km83xx.c:183:19: error: 'mpc83xx_setup_pci' > undeclared here (not in a function) > 183 | .discover_phbs = mpc83xx_setup_pci, >| ^ >| mpc83xx_setup_arch > > [...] Applied to powerpc/next. [1/3] powerpc/83xx: Fix build error when CONFIG_PCI=n https://git.kernel.org/powerpc/c/5c47c44f157f408c862b144bbd1d1e161a521aa2 [2/3] powerpc/mm/64s: Fix no previous prototype warning https://git.kernel.org/powerpc/c/2bb421a3d93601aa81bc39af7aac7280303e0761 [3/3] powerpc/amigaone: Make amigaone_discover_phbs() static https://git.kernel.org/powerpc/c/f30520c64f290589e91461d7326b497c23e7f5fd cheers
Re: [PATCH V3] powerpc/perf: Adds support for programming of Thresholding in P10
On Tue, 9 Feb 2021 15:22:34 +0530, Kajol Jain wrote: > Thresholding, a performance monitoring unit feature, can be > used to identify marked instructions which take more than > expected cycles between start event and end event. > Threshold compare (thresh_cmp) bits are programmed in MMCRA > register. In Power9, thresh_cmp bits were part of the > event code. But in case of P10, thresh_cmp are not part of > event code due to inclusion of MMCR3 bits. > > [...] Applied to powerpc/next. [1/1] powerpc/perf: Adds support for programming of Thresholding in P10 https://git.kernel.org/powerpc/c/82d2c16b350f72aa21ac2a6860c542aa4b43a51e cheers
Re: [PATCH] powerpc/xive: Assign boolean values to a bool variable
On Sun, 7 Feb 2021 14:43:12 +0800, Jiapeng Chong wrote: > Fix the following coccicheck warnings: > > ./arch/powerpc/kvm/book3s_xive.c:1856:2-17: WARNING: Assignment of 0/1 > to bool variable. > > ./arch/powerpc/kvm/book3s_xive.c:1854:2-17: WARNING: Assignment of 0/1 > to bool variable. Applied to powerpc/next. [1/1] powerpc/xive: Assign boolean values to a bool variable https://git.kernel.org/powerpc/c/c9df3f809cc98b196548864f52d3c4e280dd1970 cheers
Re: [PATCH] powerpc/kexec_file: fix FDT size estimation for kdump kernel
On Thu, 04 Feb 2021 17:01:10 +0530, Hari Bathini wrote: > On systems with large amount of memory, loading kdump kernel through > kexec_file_load syscall may fail with the below error: > > "Failed to update fdt with linux,drconf-usable-memory property" > > This happens because the size estimation for kdump kernel's FDT does > not account for the additional space needed to setup usable memory > properties. Fix it by accounting for the space needed to include > linux,usable-memory & linux,drconf-usable-memory properties while > estimating kdump kernel's FDT size. Applied to powerpc/next. [1/1] powerpc/kexec_file: fix FDT size estimation for kdump kernel https://git.kernel.org/powerpc/c/2377c92e37fe97bc5b365f55cf60f56dfc4849f5 cheers
Re: [PATCH v6 0/2] powerpc/32: Implement C syscall entry/exit (complement)
On Tue, 9 Feb 2021 19:29:26 + (UTC), Christophe Leroy wrote: > This series implements C syscall entry/exit for PPC32. It reuses > the work already done for PPC64. > > This series is based on today's next-test (f538b53fd47a) where main patchs > from v5 are merged in. > > The first patch is important for performance. > > [...] Applied to powerpc/next. [1/3] powerpc/syscall: Do not check unsupported scv vector on PPC32 https://git.kernel.org/powerpc/c/b966f2279048ee9f30d83ef8568b99fa40917c54 [2/3] powerpc/32: Handle bookE debugging in C in syscall entry/exit https://git.kernel.org/powerpc/c/d524dda719f06967db4d3ba519edf9267f84c155 [3/3] powerpc/syscall: Avoid storing 'current' in another pointer https://git.kernel.org/powerpc/c/5b90b9661a3396e00f6e8bcbb617a0787fb683d0 cheers
Re: [PATCH v2 1/3] powerpc/uaccess: get rid of small constant size cases in raw_copy_{to, from}_user()
On Tue, 9 Feb 2021 14:02:12 + (UTC), Christophe Leroy wrote: > Copied from commit 4b842e4e25b1 ("x86: get rid of small > constant size cases in raw_copy_{to,from}_user()") > > Very few call sites where that would be triggered remain, and none > of those is anywhere near hot enough to bother. Applied to powerpc/next. [1/3] powerpc/uaccess: get rid of small constant size cases in raw_copy_{to,from}_user() https://git.kernel.org/powerpc/c/6b385d1d7c0a346758e35b128815afa25d4709ee [2/3] powerpc/uaccess: Merge __put_user_size_allowed() into __put_user_size() https://git.kernel.org/powerpc/c/95d019e0f9225954e33b6efcad315be9d548a4d7 [3/3] powerpc/uaccess: Merge raw_copy_to_user_allowed() into raw_copy_to_user() https://git.kernel.org/powerpc/c/052f9d206f6c4b5b512b8c201d375f2dd194be35 cheers
Re: [PATCH v5 00/22] powerpc/32: Implement C syscall entry/exit
On Mon, 8 Feb 2021 15:10:19 + (UTC), Christophe Leroy wrote: > This series implements C syscall entry/exit for PPC32. It reuses > the work already done for PPC64. > > This series is based on today's merge-test > (b6f72fc05389e3fc694bf5a5fa1bbd33f61879e0) > > In terms on performance we have the following number of cycles on an > 8xx running null_syscall benchmark: > - mainline: 296 cycles > - after patch 4: 283 cycles > - after patch 16: 304 cycles > - after patch 17: 348 cycles > - at the end of the series: 320 cycles > > [...] Patches 1-15 and 21 applied to powerpc/next. [01/22] powerpc/32s: Add missing call to kuep_lock on syscall entry https://git.kernel.org/powerpc/c/57fdfbce89137ae85cd5cef48be168040a47dd13 [02/22] powerpc/32: Always enable data translation on syscall entry https://git.kernel.org/powerpc/c/eca2411040c1ee15b8882c6427fb4eb5a48ada69 [03/22] powerpc/32: On syscall entry, enable instruction translation at the same time as data https://git.kernel.org/powerpc/c/76249ddc27080b6b835a89cedcc4185b3b5a6b23 [04/22] powerpc/32: Reorder instructions to avoid using CTR in syscall entry https://git.kernel.org/powerpc/c/2c59e5104821c5720e88bafa9e522f8bea9ce8fa [05/22] powerpc/irq: Add helper to set regs->softe https://git.kernel.org/powerpc/c/fb5608fd117a8b48752d2b5a7e70847c1ed33d33 [06/22] powerpc/irq: Rework helpers that manipulate MSR[EE/RI] https://git.kernel.org/powerpc/c/08353779f2889305f64e04de3e46ed59ed60f859 [07/22] powerpc/irq: Add stub irq_soft_mask_return() for PPC32 https://git.kernel.org/powerpc/c/6650c4782d5788346a25a4f698880d124f2699a0 [08/22] powerpc/syscall: Rename syscall_64.c into interrupt.c https://git.kernel.org/powerpc/c/ab1a517d55b01b54ba70f5d54f926f5ab4b18339 [09/22] powerpc/syscall: Make interrupt.c buildable on PPC32 https://git.kernel.org/powerpc/c/344bb20b159dd0996e521c0d4c131a6ae10c322a [10/22] powerpc/syscall: Use is_compat_task() https://git.kernel.org/powerpc/c/72b7a9e56b25babfe4c90bf3ce88285c7fb62ab9 [11/22] powerpc/syscall: 
Save r3 in regs->orig_r3 https://git.kernel.org/powerpc/c/8875f47b7681aa4e4484a9b612577b044725f839 [12/22] powerpc/syscall: Change condition to check MSR_RI https://git.kernel.org/powerpc/c/c01b916658150e98f00a4981750c37a3224c8735 [13/22] powerpc/32: Always save non volatile GPRs at syscall entry https://git.kernel.org/powerpc/c/fbcee2ebe8edbb6a93316f0a189ae7fcfaa7094f [14/22] powerpc/syscall: implement system call entry/exit logic in C for PPC32 https://git.kernel.org/powerpc/c/6f76a01173ccaa363739f913394d4e138d92d718 [15/22] powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry https://git.kernel.org/powerpc/c/4d67facbcbdb3d9e3c9cb82e4ec47fc63d298dd8 [21/22] powerpc/32: Remove the counter in global_dbcr0 https://git.kernel.org/powerpc/c/eb595eca74067b78d36fb188b555e30f28686fc7 cheers
Re: [PATCH] powerpc/32: Preserve cr1 in exception prolog stack check to fix build error
On Mon, 8 Feb 2021 07:17:40 + (UTC), Christophe Leroy wrote: > THREAD_ALIGN_SHIFT = THREAD_SHIFT + 1 = PAGE_SHIFT + 1 > Maximum PAGE_SHIFT is 18 for 256k pages so > THREAD_ALIGN_SHIFT is 19 at the maximum. > > No need to clobber cr1, it can be preserved when moving r1 > into CR when we check stack overflow. > > [...] Applied to powerpc/next. [1/1] powerpc/32: Preserve cr1 in exception prolog stack check to fix build error https://git.kernel.org/powerpc/c/3642eb21256a317ac14e9ed560242c6d20cf06d9 cheers
Re: [PATCH 1/3] spi: mpc52xx: Avoid using get_tbl()
On Tue, 9 Feb 2021 10:26:21 + (UTC), Christophe Leroy wrote: > get_tbl() is confusing as it returns the content TBL register > on PPC32 but the concatenation of TBL and TBU on PPC64. > > Use mftb() instead. > > This will allow the removal of get_tbl() in a following patch. Applied to powerpc/next. [1/3] spi: mpc52xx: Avoid using get_tbl() https://git.kernel.org/powerpc/c/e10656114d32c659768e7ca8aebaaa6ac6e959ab [2/3] powerpc/time: Avoid using get_tbl() https://git.kernel.org/powerpc/c/55d68df623eb679cc91f61137f14751e7f369662 [3/3] powerpc/time: Remove get_tbl() https://git.kernel.org/powerpc/c/132f94f133961d18af615cb3503368e59529e9a8 cheers
Re: [PATCH 1/3] powerpc/mm: Enable compound page check for both THP and HugeTLB
On Wed, 3 Feb 2021 10:28:10 +0530, Aneesh Kumar K.V wrote: > THP config results in compound pages. Make sure the kernel enables > the PageCompound() check with CONFIG_HUGETLB_PAGE disabled and > CONFIG_TRANSPARENT_HUGEPAGE enabled. > > This makes sure we correctly flush the icache with THP pages. > flush_dcache_icache_page only matter for platforms that don't support > COHERENT_ICACHE. Applied to powerpc/next. [1/3] powerpc/mm: Enable compound page check for both THP and HugeTLB https://git.kernel.org/powerpc/c/c7ba2d636342093cfb842f47640e5b62192adfed [2/3] powerpc/mm: Add PG_dcache_clean to indicate dcache clean state https://git.kernel.org/powerpc/c/ec94b9b23d620d40ab2ced094a30c22bb8d69b9f [3/3] powerpc/mm: Remove dcache flush from memory remove. https://git.kernel.org/powerpc/c/2ac02e5ecec0cc2484d60a73b1bc6394aa2fad28 cheers
Re: [PATCH kernel v3] powerpc/uaccess: Skip might_fault() when user access is enabled
On Thu, 4 Feb 2021 23:16:12 +1100, Alexey Kardashevskiy wrote: > The amount of code executed with enabled user space access (unlocked KUAP) > should be minimal. However with CONFIG_PROVE_LOCKING or > CONFIG_DEBUG_ATOMIC_SLEEP enabled, might_fault() may end up replaying > interrupts which in turn may access the user space and forget to restore > the KUAP state. > > The problem places are: > 1. strncpy_from_user (and similar) which unlock KUAP and call > unsafe_get_user -> __get_user_allowed -> __get_user_nocheck() > with do_allow=false to skip KUAP as the caller took care of it. > 2. __put_user_nocheck_goto() which is called with unlocked KUAP. > > [...] Applied to powerpc/next. [1/1] powerpc/uaccess: Avoid might_fault() when user access is enabled https://git.kernel.org/powerpc/c/7d506ca97b665b95e698a53697dad99fae813c1a cheers
Re: [PATCH kernel] powerpc/kuap: Restore AMR after replaying soft interrupts
On Tue, 2 Feb 2021 20:15:41 +1100, Alexey Kardashevskiy wrote: > Since de78a9c "powerpc: Add a framework for Kernel Userspace Access > Protection", user access helpers call user_{read|write}_access_{begin|end} > when user space access is allowed. > > 890274c "powerpc/64s: Implement KUAP for Radix MMU" made the mentioned > helpers program a AMR special register to allow such access for a short > period of time, most of the time AMR is expected to block user memory > access by the kernel. > > [...] Applied to powerpc/next. [1/1] powerpc/kuap: Restore AMR after replaying soft interrupts https://git.kernel.org/powerpc/c/60a707d0c99aff4eadb7fd334c5fd21df386723e cheers
Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
Lakshmi Ramasubramanian writes: > On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote: >> Hi Rob, >> [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem >> This change causes build problem for x86_64 architecture (please see the >> mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses >> "elf_load_addr" for the ELF header buffer address and not >> "elf_headers_mem". >> struct kimage_arch { >> ... >> /* Core ELF header buffer */ >> void *elf_headers; >> unsigned long elf_headers_sz; >> unsigned long elf_load_addr; >> }; >> I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and >> PPC64 since they are the only ones using this function now. >> #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64) > Sorry - I meant to say > #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64) > Does it build correctly if you rename elf_headers_mem to elf_load_addr? Or the other way around, renaming x86's elf_load_addr to elf_headers_mem. I don't really have a preference. That would be better than adding an #if condition. >> void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, >> unsigned long initrd_load_addr, >> unsigned long initrd_len, >> const char *cmdline) >> { >> ... >> } >> #endif /* defined(CONFIG_ARM64) && defined(CONFIG_PPC64) */ >> Please let me know if you have any concerns. >> thanks, >> -lakshmi -- Thiago Jung Bauermann IBM Linux Technology Center
Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.11-8 tag
The pull request you sent on Fri, 12 Feb 2021 10:15:59 +1100: > https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git > tags/powerpc-5.11-8 has been merged into torvalds/linux.git: https://git.kernel.org/torvalds/c/dcc0b49040c70ad827a7f3d58a21b01fdb14e749 Thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/prtracker.html
[PATCH v2 2/2] driver core: platform: Drop of_device_node_put() wrapper
of_device_node_put() is just a wrapper for of_node_put(). The platform driver core is already polluted with of_node pointers and the only 'get' already uses of_node_get() (though typically the get would happen in of_device_alloc()). Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Frank Rowand Signed-off-by: Rob Herring --- v2: - Fix build --- drivers/base/platform.c | 2 +- include/linux/of_device.h | 7 --- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 95fd1549f87d..9d5171e5f967 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -571,7 +571,7 @@ static void platform_device_release(struct device *dev) struct platform_object *pa = container_of(dev, struct platform_object, pdev.dev); - of_device_node_put(&pa->pdev.dev); + of_node_put(pa->pdev.dev.of_node); kfree(pa->pdev.dev.platform_data); kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); diff --git a/include/linux/of_device.h b/include/linux/of_device.h index d7a407dfeecb..1d7992a02e36 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -38,11 +38,6 @@ extern int of_device_request_module(struct device *dev); extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env); extern int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env); -static inline void of_device_node_put(struct device *dev) -{ - of_node_put(dev->of_node); -} - static inline struct device_node *of_cpu_device_node_get(int cpu) { struct device *cpu_dev; @@ -94,8 +89,6 @@ static inline int of_device_uevent_modalias(struct device *dev, return -ENODEV; } -static inline void of_device_node_put(struct device *dev) { } - static inline const struct of_device_id *of_match_device( const struct of_device_id *matches, const struct device *dev) { -- 2.27.0
[PATCH v2 1/2] of: Remove of_dev_{get,put}()
of_dev_get() and of_dev_put are just wrappers for get_device()/put_device() on a platform_device. There's also already platform_device_{get,put}() wrappers for this purpose. Let's update the few users and remove of_dev_{get,put}(). Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Frank Rowand Cc: Patrice Chotard Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Julia Lawall Cc: Gilles Muller Cc: Nicolas Palix Cc: Michal Marek Cc: linuxppc-dev@lists.ozlabs.org Cc: net...@vger.kernel.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-...@vger.kernel.org Cc: co...@systeme.lip6.fr Signed-off-by: Rob Herring --- v2: - Fix build --- arch/powerpc/platforms/pseries/ibmebus.c | 4 ++-- drivers/net/ethernet/ibm/emac/core.c | 15 --- drivers/of/device.c | 21 - drivers/of/platform.c| 4 ++-- drivers/of/unittest.c| 2 +- drivers/usb/dwc3/dwc3-st.c | 2 +- include/linux/of_device.h| 3 --- scripts/coccinelle/free/put_device.cocci | 1 - 8 files changed, 14 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 8c6e509f6967..a15ab33646b3 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -355,12 +355,12 @@ static int ibmebus_bus_device_probe(struct device *dev) if (!drv->probe) return error; - of_dev_get(of_dev); + get_device(dev); if (of_driver_match_device(dev, dev->driver)) error = drv->probe(of_dev); if (error) - of_dev_put(of_dev); + put_device(dev); return error; } diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index c00b9097eeea..471be6ec7e8a 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -2390,11 +2391,11 @@ static int emac_check_deps(struct emac_instance *dev, static void emac_put_deps(struct emac_instance *dev) { - 
of_dev_put(dev->mal_dev); - of_dev_put(dev->zmii_dev); - of_dev_put(dev->rgmii_dev); - of_dev_put(dev->mdio_dev); - of_dev_put(dev->tah_dev); + platform_device_put(dev->mal_dev); + platform_device_put(dev->zmii_dev); + platform_device_put(dev->rgmii_dev); + platform_device_put(dev->mdio_dev); + platform_device_put(dev->tah_dev); } static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action, @@ -2435,7 +2436,7 @@ static int emac_wait_deps(struct emac_instance *dev) for (i = 0; i < EMAC_DEP_COUNT; i++) { of_node_put(deps[i].node); if (err) - of_dev_put(deps[i].ofdev); + platform_device_put(deps[i].ofdev); } if (err == 0) { dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev; @@ -2444,7 +2445,7 @@ static int emac_wait_deps(struct emac_instance *dev) dev->tah_dev = deps[EMAC_DEP_TAH_IDX].ofdev; dev->mdio_dev = deps[EMAC_DEP_MDIO_IDX].ofdev; } - of_dev_put(deps[EMAC_DEP_PREV_IDX].ofdev); + platform_device_put(deps[EMAC_DEP_PREV_IDX].ofdev); return err; } diff --git a/drivers/of/device.c b/drivers/of/device.c index aedfaaafd3e7..9a748855b39d 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -33,27 +33,6 @@ const struct of_device_id *of_match_device(const struct of_device_id *matches, } EXPORT_SYMBOL(of_match_device); -struct platform_device *of_dev_get(struct platform_device *dev) -{ - struct device *tmp; - - if (!dev) - return NULL; - tmp = get_device(&dev->dev); - if (tmp) - return to_platform_device(tmp); - else - return NULL; -} -EXPORT_SYMBOL(of_dev_get); - -void of_dev_put(struct platform_device *dev) -{ - if (dev) - put_device(&dev->dev); -} -EXPORT_SYMBOL(of_dev_put); - int of_device_add(struct platform_device *ofdev) { BUG_ON(ofdev->dev.of_node == NULL); diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 79bd5f5a1bf1..020bf860c72c 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -687,7 +687,7 @@ static int of_platform_notify(struct notifier_block *nb, pdev_parent = of_find_device_by_node(rd->dn->parent); pdev 
= of_platform_device_create(rd->dn, NULL, pdev_parent ? &pdev_parent->dev : NULL); - of_dev_put(pdev_parent); + platform_device_put(pdev_parent); if (pdev == NULL) { pr_err("%s: failed to create for '%pOF'\n", @@ -712,7 +712,7 @@ static in
[PATCH v2 0/2] of: of_device.h cleanups
This is a couple of cleanups for of_device.h. They fell out from my attempt at decoupling of_device.h and of_platform.h which is a mess and I haven't finished, but there's no reason to wait on these. Rob Rob Herring (2): of: Remove of_dev_{get,put}() driver core: platform: Drop of_device_node_put() wrapper arch/powerpc/platforms/pseries/ibmebus.c | 4 ++-- drivers/base/platform.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 15 --- drivers/of/device.c | 21 - drivers/of/platform.c| 4 ++-- drivers/of/unittest.c| 2 +- drivers/usb/dwc3/dwc3-st.c | 2 +- include/linux/of_device.h| 10 -- scripts/coccinelle/free/put_device.cocci | 1 - 9 files changed, 15 insertions(+), 46 deletions(-) -- 2.27.0
Re: [PATCH 5/6] powerpc/mm/64s/hash: Add real-mode change_memory_range() for hash LPAR
Excerpts from Michael Ellerman's message of February 11, 2021 11:51 pm: > When we enabled STRICT_KERNEL_RWX we received some reports of boot > failures when using the Hash MMU and running under phyp. The crashes > are intermittent, and often exhibit as a completely unresponsive > system, or possibly an oops. > > One example, which was caught in xmon: > > [ 14.068327][T1] devtmpfs: mounted > [ 14.069302][T1] Freeing unused kernel memory: 5568K > [ 14.142060][ T347] BUG: Unable to handle kernel instruction fetch > [ 14.142063][T1] Run /sbin/init as init process > [ 14.142074][ T347] Faulting instruction address: 0xc0004400 > cpu 0x2: Vector: 400 (Instruction Access) at [cc7475e0] > pc: c0004400: exc_virt_0x4400_instruction_access+0x0/0x80 > lr: c01862d4: update_rq_clock+0x44/0x110 > sp: cc747880 > msr: 800040001031 > current = 0xcc60d380 > paca= 0xc0001ec9de80 irqmask: 0x03 irq_happened: 0x01 > pid = 347, comm = kworker/2:1 > ... > enter ? for help > [cc747880] c01862d4 update_rq_clock+0x44/0x110 (unreliable) > [cc7478f0] c0198794 update_blocked_averages+0xb4/0x6d0 > [cc7479f0] c0198e40 update_nohz_stats+0x90/0xd0 > [cc747a20] c01a13b4 _nohz_idle_balance+0x164/0x390 > [cc747b10] c01a1af8 newidle_balance+0x478/0x610 > [cc747be0] c01a1d48 pick_next_task_fair+0x58/0x480 > [cc747c40] c0eaab5c __schedule+0x12c/0x950 > [cc747cd0] c0eab3e8 schedule+0x68/0x120 > [cc747d00] c016b730 worker_thread+0x130/0x640 > [cc747da0] c0174d50 kthread+0x1a0/0x1b0 > [cc747e10] c000e0f0 ret_from_kernel_thread+0x5c/0x6c > > This shows that CPU 2, which was idle, woke up and then appears to > randomly take an instruction fault on a completely valid area of > kernel text. > > The cause turns out to be the call to hash__mark_rodata_ro(), late in > boot. Due to the way we layout text and rodata, that function actually > changes the permissions for all of text and rodata to read-only plus > execute. > > To do the permission change we use a hypervisor call, H_PROTECT. 
On > phyp that appears to be implemented by briefly removing the mapping of > the kernel text, before putting it back with the updated permissions. > If any other CPU is executing during that window, it will see spurious > faults on the kernel text and/or data, leading to crashes. > > To fix it we use stop machine to collect all other CPUs, and then have > them drop into real mode (MMU off), while we change the mapping. That > way they are unaffected by the mapping temporarily disappearing. > > Signed-off-by: Michael Ellerman > --- > arch/powerpc/mm/book3s64/hash_pgtable.c | 105 +++- > 1 file changed, 104 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c > b/arch/powerpc/mm/book3s64/hash_pgtable.c > index 3663d3cdffac..01de985df2c4 100644 > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > @@ -8,6 +8,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -400,6 +401,19 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ > > #ifdef CONFIG_STRICT_KERNEL_RWX > + > +struct change_memory_parms { > + unsigned long start, end, newpp; > + unsigned int step, nr_cpus, master_cpu; > + atomic_t cpu_counter; > +}; > + > +// We'd rather this was on the stack but it has to be in the RMO > +static struct change_memory_parms chmem_parms; > + > +// And therefore we need a lock to protect it from concurrent use > +static DEFINE_MUTEX(chmem_lock); > + > static void change_memory_range(unsigned long start, unsigned long end, > unsigned int step, unsigned long newpp) > { > @@ -414,6 +428,73 @@ static void change_memory_range(unsigned long start, > unsigned long end, > mmu_kernel_ssize); > } > > +static int notrace chmem_secondary_loop(struct change_memory_parms *parms) > +{ > + unsigned long msr, tmp, flags; > + int *p; > + > + p = &parms->cpu_counter.counter; > + > + local_irq_save(flags); > + __hard_EE_RI_disable(); > + > + asm volatile 
( > + // Switch to real mode and leave interrupts off > + "mfmsr %[msr] ;" > + "li %[tmp], %[MSR_IR_DR];" > + "andc %[tmp], %[msr], %[tmp] ;" > + "mtmsrd %[tmp] ;" > + > + // Tell the master we are in real mode > + "1: " > + "lwarx %[tmp], 0, %[p] ;" > + "addic %[tmp], %[tmp], -1 ;" > + "stwcx. %[tmp], 0, %[p] ;" > + "bne- 1b ;" > + > + // Spin
[GIT PULL] Please pull powerpc/linux.git powerpc-5.11-8 tag
-BEGIN PGP SIGNED MESSAGE- Hash: SHA256 Hi Linus, Please pull one final powerpc fix for 5.11: The following changes since commit 24321ac668e452a4942598533d267805f291fdc9: powerpc/64/signal: Fix regression in __kernel_sigtramp_rt64() semantics (2021-02-02 22:14:41 +1100) are available in the git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.11-8 for you to fetch changes up to 8c511eff1827239f24ded212b1bcda7ca5b16203: powerpc/kuap: Allow kernel thread to access userspace after kthread_use_mm (2021-02-06 23:13:04 +1100) - -- powerpc fixes for 5.11 #8 One fix for a regression seen in io_uring, introduced by our support for KUAP (Kernel User Access Prevention) with the Hash MMU. Thanks to: Aneesh Kumar K.V, Zorro Lang. - -- Aneesh Kumar K.V (1): powerpc/kuap: Allow kernel thread to access userspace after kthread_use_mm arch/powerpc/include/asm/book3s/64/kup.h | 16 +++- arch/powerpc/include/asm/book3s/64/pkeys.h | 4 arch/powerpc/mm/book3s64/pkeys.c | 1 + 3 files changed, 12 insertions(+), 9 deletions(-) -BEGIN PGP SIGNATURE- iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmAlumMACgkQUevqPMjh pYBb9BAAqpyhvEjDiyP0xeHUTQ9UsNMnNUW7Hp03jJYH+EKWC9nT7kb3TY1eNyk5 KyMBOtXiQsggzWDE31bR32BpH/JChSeRPaCO4HHdDS9t+3ZIo16RTFksdiIhCN6m nI4WhnfrdZszstMRsKWzfoLRVDGyNi0hyyQaMS3ypuKvRmozZk6u9K/YNXMa97Wf ihhB0lYRdfMNgxMm66uaqEtzYt3Z4dRj9Y24LQirJnp32xK+sNgoleHl4gvvKG3m r7CogqHlcExbkD3dl/PPe/SVEesfXpmTrQPCvJmi0qWm9NzkduQWrSEkUkUp1YQD T0pBHnCxtI7GAAQpCphBA3gjrz03Og4/RAXmfESgI0JHyh7Vihx91XOwuonuJndn 5ThY2D9+nkZ2vnlis2/AoLx6ClbNgZysr0iAOsRyd2SYR9Er2CcPZ4OuNHXWTHlz G4SmVYiZj9gnSrzqlEGIqBOVWdykV+x+xkLLQx86HUAI+7f1mFV1+dJ4E5NLGKzS jB+XwwG2y6q6SnJt2iiybqDu9K1lPWnKFLNeb4at7GfpJ4riKNcRSYrkY9xYxmkR kgKyXfW+rNt1RcIoy65kNa3hSnXi4p9wc5Lph7joS7zTkZIKR2E3QUJjGQao85Qa rNxccSz3X7ZAL9OEJP/4nE72Zf3VXkzuKbB79D3dhFJ8SJmbPqQ= =GDL5 -END PGP SIGNATURE-
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
Excerpts from Segher Boessenkool's message of February 11, 2021 9:50 pm: > On Thu, Feb 11, 2021 at 08:04:55PM +1000, Nicholas Piggin wrote: >> It would be nice if we could have a __builtin_trap_if that gcc would use >> conditional traps with, (and which never assumes following code is >> unreachable even for constant true, so we can use it with WARN and put >> explicit unreachable for BUG). > > It automatically does that with just __builtin_trap, see my other mail :-) If that is generated without branches (or at least with no more branches than existing asm implementation), then it could be usable without trashing CFAR. Unfortunately I don't think we will be parsing the dwarf information to get line numbers from it any time soon, so not a drop in replacement but maybe one day someone would find a solution. Thanks, Nick
[PATCH 2/2] driver core: platform: Drop of_device_node_put() wrapper
of_device_node_put() is just a wrapper for of_node_put(). The platform driver core is already polluted with of_node pointers and the only 'get' already uses of_node_get() (though typically the get would happen in of_device_alloc()). Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Frank Rowand Signed-off-by: Rob Herring --- drivers/base/platform.c | 2 +- include/linux/of_device.h | 7 --- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 95fd1549f87d..c31bc9e92dd1 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -571,7 +571,7 @@ static void platform_device_release(struct device *dev) struct platform_object *pa = container_of(dev, struct platform_object, pdev.dev); - of_device_node_put(&pa->pdev.dev); + of_node_put(&pa->pdev.dev->of_node); kfree(pa->pdev.dev.platform_data); kfree(pa->pdev.mfd_cell); kfree(pa->pdev.resource); diff --git a/include/linux/of_device.h b/include/linux/of_device.h index d7a407dfeecb..1d7992a02e36 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -38,11 +38,6 @@ extern int of_device_request_module(struct device *dev); extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env); extern int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env); -static inline void of_device_node_put(struct device *dev) -{ - of_node_put(dev->of_node); -} - static inline struct device_node *of_cpu_device_node_get(int cpu) { struct device *cpu_dev; @@ -94,8 +89,6 @@ static inline int of_device_uevent_modalias(struct device *dev, return -ENODEV; } -static inline void of_device_node_put(struct device *dev) { } - static inline const struct of_device_id *of_match_device( const struct of_device_id *matches, const struct device *dev) { -- 2.27.0
[PATCH 0/2] of: of_device.h cleanups
This is a couple of cleanups for of_device.h. They fell out from my attempt at decoupling of_device.h and of_platform.h which is a mess and I haven't finished, but there's no reason to wait on these. Rob Rob Herring (2): of: Remove of_dev_{get,put}() driver core: platform: Drop of_device_node_put() wrapper arch/powerpc/platforms/pseries/ibmebus.c | 4 ++-- drivers/base/platform.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 15 --- drivers/of/device.c | 21 - drivers/of/platform.c| 4 ++-- drivers/of/unittest.c| 2 +- drivers/usb/dwc3/dwc3-st.c | 2 +- include/linux/of_device.h| 10 -- scripts/coccinelle/free/put_device.cocci | 1 - 9 files changed, 15 insertions(+), 46 deletions(-) -- 2.27.0
[PATCH 1/2] of: Remove of_dev_{get,put}()
of_dev_get() and of_dev_put are just wrappers for get_device()/put_device() on a platform_device. There's also already platform_device_{get,put}() wrappers for this purpose. Let's update the few users and remove of_dev_{get,put}(). Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Frank Rowand Cc: Patrice Chotard Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Julia Lawall Cc: Gilles Muller Cc: Nicolas Palix Cc: Michal Marek Cc: linuxppc-dev@lists.ozlabs.org Cc: net...@vger.kernel.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-...@vger.kernel.org Cc: co...@systeme.lip6.fr Signed-off-by: Rob Herring --- arch/powerpc/platforms/pseries/ibmebus.c | 4 ++-- drivers/net/ethernet/ibm/emac/core.c | 15 --- drivers/of/device.c | 21 - drivers/of/platform.c| 4 ++-- drivers/of/unittest.c| 2 +- drivers/usb/dwc3/dwc3-st.c | 2 +- include/linux/of_device.h| 3 --- scripts/coccinelle/free/put_device.cocci | 1 - 8 files changed, 14 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 8c6e509f6967..c29328fe94e8 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -355,12 +355,12 @@ static int ibmebus_bus_device_probe(struct device *dev) if (!drv->probe) return error; - of_dev_get(of_dev); + get_device(dev); if (of_driver_match_device(dev, dev->driver)) error = drv->probe(of_dev); if (error) - of_dev_put(of_dev); + put_device(of_dev); return error; } diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index c00b9097eeea..471be6ec7e8a 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -2390,11 +2391,11 @@ static int emac_check_deps(struct emac_instance *dev, static void emac_put_deps(struct emac_instance *dev) { - of_dev_put(dev->mal_dev); - 
of_dev_put(dev->zmii_dev); - of_dev_put(dev->rgmii_dev); - of_dev_put(dev->mdio_dev); - of_dev_put(dev->tah_dev); + platform_device_put(dev->mal_dev); + platform_device_put(dev->zmii_dev); + platform_device_put(dev->rgmii_dev); + platform_device_put(dev->mdio_dev); + platform_device_put(dev->tah_dev); } static int emac_of_bus_notify(struct notifier_block *nb, unsigned long action, @@ -2435,7 +2436,7 @@ static int emac_wait_deps(struct emac_instance *dev) for (i = 0; i < EMAC_DEP_COUNT; i++) { of_node_put(deps[i].node); if (err) - of_dev_put(deps[i].ofdev); + platform_device_put(deps[i].ofdev); } if (err == 0) { dev->mal_dev = deps[EMAC_DEP_MAL_IDX].ofdev; @@ -2444,7 +2445,7 @@ static int emac_wait_deps(struct emac_instance *dev) dev->tah_dev = deps[EMAC_DEP_TAH_IDX].ofdev; dev->mdio_dev = deps[EMAC_DEP_MDIO_IDX].ofdev; } - of_dev_put(deps[EMAC_DEP_PREV_IDX].ofdev); + platform_device_put(deps[EMAC_DEP_PREV_IDX].ofdev); return err; } diff --git a/drivers/of/device.c b/drivers/of/device.c index aedfaaafd3e7..9a748855b39d 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -33,27 +33,6 @@ const struct of_device_id *of_match_device(const struct of_device_id *matches, } EXPORT_SYMBOL(of_match_device); -struct platform_device *of_dev_get(struct platform_device *dev) -{ - struct device *tmp; - - if (!dev) - return NULL; - tmp = get_device(&dev->dev); - if (tmp) - return to_platform_device(tmp); - else - return NULL; -} -EXPORT_SYMBOL(of_dev_get); - -void of_dev_put(struct platform_device *dev) -{ - if (dev) - put_device(&dev->dev); -} -EXPORT_SYMBOL(of_dev_put); - int of_device_add(struct platform_device *ofdev) { BUG_ON(ofdev->dev.of_node == NULL); diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 79bd5f5a1bf1..020bf860c72c 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -687,7 +687,7 @@ static int of_platform_notify(struct notifier_block *nb, pdev_parent = of_find_device_by_node(rd->dn->parent); pdev = 
of_platform_device_create(rd->dn, NULL, pdev_parent ? &pdev_parent->dev : NULL); - of_dev_put(pdev_parent); + platform_device_put(pdev_parent); if (pdev == NULL) { pr_err("%s: failed to create for '%pOF'\n", @@ -712,7 +712,7 @@ static int of_platform_noti
[PATCH 3/4] ibmvfc: treat H_CLOSED as success during sub-CRQ registration
A non-zero return code for H_REG_SUB_CRQ is currently treated as a failure resulting in failing sub-CRQ setup. The case of H_CLOSED should not be treated as a failure. This return code translates to a successful sub-CRQ registration by the hypervisor, and is meant to communicate back that there is currently no partner VIOS CRQ connection established as of yet. This is a common occurrence during a disconnect where the client adapter can possibly come back up prior to the partner adapter. For non-zero return code from H_REG_SUB_CRQ treat a H_CLOSED as success so that sub-CRQs are successfully setup. Fixes: faacf8c5f1d5 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels") Signed-off-by: Tyrel Datwyler --- drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 3dd20f383453..ba6fcf9cbc57 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -5636,7 +5636,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE, &scrq->cookie, &scrq->hw_irq); - if (rc) { + /* H_CLOSED indicates successful register, but no CRQ partner */ + if (rc && rc != H_CLOSED) { dev_warn(dev, "Error registering sub-crq: %d\n", rc); if (rc == H_PARAMETER) dev_warn_once(dev, "Firmware may not support MQ\n"); -- 2.27.0
[PATCH 2/4] ibmvfc: fix invalid sub-CRQ handles after hard reset
A hard reset results in a complete transport disconnect such that the CRQ connection with the partner VIOS is broken. This has the side effect of also invalidating the associated sub-CRQs. The current code assumes that the sub-CRQs are perserved resulting in a protocol violation after trying to reconnect them with the VIOS. This introduces an infinite loop such that the VIOS forces a disconnect after each subsequent attempt to re-register with invalid handles. Avoid the aforementioned issue by releasing the sub-CRQs prior to CRQ disconnect, and driving a reinitialization of the sub-CRQs once a new CRQ is registered with the hypervisor. fixes: faacf8c5f1d5 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels") Signed-off-by: Tyrel Datwyler --- drivers/scsi/ibmvscsi/ibmvfc.c | 21 + 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 384960036f8b..3dd20f383453 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -158,6 +158,9 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *); static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *); static void ibmvfc_tgt_move_login(struct ibmvfc_target *); +static void ibmvfc_release_sub_crqs(struct ibmvfc_host *); +static void ibmvfc_init_sub_crqs(struct ibmvfc_host *); + static const char *unknown_error = "unknown error"; static long h_reg_sub_crq(unsigned long unit_address, unsigned long ioba, @@ -926,8 +929,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) unsigned long flags; struct vio_dev *vdev = to_vio_dev(vhost->dev); struct ibmvfc_queue *crq = &vhost->crq; - struct ibmvfc_queue *scrq; - int i; + + ibmvfc_release_sub_crqs(vhost); /* Close the CRQ */ do { @@ -947,16 +950,6 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) memset(crq->msgs.crq, 0, PAGE_SIZE); crq->cur = 0; - if (vhost->scsi_scrqs.scrqs) { - for (i = 0; i < nr_scsi_hw_queues; i++) { - scrq = 
&vhost->scsi_scrqs.scrqs[i]; - spin_lock(scrq->q_lock); - memset(scrq->msgs.scrq, 0, PAGE_SIZE); - scrq->cur = 0; - spin_unlock(scrq->q_lock); - } - } - /* And re-open it again */ rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, crq->msg_token, PAGE_SIZE); @@ -966,6 +959,9 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) dev_warn(vhost->dev, "Partner adapter not ready\n"); else if (rc != 0) dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc); + + ibmvfc_init_sub_crqs(vhost); + spin_unlock(vhost->crq.q_lock); spin_unlock_irqrestore(vhost->host->host_lock, flags); @@ -5692,6 +5688,7 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index) free_irq(scrq->irq, scrq); irq_dispose_mapping(scrq->irq); + scrq->irq = 0; do { rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, -- 2.27.0
[PATCH 0/4] ibmvfc: hard reset fixes
This series contains a minor simplification of ibmvfc_init_sub_crqs() followed by a couple fixes for sub-CRQ handling which affect hard reset of the client/host adapter CRQ pair.
[PATCH 1/4] ibmvfc: simplify handling of sub-CRQ initialization
If ibmvfc_init_sub_crqs() fails ibmvfc_probe() simply parrots registration failure reported elsewhere, and futher vhost->scsi_scrq.scrq == NULL is indication enough to the driver that it has no sub-CRQs available. The mq_enabled check can also be moved into ibmvfc_init_sub_crqs() such that each caller doesn't have to gate the call with a mq_enabled check. Finally, in the case of sub-CRQ setup failure setting do_enquiry can be turned off to putting the driver into single queue fallback mode. The aforementioned changes also simplify the next patch in the series that fixes a hard reset issue, by tying a sub-CRQ setup failure and do_enquiry logic into ibmvfc_init_sub_crqs(). Signed-off-by: Tyrel Datwyler --- drivers/scsi/ibmvscsi/ibmvfc.c | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 7097028d4cb6..384960036f8b 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -5705,17 +5705,21 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index) LEAVE; } -static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) +static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) { int i, j; ENTER; + if (!vhost->mq_enabled) + return; vhost->scsi_scrqs.scrqs = kcalloc(nr_scsi_hw_queues, sizeof(*vhost->scsi_scrqs.scrqs), GFP_KERNEL); - if (!vhost->scsi_scrqs.scrqs) - return -1; + if (!vhost->scsi_scrqs.scrqs) { + vhost->do_enquiry = 0; + return; + } for (i = 0; i < nr_scsi_hw_queues; i++) { if (ibmvfc_register_scsi_channel(vhost, i)) { @@ -5724,13 +5728,12 @@ static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) kfree(vhost->scsi_scrqs.scrqs); vhost->scsi_scrqs.scrqs = NULL; vhost->scsi_scrqs.active_queues = 0; - LEAVE; - return -1; + vhost->do_enquiry = 0; + break; } } LEAVE; - return 0; } static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost) @@ -5997,11 +6000,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const 
struct vio_device_id *id) goto remove_shost; } - if (vhost->mq_enabled) { - rc = ibmvfc_init_sub_crqs(vhost); - if (rc) - dev_warn(dev, "Failed to allocate Sub-CRQs. rc=%d\n", rc); - } + ibmvfc_init_sub_crqs(vhost); if (shost_to_fc_host(shost)->rqst_q) blk_queue_max_segments(shost_to_fc_host(shost)->rqst_q, 1); -- 2.27.0
[PATCH 4/4] ibmvfc: store return code of H_FREE_SUB_CRQ during cleanup
The H_FREE_SUB_CRQ hypercall can return a retry delay return code that indicates the call needs to be retried after a specific amount of time delay. The error path to free a sub-CRQ in case of a failure during channel registration fails to capture the return code of H_FREE_SUB_CRQ which will result in the delay loop being skipped in the case of a retry delay return code. Store the return code result of the H_FREE_SUB_CRQ call such that the return code check in the delay loop evaluates a meaningful value. Fixes: 9288d35d70b5 ("ibmvfc: map/request irq and register Sub-CRQ interrupt handler") Signed-off-by: Tyrel Datwyler --- drivers/scsi/ibmvscsi/ibmvfc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index ba6fcf9cbc57..23b803ac4a13 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -5670,7 +5670,7 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, irq_failed: do { - plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie); + rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie); } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); reg_failed: ibmvfc_free_queue(vhost, scrq); -- 2.27.0
[PATCH] powerpc/pseries: extract host bridge from pci_bus prior to bus removal
The pci_bus->bridge reference may no longer be valid after pci_bus_remove() resulting in passing a bad value to device_unregister() for the associated bridge device. Store the host_bridge reference in a separate variable prior to pci_bus_remove(). Fixes: 7340056567e3 ("powerpc/pci: Reorder pci bus/bridge unregistration during PHB removal") Signed-off-by: Tyrel Datwyler --- arch/powerpc/platforms/pseries/pci_dlpar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index f9ae17e8a0f4..a8f9140a24fa 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic); int remove_phb_dynamic(struct pci_controller *phb) { struct pci_bus *b = phb->bus; + struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge); struct resource *res; int rc, i; @@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb) /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); - device_unregister(b->bridge); + host_bridge->bus = NULL; + device_unregister(&host_bridge->dev); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) -- 2.27.0
Re: Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
On 2/11/21 9:42 AM, Lakshmi Ramasubramanian wrote: Hi Rob, [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem This change causes build problem for x86_64 architecture (please see the mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses "elf_load_addr" for the ELF header buffer address and not "elf_headers_mem". struct kimage_arch { ... /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; }; I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and PPC64 since they are the only ones using this function now. #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64) Sorry - I meant to say #if defined(CONFIG_ARM64) || defined(CONFIG_PPC64) void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline) { ... } #endif /* defined(CONFIG_ARM64) && defined(CONFIG_PPC64) */ Please let me know if you have any concerns. thanks, -lakshmi Forwarded Message Subject: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function Date: Fri, 12 Feb 2021 00:50:20 +0800 From: kernel test robot To: Lakshmi Ramasubramanian CC: kbuild-...@lists.01.org Hi Lakshmi, I love your patch! Yet something to improve: [auto build test ERROR on integrity/next-integrity] [also build test ERROR on v5.11-rc7 next-20210211] [cannot apply to powerpc/next robh/for-next arm64/for-next/core] [If your patch is applied to the wrong git tree, kindly drop us a note. 
And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Lakshmi-Ramasubramanian/Carry-forward-IMA-measurement-log-on-kexec-on-ARM64/20210211-071924 base: https://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git next-integrity config: x86_64-randconfig-m001-20210211 (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/12ae86067d115b84092353109e8798693d102f0d git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Lakshmi-Ramasubramanian/Carry-forward-IMA-measurement-log-on-kexec-on-ARM64/20210211-071924 git checkout 12ae86067d115b84092353109e8798693d102f0d # save the attached .config to linux build tree make W=1 ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): drivers/of/kexec.c: In function 'of_kexec_alloc_and_setup_fdt': drivers/of/kexec.c:183:17: error: 'const struct kimage_arch' has no member named 'elf_headers_mem'; did you mean 'elf_headers_sz'? 183 | image->arch.elf_headers_mem, | ^~~ | elf_headers_sz drivers/of/kexec.c:192:42: error: 'const struct kimage_arch' has no member named 'elf_headers_mem'; did you mean 'elf_headers_sz'? 192 | ret = fdt_add_mem_rsv(fdt, image->arch.elf_headers_mem, | ^~~ | elf_headers_sz vim +183 drivers/of/kexec.c 65 66 /* 67 * of_kexec_alloc_and_setup_fdt - Alloc and setup a new Flattened Device Tree 68 * 69 * @image: kexec image being loaded. 70 * @initrd_load_addr: Address where the next initrd will be loaded. 71 * @initrd_len: Size of the next initrd, or 0 if there will be none. 72 * @cmdline: Command line for the next kernel, or NULL if there will 73 * be none. 74 * 75 * Return: fdt on success, or NULL errno on error. 
76 */ 77 void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, 78 unsigned long initrd_load_addr, 79 unsigned long initrd_len, 80 const char *cmdline) 81 { 82 void *fdt; 83 int ret, chosen_node; 84 const void *prop; 85 unsigned long fdt_size; 86 87 fdt_size = fdt_totalsize(initial_boot_params) + 88 (cmdline ? strlen(cmdline) : 0) + 89 FDT_EXTRA_SPACE; 90 91 fdt = kvmalloc(fdt_size, GFP_KERNEL); 92 if (!fdt) 93 return NULL; 94 95 ret = fdt_open_into(initial_boot_params, fdt, fdt_size); 96 if (ret < 0) { 97 pr_err("Error %d setting up the new device tree.\n
Fwd: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function
Hi Rob, [PATCH] powerpc: Rename kexec elfcorehdr_addr to elf_headers_mem This change causes build problem for x86_64 architecture (please see the mail from kernel test bot below) since arch/x86/include/asm/kexec.h uses "elf_load_addr" for the ELF header buffer address and not "elf_headers_mem". struct kimage_arch { ... /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; }; I am thinking of limiting of_kexec_alloc_and_setup_fdt() to ARM64 and PPC64 since they are the only ones using this function now. #if defined(CONFIG_ARM64) && defined(CONFIG_PPC64) void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline) { ... } #endif /* defined(CONFIG_ARM64) && defined(CONFIG_PPC64) */ Please let me know if you have any concerns. thanks, -lakshmi Forwarded Message Subject: Re: [PATCH v17 02/10] of: Add a common kexec FDT setup function Date: Fri, 12 Feb 2021 00:50:20 +0800 From: kernel test robot To: Lakshmi Ramasubramanian CC: kbuild-...@lists.01.org Hi Lakshmi, I love your patch! Yet something to improve: [auto build test ERROR on integrity/next-integrity] [also build test ERROR on v5.11-rc7 next-20210211] [cannot apply to powerpc/next robh/for-next arm64/for-next/core] [If your patch is applied to the wrong git tree, kindly drop us a note. 
And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Lakshmi-Ramasubramanian/Carry-forward-IMA-measurement-log-on-kexec-on-ARM64/20210211-071924 base: https://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git next-integrity config: x86_64-randconfig-m001-20210211 (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/12ae86067d115b84092353109e8798693d102f0d git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Lakshmi-Ramasubramanian/Carry-forward-IMA-measurement-log-on-kexec-on-ARM64/20210211-071924 git checkout 12ae86067d115b84092353109e8798693d102f0d # save the attached .config to linux build tree make W=1 ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): drivers/of/kexec.c: In function 'of_kexec_alloc_and_setup_fdt': drivers/of/kexec.c:183:17: error: 'const struct kimage_arch' has no member named 'elf_headers_mem'; did you mean 'elf_headers_sz'? 183 | image->arch.elf_headers_mem, | ^~~ | elf_headers_sz drivers/of/kexec.c:192:42: error: 'const struct kimage_arch' has no member named 'elf_headers_mem'; did you mean 'elf_headers_sz'? 192 | ret = fdt_add_mem_rsv(fdt, image->arch.elf_headers_mem, | ^~~ | elf_headers_sz vim +183 drivers/of/kexec.c 65 66 /* 67 * of_kexec_alloc_and_setup_fdt - Alloc and setup a new Flattened Device Tree 68 * 69 * @image: kexec image being loaded. 70 * @initrd_load_addr: Address where the next initrd will be loaded. 71 * @initrd_len: Size of the next initrd, or 0 if there will be none. 72 * @cmdline: Command line for the next kernel, or NULL if there will 73 * be none. 74 * 75 * Return: fdt on success, or NULL errno on error. 
76 */ 77 void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, 78 unsigned long initrd_load_addr, 79 unsigned long initrd_len, 80 const char *cmdline) 81 { 82 void *fdt; 83 int ret, chosen_node; 84 const void *prop; 85 unsigned long fdt_size; 86 87 fdt_size = fdt_totalsize(initial_boot_params) + 88 (cmdline ? strlen(cmdline) : 0) + 89 FDT_EXTRA_SPACE; 90 91 fdt = kvmalloc(fdt_size, GFP_KERNEL); 92 if (!fdt) 93 return NULL; 94 95 ret = fdt_open_into(initial_boot_params, fdt, fdt_size); 96 if (ret < 0) { 97 pr_err("Error %d setting up the new device tree.\n", ret); 98 goto out; 99
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
Le 11/02/2021 à 15:30, Segher Boessenkool a écrit : On Thu, Feb 11, 2021 at 03:09:43PM +0100, Christophe Leroy wrote: Le 11/02/2021 à 12:49, Segher Boessenkool a écrit : On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote: powerpc BUG_ON() is based on using twnei or tdnei instruction, which obliges gcc to format the condition into a 0 or 1 value in a register. Huh? Why is that? Will it work better if this used __builtin_trap? Or does the kernel only detect very specific forms of trap instructions? We already made a try with __builtin_trap() 1,5 year ago, see https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20510ce03cc9463f1c9e743c1d93b939de501b53.1566219503.git.christophe.le...@c-s.fr/ The main problems encountered are: - It is only possible to use it for BUG_ON, not for WARN_ON because GCC considers it as noreturn. Is there any workaround ? A trap is noreturn by definition: -- Built-in Function: void __builtin_trap (void) This function causes the program to exit abnormally. - The kernel (With CONFIG_DEBUG_BUGVERBOSE) needs to be able to identify the source file and line corresponding to the trap. How can that be done with __builtin_trap() ? The DWARF debug info should be sufficient. Perhaps you can post-process some way? You can create a trap that falls through yourself (by having a trap-on condition with a condition that is always true, but make the compiler not see that). This isn't efficient though. Could you file a feature request (in bugzilla)? It is probably useful for generic code as well, but we could implement this for powerpc only if needed. Ok I will. For sure using __builtin_trap() would be the best. unsigned long test1(unsigned long msr) { WARN_ON((msr & (MSR_DR | MSR_IR | MSR_RI)) != (MSR_DR | MSR_IR | MSR_RI)); return msr; } unsigned long test2(unsigned long msr) { if ((msr & (MSR_DR | MSR_IR | MSR_RI)) != (MSR_DR | MSR_IR | MSR_RI)) __builtin_trap(); return msr; } 03c0 : 3c0: 70 69 00 32 andi. 
r9,r3,50 3c4: 69 29 00 32 xorir9,r9,50 3c8: 31 49 ff ff addic r10,r9,-1 3cc: 7d 2a 49 10 subfe r9,r10,r9 3d0: 0f 09 00 00 twnei r9,0 3d4: 4e 80 00 20 blr 03d8 : 3d8: 70 69 00 32 andi. r9,r3,50 3dc: 0f 09 00 32 twnei r9,50 3e0: 4e 80 00 20 blr Christophe
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
On Thu, Feb 11, 2021 at 03:09:43PM +0100, Christophe Leroy wrote: > Le 11/02/2021 à 12:49, Segher Boessenkool a écrit : > >On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote: > >>powerpc BUG_ON() is based on using twnei or tdnei instruction, > >>which obliges gcc to format the condition into a 0 or 1 value > >>in a register. > > > >Huh? Why is that? > > > >Will it work better if this used __builtin_trap? Or does the kernel only > >detect very specific forms of trap instructions? > > We already made a try with __builtin_trap() 1,5 year ago, see > https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20510ce03cc9463f1c9e743c1d93b939de501b53.1566219503.git.christophe.le...@c-s.fr/ > > The main problems encountered are: > - It is only possible to use it for BUG_ON, not for WARN_ON because GCC > considers it as noreturn. Is there any workaround ? A trap is noreturn by definition: -- Built-in Function: void __builtin_trap (void) This function causes the program to exit abnormally. > - The kernel (With CONFIG_DEBUG_BUGVERBOSE) needs to be able to identify > the source file and line corresponding to the trap. How can that be done > with __builtin_trap() ? The DWARF debug info should be sufficient. Perhaps you can post-process some way? You can create a trap that falls through yourself (by having a trap-on condition with a condition that is always true, but make the compiler not see that). This isn't efficient though. Could you file a feature request (in bugzilla)? It is probably useful for generic code as well, but we could implement this for powerpc only if needed. Segher
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
Le 11/02/2021 à 12:49, Segher Boessenkool a écrit : On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote: powerpc BUG_ON() is based on using twnei or tdnei instruction, which obliges gcc to format the condition into a 0 or 1 value in a register. Huh? Why is that? Will it work better if this used __builtin_trap? Or does the kernel only detect very specific forms of trap instructions? We already made a try with __builtin_trap() 1,5 year ago, see https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20510ce03cc9463f1c9e743c1d93b939de501b53.1566219503.git.christophe.le...@c-s.fr/ The main problems encountered are: - It is only possible to use it for BUG_ON, not for WARN_ON because GCC considers it as noreturn. Is there any workaround ? - The kernel (With CONFIG_DEBUG_BUGVERBOSE) needs to be able to identify the source file and line corresponding to the trap. How can that be done with __builtin_trap() ? Christophe
[PATCH 6/6] powerpc/mm/64s: Allow STRICT_KERNEL_RWX again
We have now fixed the known bugs in STRICT_KERNEL_RWX for Book3S 64-bit Hash and Radix MMUs, see preceding commits, so allow the option to be selected again. Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 107bb4319e0e..7d6080afbad6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -135,7 +135,7 @@ config PPC select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 - select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_COPY_MC if PPC64 -- 2.25.1
[PATCH 5/6] powerpc/mm/64s/hash: Add real-mode change_memory_range() for hash LPAR
When we enabled STRICT_KERNEL_RWX we received some reports of boot failures when using the Hash MMU and running under phyp. The crashes are intermittent, and often exhibit as a completely unresponsive system, or possibly an oops. One example, which was caught in xmon: [ 14.068327][T1] devtmpfs: mounted [ 14.069302][T1] Freeing unused kernel memory: 5568K [ 14.142060][ T347] BUG: Unable to handle kernel instruction fetch [ 14.142063][T1] Run /sbin/init as init process [ 14.142074][ T347] Faulting instruction address: 0xc0004400 cpu 0x2: Vector: 400 (Instruction Access) at [cc7475e0] pc: c0004400: exc_virt_0x4400_instruction_access+0x0/0x80 lr: c01862d4: update_rq_clock+0x44/0x110 sp: cc747880 msr: 800040001031 current = 0xcc60d380 paca= 0xc0001ec9de80 irqmask: 0x03 irq_happened: 0x01 pid = 347, comm = kworker/2:1 ... enter ? for help [cc747880] c01862d4 update_rq_clock+0x44/0x110 (unreliable) [cc7478f0] c0198794 update_blocked_averages+0xb4/0x6d0 [cc7479f0] c0198e40 update_nohz_stats+0x90/0xd0 [cc747a20] c01a13b4 _nohz_idle_balance+0x164/0x390 [cc747b10] c01a1af8 newidle_balance+0x478/0x610 [cc747be0] c01a1d48 pick_next_task_fair+0x58/0x480 [cc747c40] c0eaab5c __schedule+0x12c/0x950 [cc747cd0] c0eab3e8 schedule+0x68/0x120 [cc747d00] c016b730 worker_thread+0x130/0x640 [cc747da0] c0174d50 kthread+0x1a0/0x1b0 [cc747e10] c000e0f0 ret_from_kernel_thread+0x5c/0x6c This shows that CPU 2, which was idle, woke up and then appears to randomly take an instruction fault on a completely valid area of kernel text. The cause turns out to be the call to hash__mark_rodata_ro(), late in boot. Due to the way we layout text and rodata, that function actually changes the permissions for all of text and rodata to read-only plus execute. To do the permission change we use a hypervisor call, H_PROTECT. On phyp that appears to be implemented by briefly removing the mapping of the kernel text, before putting it back with the updated permissions. 
If any other CPU is executing during that window, it will see spurious faults on the kernel text and/or data, leading to crashes. To fix it we use stop machine to collect all other CPUs, and then have them drop into real mode (MMU off), while we change the mapping. That way they are unaffected by the mapping temporarily disappearing. Signed-off-by: Michael Ellerman --- arch/powerpc/mm/book3s64/hash_pgtable.c | 105 +++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 3663d3cdffac..01de985df2c4 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -400,6 +401,19 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX + +struct change_memory_parms { + unsigned long start, end, newpp; + unsigned int step, nr_cpus, master_cpu; + atomic_t cpu_counter; +}; + +// We'd rather this was on the stack but it has to be in the RMO +static struct change_memory_parms chmem_parms; + +// And therefore we need a lock to protect it from concurrent use +static DEFINE_MUTEX(chmem_lock); + static void change_memory_range(unsigned long start, unsigned long end, unsigned int step, unsigned long newpp) { @@ -414,6 +428,73 @@ static void change_memory_range(unsigned long start, unsigned long end, mmu_kernel_ssize); } +static int notrace chmem_secondary_loop(struct change_memory_parms *parms) +{ + unsigned long msr, tmp, flags; + int *p; + + p = &parms->cpu_counter.counter; + + local_irq_save(flags); + __hard_EE_RI_disable(); + + asm volatile ( + // Switch to real mode and leave interrupts off + "mfmsr %[msr] ;" + "li %[tmp], %[MSR_IR_DR];" + "andc %[tmp], %[msr], %[tmp] ;" + "mtmsrd %[tmp] ;" + + // Tell the master we are in real mode + "1: " + "lwarx %[tmp], 0, %[p] ;" + "addic %[tmp], %[tmp], -1 ;" + 
"stwcx. %[tmp], 0, %[p] ;" + "bne- 1b ;" + + // Spin until the counter goes to zero + "2: ;" + "lwz%[tmp], 0(%[p]) ;" + "cmpwi %[tmp], 0 ;" + "bne- 2b ;" + + // Switch back to virtual mode + "mtmsrd %[msr]
[PATCH 4/6] powerpc/mm/64s/hash: Factor out change_memory_range()
Pull the loop calling hpte_updateboltedpp() out of hash__change_memory_range() into a helper function. We need it to be a separate function for the next patch. Signed-off-by: Michael Ellerman --- arch/powerpc/mm/book3s64/hash_pgtable.c | 23 +++ 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 03819c259f0a..3663d3cdffac 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -400,10 +400,23 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX +static void change_memory_range(unsigned long start, unsigned long end, + unsigned int step, unsigned long newpp) +{ + unsigned long idx; + + pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", +start, end, newpp, step); + + for (idx = start; idx < end; idx += step) + /* Not sure if we can do much with the return value */ + mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, + mmu_kernel_ssize); +} + static bool hash__change_memory_range(unsigned long start, unsigned long end, unsigned long newpp) { - unsigned long idx; unsigned int step, shift; shift = mmu_psize_defs[mmu_linear_psize].shift; @@ -415,13 +428,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, if (start >= end) return false; - pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", -start, end, newpp, step); - - for (idx = start; idx < end; idx += step) - /* Not sure if we can do much with the return value */ - mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, - mmu_kernel_ssize); + change_memory_range(start, end, step, newpp); return true; } -- 2.25.1
[PATCH 2/6] powerpc/pseries: Add key to flags in pSeries_lpar_hpte_updateboltedpp()
The flags argument to plpar_pte_protect() (aka. H_PROTECT), includes the key in bits 9-13, but currently we always set those bits to zero. In the past that hasn't been a problem because we always used key 0 for the kernel, and updateboltedpp() is only used for kernel mappings. However since commit d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation") we are now inadvertently changing the key (to zero) when we call plpar_pte_protect(). That hasn't broken anything because updateboltedpp() is only used for STRICT_KERNEL_RWX, which is currently disabled on 64s due to other bugs. But we want to fix that, so first we need to pass the key correctly to plpar_pte_protect(). In the `newpp` value the low 3 bits of the key are already in the correct spot, but the high 2 bits of the key need to be shifted down. Fixes: d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation") Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/lpar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 764170fdb0f7..8bbbddff7226 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -976,11 +976,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - flags = newpp & 7; + flags = newpp & (HPTE_R_PP | HPTE_R_N); if (mmu_has_feature(MMU_FTR_KERNEL_RO)) /* Move pp0 into bit 8 (IBM 55) */ flags |= (newpp & HPTE_R_PP0) >> 55; + flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO); + lpar_rc = plpar_pte_protect(flags, slot, 0); BUG_ON(lpar_rc != H_SUCCESS); -- 2.25.1
[PATCH 1/6] powerpc/mm/64s: Add _PAGE_KERNEL_ROX
In the past we had a fallback definition for _PAGE_KERNEL_ROX, but we removed that in commit d82fd29c5a8c ("powerpc/mm: Distribute platform specific PAGE and PMD flags and definitions") and added definitions for each MMU family. However we missed adding a definition for 64s, which was not really a bug because it's currently not used. But we'd like to use PAGE_KERNEL_ROX in a future patch so add a definition now. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index a39886681629..1358fae01d04 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -116,6 +116,7 @@ */ #define _PAGE_KERNEL_RW(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ) +#define _PAGE_KERNEL_ROX(_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC) #define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_RW | _PAGE_EXEC) /* -- 2.25.1
[PATCH 3/6] powerpc/64s: Use htab_convert_pte_flags() in hash__mark_rodata_ro()
In hash__mark_rodata_ro() we pass the raw PP_RXXX value to hash__change_memory_range(). That has the effect of setting the key to zero, because PP_RXXX contains no key value. Fix it by using htab_convert_pte_flags(), which knows how to convert a pgprot into a pp value, including the key. Fixes: d94b827e89dc ("powerpc/book3s64/kuap: Use Key 3 for kernel mapping with hash translation") Signed-off-by: Michael Ellerman --- arch/powerpc/mm/book3s64/hash_pgtable.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 567e0c6b3978..03819c259f0a 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -428,12 +428,14 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, void hash__mark_rodata_ro(void) { - unsigned long start, end; + unsigned long start, end, pp; start = (unsigned long)_stext; end = (unsigned long)__init_begin; - WARN_ON(!hash__change_memory_range(start, end, PP_RXXX)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY); + + WARN_ON(!hash__change_memory_range(start, end, pp)); } void hash__mark_initmem_nx(void) -- 2.25.1
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
On Thu, Feb 11, 2021 at 01:26:12PM +0100, Christophe Leroy wrote: > >What PowerPC cpus implement branch folding? I know none. > > Extract from powerpc mpc8323 reference manual: > — Zero-cycle branch capability (branch folding) Yeah, this is not what is traditionally called branch folding (which stores the instruction being branched to in some cache, originally the instruction cache itself; somewhat similar (but different) to the BTIC on 750). Overloaded terminology :-) 6xx/7xx CPUs had the branch execution unit in the frontend, and it would not issue a branch at all if it could be resolved then already. Power4 and later predict all branches, and most are not issued at all (those that do need to be executed, like bdnz, are). At completion time it is checked if the prediction was correct (and corrective action is taken if not). Segher
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
Le 11/02/2021 à 12:49, Segher Boessenkool a écrit : On Thu, Feb 11, 2021 at 07:41:52AM +0000, Christophe Leroy wrote: powerpc BUG_ON() is based on using twnei or tdnei instruction, which obliges gcc to format the condition into a 0 or 1 value in a register. Huh? Why is that? Will it work better if this used __builtin_trap? Or does the kernel only detect very specific forms of trap instructions? By using a generic implementation, gcc will generate a branch to the unconditional trap generated by BUG(). That is many more instructions than ideal. As modern powerpc implement branch folding, that's even more efficient. What PowerPC cpus implement branch folding? I know none. Extract from powerpc mpc8323 reference manual: High instruction and data throughput — Zero-cycle branch capability (branch folding) — Programmable static branch prediction on unresolved conditional branches — Two integer units with enhanced multipliers in the e300c2 for increased integer instruction throughput and a maximum two-cycle latency for multiply instructions — Instruction fetch unit capable of fetching two instructions per clock from the instruction cache — A six-entry instruction queue (IQ) that provides lookahead capability — Independent pipelines with feed-forwarding that reduces data dependencies in hardware — 16-Kbyte, four-way set-associative instruction and data caches on the e300c2. — Cache write-back or write-through operation programmable on a per-page or per-block basis — Features for instruction and data cache locking and protection — BPU that performs CR lookahead operations — Address translation facilities for 4-Kbyte page size, variable block size, and 256-Mbyte segment size — A 64-entry, two-way, set-associative ITLB and DTLB — Eight-entry data and instruction BAT arrays providing 128-Kbyte to 256-Mbyte blocks — Software table search operations and updates supported through fast trap mechanism — 52-bit virtual address; 32-bit physical address Christophe
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
On Thu, Feb 11, 2021 at 08:04:55PM +1000, Nicholas Piggin wrote: > Excerpts from Christophe Leroy's message of February 11, 2021 5:41 pm: > > As modern powerpc implement branch folding, that's even more efficient. Ah, it seems you mean what Arm calls branch folding. Sure, power4 already did that, and this has not changed. > I think POWER will speculate conditional traps as non faulting always > so it should be just as good if not better than the branch. Right, these are not branch instructions, so are not branch predicted; all trap instructions are assumed to fall through, like all other non-branch instructions. Segher
Re: [PATCH 1/3] powerpc/perf: Adds support for programming of Thresholding in P10
Michael Ellerman writes: > From: Kajol Jain > > Thresholding, a performance monitoring unit feature, can be > used to identify marked instructions which take more than > expected cycles between start event and end event. > Threshold compare (thresh_cmp) bits are programmed in MMCRA > register. In Power9, thresh_cmp bits were part of the > event code. But in case of P10, thresh_cmp are not part of > event code due to inclusion of MMCR3 bits. Accidental resend, ignore. cheers
Re: [PATCH] tools/perf: Fix powerpc gap between kernel end and module start
> On 09-Feb-2021, at 6:17 PM, Arnaldo Carvalho de Melo wrote: > > Em Wed, Feb 03, 2021 at 12:31:48PM -0300, Arnaldo Carvalho de Melo escreveu: >> Em Tue, Feb 02, 2021 at 04:02:36PM +0530, Athira Rajeev escreveu: >>> >>> >>>On 18-Jan-2021, at 3:51 PM, kajoljain wrote: >>> >>> >>> >>>On 1/12/21 3:08 PM, Jiri Olsa wrote: >>> >>>On Mon, Dec 28, 2020 at 09:14:14PM -0500, Athira Rajeev wrote: >>> >>>SNIP >>> >>> >>>c2799370 b backtrace_flag >>>c2799378 B radix_tree_node_cachep >>>c2799380 B __bss_stop >>>c27a B _end >>>c0080389 t icmp_checkentry [ip_tables] >>>c00803890038 t ipt_alloc_initial_table [ip_tables] >>>c00803890468 T ipt_do_table [ip_tables] >>>c00803890de8 T ipt_unregister_table_pre_exit >>> [ip_tables] >>>... >>> >>>Perf calls function symbols__fixup_end() which sets the end of >>>symbol >>>to 0xc0080389, which is the next address and this is the >>>start >>>address of first module (icmp_checkentry in above) which will >>> make >>>the >>>huge symbol size of 0x8010f. >>> >>>After symbols__fixup_end: >>>symbols__fixup_end: sym->name: _end, sym->start: >>>0xc27a, >>>sym->end: 0xc0080389 >>> >>>On powerpc, kernel text segment is located at 0xc000 >>>whereas the modules are located at very high memory addresses, >>>0xc0080xxx. Since the gap between end of kernel text >>>segment >>>and beginning of first module's address is high, histogram >>>allocation >>>using calloc fails. >>> >>>Fix this by detecting the kernel's last symbol and limiting >>>the range of last kernel symbol to pagesize. >>> >>> >>>Patch looks good to me. >>> >>>Tested-By: Kajol Jain >>> >>>Thanks, >>>Kajol Jain >>> >>> >>>Signed-off-by: Athira Rajeev >>> >>> >>>I can't test, but since the same approach works for arm and s390, >>>this also looks ok >>> >>>Acked-by: Jiri Olsa >>> >>>thanks, >>>jirka >>> >>> >>> Hi Arnaldo, >>> >>> Can you please help review this patch and merge if this looks good.. 
>> >> Thanks, collected the Tested-by from Kajol and the Acked-by from Jiri >> and applied to my local tree for testing, then up to my perf/core >> branch. > > Had to apply this on top. > > - Arnaldo > > commit 0f000f9c89182950cd3500226729977251529364 > Author: Arnaldo Carvalho de Melo > Date: Tue Feb 9 09:41:21 2021 -0300 > >perf powerpc: Fix printf conversion specifier for IP addresses > >We need to use "%#" PRIx64 for u64 values, not "%lx", fixing this build >problem on powerpc 32-bit: > > 7213.69 ubuntu:18.04-x-powerpc: FAIL powerpc-linux-gnu-gcc > (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 >arch/powerpc/util/machine.c: In function 'arch__symbols__fixup_end': >arch/powerpc/util/machine.c:23:12: error: format '%lx' expects > argument of type 'long unsigned int', but argument 6 has type 'u64 {aka long > long unsigned int}' [-Werror=format=] > pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end); >^ >/git/linux/tools/perf/util/debug.h:18:21: note: in definition of macro > 'pr_fmt' > #define pr_fmt(fmt) fmt > ^~~ >/git/linux/tools/perf/util/debug.h:33:29: note: in expansion of macro > 'pr_debugN' > #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__) > ^ >/git/linux/tools/perf/util/debug.h:33:42: note: in expansion of macro > 'pr_fmt' > #define pr_debug4(fmt, ...) 
pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__) > ^~ >arch/powerpc/util/machine.c:23:2: note: in expansion of macro > 'pr_debug4' > pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end); > ^ >cc1: all warnings being treated as errors >/git/linux/tools/build/Makefile.build:139: recipe for target 'util' > failed >make[5]: *** [util] Error 2 >/git/linux/tools/build/Makefile.build:139: recipe for target 'powerpc' > failed >make[4]: *** [powerpc] Error 2 >/git/linux/tools/build/Makefile.build:139: recipe for target 'arch' > failed >make[3]: *** [arch] Error 2 > 7330.47 ubuntu:18.04-x-powerpc64 : Ok powerpc64-linux-gnu-gcc > (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 > >Fixes: 557c3eadb7712741 ("perf powerpc: Fix gap between kernel end and > module start") >Cc: Athira Rajeev >Cc: Jiri Olsa >Cc: Kajol J
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
On Thu, Feb 11, 2021 at 08:04:55PM +1000, Nicholas Piggin wrote: > It would be nice if we could have a __builtin_trap_if that gcc would use > conditional traps with, (and which never assumes following code is > unreachable even for constant true, so we can use it with WARN and put > explicit unreachable for BUG). It automatically does that with just __builtin_trap, see my other mail :-) Segher
Re: [PATCH] arm64: defconfig: enable modern virtio pci device
On Wed, Feb 10, 2021 at 8:05 PM Anders Roxell wrote: > > Since patch ("virtio-pci: introduce modern device module") got added it > is not possible to boot a defconfig kernel in qemu with a virtio pci > device. Add CONFIG_VIRTIO_PCI_MODERN=y fragment makes the kernel able > to boot. > > Signed-off-by: Anders Roxell > --- > arch/arm/configs/multi_v7_defconfig | 1 + > arch/arm64/configs/defconfig| 1 + Acked-by: Arnd Bergmann Michael, can you pick this up in the vhost tree that introduces the regression? Arnd
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
On Thu, Feb 11, 2021 at 07:41:52AM +, Christophe Leroy wrote: > powerpc BUG_ON() is based on using twnei or tdnei instruction, > which obliges gcc to format the condition into a 0 or 1 value > in a register. Huh? Why is that? Will it work better if this used __builtin_trap? Or does the kernel only detect very specific forms of trap instructions? > By using a generic implementation, gcc will generate a branch > to the unconditional trap generated by BUG(). That is many more instructions than ideal. > As modern powerpc implement branch folding, that's even more efficient. What PowerPC cpus implement branch folding? I know none. Some example code generated via __builtin_trap: void trap(void) { __builtin_trap(); } void trap_if_0(int x) { if (x == 0) __builtin_trap(); } void trap_if_not_0(int x) { if (x != 0) __builtin_trap(); } -m64: trap: trap trap_if_0: tdeqi 3,0 blr trap_if_not_0: tdnei 3,0 blr -m32: trap: trap trap_if_0: tweqi 3,0 blr trap_if_not_0: twnei 3,0 blr Segher
[PATCH] powerpc/powernv/pci: Use kzalloc() for phb related allocations
As part of commit fbbefb320214 ("powerpc/pci: Move PHB discovery for PCI_DN using platforms"), I switched some allocations from memblock_alloc() to kmalloc(), otherwise memblock would warn that it was being called after slab init. However I missed that the code relied on the allocations being zeroed, without which we could end up crashing: pci_bus :00: busn_res: [bus 00-ff] end is updated to ff BUG: Unable to handle kernel data access on read at 0x6b6b6b6b6b6b6af7 Faulting instruction address: 0xc00dbc90 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV ... NIP pnv_ioda_get_pe_state+0xe0/0x1d0 LR pnv_ioda_get_pe_state+0xb4/0x1d0 Call Trace: pnv_ioda_get_pe_state+0xb4/0x1d0 (unreliable) pnv_pci_config_check_eeh.isra.9+0x78/0x270 pnv_pci_read_config+0xf8/0x160 pci_bus_read_config_dword+0xa4/0x120 pci_bus_generic_read_dev_vendor_id+0x54/0x270 pci_scan_single_device+0xb8/0x140 pci_scan_slot+0x80/0x1b0 pci_scan_child_bus_extend+0x94/0x490 pcibios_scan_phb+0x1f8/0x3c0 pcibios_init+0x8c/0x12c do_one_initcall+0x94/0x510 kernel_init_freeable+0x35c/0x3fc kernel_init+0x2c/0x168 ret_from_kernel_thread+0x5c/0x70 Switch them to kzalloc(). 
Fixes: fbbefb320214 ("powerpc/pci: Move PHB discovery for PCI_DN using platforms") Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7ee14ac275bd..f0f901683a2f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2921,7 +2921,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = kmalloc(sizeof(*phb), GFP_KERNEL); + phb = kzalloc(sizeof(*phb), GFP_KERNEL); if (!phb) panic("%s: Failed to allocate %zu bytes\n", __func__, sizeof(*phb)); @@ -2970,7 +2970,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = kmalloc(phb->diag_data_size, GFP_KERNEL); + phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL); if (!phb->diag_data) panic("%s: Failed to allocate %u bytes\n", __func__, phb->diag_data_size); @@ -3032,7 +3032,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = kmalloc(size, GFP_KERNEL); + aux = kzalloc(size, GFP_KERNEL); if (!aux) panic("%s: Failed to allocate %lu bytes\n", __func__, size); -- 2.25.1
[PATCH 1/3] powerpc/perf: Adds support for programming of Thresholding in P10
From: Kajol Jain Thresholding, a performance monitoring unit feature, can be used to identify marked instructions which take more than expected cycles between start event and end event. Threshold compare (thresh_cmp) bits are programmed in MMCRA register. In Power9, thresh_cmp bits were part of the event code. But in case of P10, thresh_cmp are not part of event code due to inclusion of MMCR3 bits. Patch here adds an option to use attr.config1 variable to be used to pass thresh_cmp value to be programmed in MMCRA register. A new ppmu flag called PPMU_HAS_ATTR_CONFIG1 has been added and this flag is used to notify the use of attr.config1 variable. Patch has extended the parameter list of 'compute_mmcr', to include power_pmu's 'flags' element and parameter list of get_constraint to include attr.config1 value. It also extend parameter list of power_check_constraints inorder to pass perf_event list. As stated by commit ef0e3b650f8d ("powerpc/perf: Fix Threshold Event Counter Multiplier width for P10"), constraint bits for thresh_cmp is also needed to be increased to 11 bits, which is handled as part of this patch. We added bit number 53 as part of constraint bits of thresh_cmp for power10 to make it an 11 bit field. Updated layout for p10: /* * Layout of constraint bits: * *60565248444036 32 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ fab_match ] [ thresh_cmp ] [ thresh_ctl] [ ] * | | * [ thresh_cmp bits for p10] thresh_sel -* * *2824201612 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1] *| || | | * BHRB IFM -* || |*radix_scope | Count of events for each PMC. * EBB -*| |p1, p2, p3, p4, p5, p6. * L1 I/D qualifier -* | * nc - number of counters -* * * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints * we want the low bit of each field to be added to any existing value. * * Everything else is a value field. 
*/ Result: command#: cat /sys/devices/cpu/format/thresh_cmp config1:0-17 ex. usage: command#: perf record -I --weight -d -e cpu/event=0x67340101EC,thresh_cmp=500/ ./ebizzy -S 2 -t 1 -s 4096 1826636 records/s real 2.00 s user 2.00 s sys 0.00 s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.038 MB perf.data (61 samples) ] Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210209095234.837356-1-kj...@linux.ibm.com --- arch/powerpc/include/asm/perf_event_server.h | 5 +- arch/powerpc/perf/core-book3s.c | 15 +++-- arch/powerpc/perf/isa207-common.c| 67 +--- arch/powerpc/perf/isa207-common.h| 15 +++-- arch/powerpc/perf/mpc7450-pmu.c | 5 +- arch/powerpc/perf/power10-pmu.c | 4 +- arch/powerpc/perf/power5+-pmu.c | 5 +- arch/powerpc/perf/power5-pmu.c | 5 +- arch/powerpc/perf/power6-pmu.c | 5 +- arch/powerpc/perf/power7-pmu.c | 5 +- arch/powerpc/perf/ppc970-pmu.c | 5 +- 11 files changed, 102 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3b7baba01c92..00e7e671bb4b 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -36,9 +36,9 @@ struct power_pmu { unsigned long test_adder; int (*compute_mmcr)(u64 events[], int n_ev, unsigned int hwc[], struct mmcr_regs *mmcr, - struct perf_event *pevents[]); + struct perf_event *pevents[], u32 flags); int (*get_constraint)(u64 event_id, unsigned long *mskp, - unsigned long *valp); + unsigned long *valp, u64 event_config1); int (*get_alternatives)(u64 event_id, unsigned int flags, u64 alt[]); void(*get_mem_data_src)(union perf_mem_data_src *dsrc, @@ -83,6 +83,7 @@ struct power_pmu { #define PPMU_NO_SIAR 0x0100 /* Do not use SIAR */ #define PPMU_ARCH_31 0x0200 /* Has MMCR3, SIER2 and SIER3 */ #define PPMU_P10_DD1 0x0400 /* Is power10 DD1 processor version */ +#define PPMU_HAS_ATTR_CONFIG1 0x0800 /* Using config1 at
Re: [PATCH] powerpc/bug: Remove specific powerpc BUG_ON()
Excerpts from Christophe Leroy's message of February 11, 2021 5:41 pm: > powerpc BUG_ON() is based on using twnei or tdnei instruction, > which obliges gcc to format the condition into a 0 or 1 value > in a register. > > By using a generic implementation, gcc will generate a branch > to the unconditional trap generated by BUG(). We don't want to do this on 64s because that will lose the useful CFAR contents. Unfortunately the code generation is not great and the registers that give some useful information about the condition are often mangled :( It would be nice if we could have a __builtin_trap_if that gcc would use conditional traps with, (and which never assumes following code is unreachable even for constant true, so we can use it with WARN and put explicit unreachable for BUG). > > As modern powerpc implement branch folding, that's even more efficient. I think POWER will speculate conditional traps as non faulting always so it should be just as good if not better than the branch. Thanks, Nick
Re: [PATCH] powerpc/traps: Declare unrecoverable_exception() as __noreturn
Le 11/02/2021 à 08:47, Gabriel Paubert a écrit : On Thu, Feb 11, 2021 at 06:34:43AM +, Christophe Leroy wrote: unrecoverable_exception() is never expected to return, most callers have an infiniteloop in case it returns. Ensure it really never returns by terminating it with a BUG(), and declare it __no_return. It always GCC to really simplify functions calling it. In the exemple below, s/always/allows ? Yes (Otherwise I can't parse it.) it avoids the stack frame in the likely fast path and avoids code duplication for the exit. Indeed, nice code generation improvement. With this patch: 0348 : 348: 81 43 00 84 lwz r10,132(r3) 34c: 71 48 00 02 andi. r8,r10,2 350: 41 82 00 2c beq 37c 354: 71 4a 40 00 andi. r10,r10,16384 358: 40 82 00 20 bne 378 35c: 80 62 00 70 lwz r3,112(r2) 360: 74 63 00 01 andis. r3,r3,1 364: 40 82 00 28 bne 38c 368: 7d 40 00 a6 mfmsr r10 36c: 7c 11 13 a6 mtspr 81,r0 370: 7c 12 13 a6 mtspr 82,r0 374: 4e 80 00 20 blr 378: 48 00 00 00 b 378 Infinite loop (seems to be on test of MSR_PR)? Yes, that's what you get when CONFIG_BUG is not selected. /include/asm-generic/bug.h: #ifndef HAVE_ARCH_BUG #define BUG() do {} while (1) #endif Gabriel 37c: 94 21 ff f0 stwur1,-16(r1) 380: 7c 08 02 a6 mflrr0 384: 90 01 00 14 stw r0,20(r1) 388: 48 00 00 01 bl 388 388: R_PPC_REL24unrecoverable_exception 38c: 38 e2 00 70 addir7,r2,112 390: 3d 00 00 01 lis r8,1 394: 7c c0 38 28 lwarx r6,0,r7 398: 7c c6 40 78 andcr6,r6,r8 39c: 7c c0 39 2d stwcx. r6,0,r7 3a0: 40 a2 ff f4 bne 394 3a4: 38 60 00 01 li r3,1 3a8: 4b ff ff c0 b 368 Without this patch: 0348 : 348: 94 21 ff f0 stwur1,-16(r1) 34c: 93 e1 00 0c stw r31,12(r1) 350: 7c 7f 1b 78 mr r31,r3 354: 81 23 00 84 lwz r9,132(r3) 358: 71 2a 00 02 andi. r10,r9,2 35c: 41 82 00 34 beq 390 360: 71 29 40 00 andi. r9,r9,16384 364: 40 82 00 28 bne 38c 368: 80 62 00 70 lwz r3,112(r2) 36c: 74 63 00 01 andis. 
r3,r3,1 370: 40 82 00 3c bne 3ac 374: 7d 20 00 a6 mfmsr r9 378: 7c 11 13 a6 mtspr 81,r0 37c: 7c 12 13 a6 mtspr 82,r0 380: 83 e1 00 0c lwz r31,12(r1) 384: 38 21 00 10 addir1,r1,16 388: 4e 80 00 20 blr 38c: 48 00 00 00 b 38c 390: 7c 08 02 a6 mflrr0 394: 90 01 00 14 stw r0,20(r1) 398: 48 00 00 01 bl 398 398: R_PPC_REL24unrecoverable_exception 39c: 80 01 00 14 lwz r0,20(r1) 3a0: 81 3f 00 84 lwz r9,132(r31) 3a4: 7c 08 03 a6 mtlrr0 3a8: 4b ff ff b8 b 360 3ac: 39 02 00 70 addir8,r2,112 3b0: 3d 40 00 01 lis r10,1 3b4: 7c e0 40 28 lwarx r7,0,r8 3b8: 7c e7 50 78 andcr7,r7,r10 3bc: 7c e0 41 2d stwcx. r7,0,r8 3c0: 40 a2 ff f4 bne 3b4 3c4: 38 60 00 01 li r3,1 3c8: 4b ff ff ac b 374 Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/interrupt.h | 2 +- arch/powerpc/kernel/interrupt.c | 1 - arch/powerpc/kernel/traps.c | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index dcff30e3919b..fa8bfb91f8df 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -411,7 +411,7 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); DECLARE_INTERRUPT_HANDLER(CacheLockingException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); -DECLARE_INTERRUPT_HANDLER(unrecoverable_exception); +DECLARE_INTERRUPT_HANDLER(unrecoverable_exception) __noreturn; DECLARE_INTERRUPT_HANDLER(WatchdogException); DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index eca3be36c18c..7e7106641ca9 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -440,7 +440,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned return re