Re: [PATCH 9/9] zsmalloc: remove the zsmalloc file system
On Tue, Mar 09, 2021 at 04:53:48PM +0100, Christoph Hellwig wrote: > Just use the generic anon_inode file system. > > Signed-off-by: Christoph Hellwig Acked-by: Minchan Kim
[PATCH] powerpc: fix warning comparing pointer to 0
Fix the following coccicheck warning: ./arch/powerpc/platforms/powermac/pfunc_core.c:688:40-41: WARNING comparing pointer to 0. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong --- arch/powerpc/platforms/powermac/pfunc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index 94df0a9..a5aa40f 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -685,7 +685,7 @@ static int pmf_add_functions(struct pmf_device *dev, void *driverdata) const int plen = strlen(PP_PREFIX); int count = 0; - for (pp = dev->node->properties; pp != 0; pp = pp->next) { + for (pp = dev->node->properties; pp; pp = pp->next) { const char *name; if (strncmp(pp->name, PP_PREFIX, plen) != 0) continue; -- 1.8.3.1
Re: [PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb
On Tue, Mar 09, 2021 at 04:53:40PM +0100, Christoph Hellwig wrote: > Rename alloc_inode to free the name for a new variant that does not > need boilerplate to create a super_block first. > > Signed-off-by: Christoph Hellwig > --- > arch/powerpc/platforms/pseries/cmm.c | 2 +- > drivers/dma-buf/dma-buf.c| 2 +- > drivers/gpu/drm/drm_drv.c| 2 +- > drivers/misc/cxl/api.c | 2 +- > drivers/misc/vmw_balloon.c | 2 +- > drivers/scsi/cxlflash/ocxl_hw.c | 2 +- > drivers/virtio/virtio_balloon.c | 2 +- > fs/aio.c | 2 +- > fs/anon_inodes.c | 4 ++-- > fs/libfs.c | 2 +- > include/linux/fs.h | 2 +- > kernel/resource.c| 2 +- > mm/z3fold.c | 2 +- > mm/zsmalloc.c| 2 +- > 14 files changed, 15 insertions(+), 15 deletions(-) > > diff --git a/arch/powerpc/platforms/pseries/cmm.c > b/arch/powerpc/platforms/pseries/cmm.c > index 45a3a3022a85c9..6d36b858b14df1 100644 > --- a/arch/powerpc/platforms/pseries/cmm.c > +++ b/arch/powerpc/platforms/pseries/cmm.c > @@ -580,7 +580,7 @@ static int cmm_balloon_compaction_init(void) > return rc; > } > > - b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); > + b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); > if (IS_ERR(b_dev_info.inode)) { > rc = PTR_ERR(b_dev_info.inode); > b_dev_info.inode = NULL; > diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c > index f264b70c383eb4..dedcc9483352dc 100644 > --- a/drivers/dma-buf/dma-buf.c > +++ b/drivers/dma-buf/dma-buf.c > @@ -445,7 +445,7 @@ static inline int is_dma_buf_file(struct file *file) > static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) > { > struct file *file; > - struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); > + struct inode *inode = alloc_anon_inode_sb(dma_buf_mnt->mnt_sb); > > if (IS_ERR(inode)) > return ERR_CAST(inode); > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c > index 20d22e41d7ce74..87e7214a8e3565 100644 > --- a/drivers/gpu/drm/drm_drv.c > +++ b/drivers/gpu/drm/drm_drv.c > @@ -519,7 +519,7 @@ static 
struct inode *drm_fs_inode_new(void) > return ERR_PTR(r); > } > > - inode = alloc_anon_inode(drm_fs_mnt->mnt_sb); > + inode = alloc_anon_inode_sb(drm_fs_mnt->mnt_sb); > if (IS_ERR(inode)) > simple_release_fs(&drm_fs_mnt, &drm_fs_cnt); > > diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c > index b493de962153ba..2efbf6c98028ef 100644 > --- a/drivers/misc/cxl/api.c > +++ b/drivers/misc/cxl/api.c > @@ -73,7 +73,7 @@ static struct file *cxl_getfile(const char *name, > goto err_module; > } > > - inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb); > + inode = alloc_anon_inode_sb(cxl_vfs_mount->mnt_sb); > if (IS_ERR(inode)) { > file = ERR_CAST(inode); > goto err_fs; > diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c > index b837e7eba5f7dc..5d057a05ddbee8 100644 > --- a/drivers/misc/vmw_balloon.c > +++ b/drivers/misc/vmw_balloon.c > @@ -1900,7 +1900,7 @@ static __init int vmballoon_compaction_init(struct > vmballoon *b) > return PTR_ERR(vmballoon_mnt); > > b->b_dev_info.migratepage = vmballoon_migratepage; > - b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb); > + b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb); > > if (IS_ERR(b->b_dev_info.inode)) > return PTR_ERR(b->b_dev_info.inode); > diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c > index 244fc27215dc79..40184ed926b557 100644 > --- a/drivers/scsi/cxlflash/ocxl_hw.c > +++ b/drivers/scsi/cxlflash/ocxl_hw.c > @@ -88,7 +88,7 @@ static struct file *ocxlflash_getfile(struct device *dev, > const char *name, > goto err2; > } > > - inode = alloc_anon_inode(ocxlflash_vfs_mount->mnt_sb); > + inode = alloc_anon_inode_sb(ocxlflash_vfs_mount->mnt_sb); > if (IS_ERR(inode)) { > rc = PTR_ERR(inode); > dev_err(dev, "%s: alloc_anon_inode failed rc=%d\n", > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index 8985fc2cea8615..cae76ee5bdd688 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c 
> @@ -916,7 +916,7 @@ static int virtballoon_probe(struct virtio_device *vdev) > } > > vb->vb_dev_info.migratepage = virtballoon_migratepage; > - vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); > + vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); > if (IS_ERR(vb->vb_dev_info.inode)) { > err = PTR_ERR(vb->vb_dev_info.inode); > goto
Re: [PATCH V2] mm/memtest: Add ARCH_USE_MEMTEST
On 3/1/21 10:02 AM, Anshuman Khandual wrote: > early_memtest() does not get called from all architectures. Hence enabling > CONFIG_MEMTEST and providing a valid memtest=[1..N] kernel command line > option might not trigger the memory pattern tests as would be expected in > normal circumstances. This situation is misleading. > > The change here prevents the above mentioned problem after introducing a > new config option ARCH_USE_MEMTEST that should be subscribed on platforms > that call early_memtest(), in order to enable the config CONFIG_MEMTEST. > Conversely CONFIG_MEMTEST cannot be enabled on platforms where it would > not be tested anyway. > > Cc: Russell King > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Thomas Bogendoerfer > Cc: Michael Ellerman > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: Chris Zankel > Cc: Max Filippov > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-m...@vger.kernel.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-xte...@linux-xtensa.org > Cc: linux...@kvack.org > Cc: linux-ker...@vger.kernel.org > Reviewed-by: Max Filippov > Signed-off-by: Anshuman Khandual > --- > This patch applies on v5.12-rc1 and has been tested on arm64 platform. > But it has been just build tested on all other platforms. > > Changes in V2: > > - Added ARCH_USE_MEMTEST in the sorted alphabetical order on platforms Gentle ping, any updates or objections ?
Re: [PATCH 2/6] mm: Generalize SYS_SUPPORTS_HUGETLBFS (rename as ARCH_SUPPORTS_HUGETLBFS)
Anshuman Khandual writes: > SYS_SUPPORTS_HUGETLBFS config has duplicate definitions on platforms that > subscribe it. Instead, just make it a generic option which can be selected > on applicable platforms. Also rename it as ARCH_SUPPORTS_HUGETLBFS instead. > This reduces code duplication and makes it cleaner. > > Cc: Russell King > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Thomas Bogendoerfer > Cc: "James E.J. Bottomley" > Cc: Helge Deller > Cc: Michael Ellerman > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Paul Walmsley > Cc: Palmer Dabbelt > Cc: Albert Ou > Cc: Yoshinori Sato > Cc: Rich Felker > Cc: Alexander Viro > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-m...@vger.kernel.org > Cc: linux-par...@vger.kernel.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-ri...@lists.infradead.org > Cc: linux...@vger.kernel.org > Cc: linux-fsde...@vger.kernel.org > Cc: linux-ker...@vger.kernel.org > Signed-off-by: Anshuman Khandual > --- > arch/arm/Kconfig | 5 + > arch/arm64/Kconfig | 4 +--- > arch/mips/Kconfig | 6 +- > arch/parisc/Kconfig| 5 + > arch/powerpc/Kconfig | 3 --- > arch/powerpc/platforms/Kconfig.cputype | 6 +++--- Acked-by: Michael Ellerman (powerpc) cheers
Re: [PATCH v4] powerpc/uprobes: Validation for prefixed instruction
Ravi Bangoria writes: > On 3/9/21 4:51 PM, Naveen N. Rao wrote: >> On 2021/03/09 08:54PM, Michael Ellerman wrote: >>> Ravi Bangoria writes: As per ISA 3.1, prefixed instruction should not cross 64-byte boundary. So don't allow Uprobe on such prefixed instruction. There are two ways probed instruction is changed in mapped pages. First, when Uprobe is activated, it searches for all the relevant pages and replace instruction in them. In this case, if that probe is on the 64-byte unaligned prefixed instruction, error out directly. Second, when Uprobe is already active and user maps a relevant page via mmap(), instruction is replaced via mmap() code path. But because Uprobe is invalid, entire mmap() operation can not be stopped. In this case just print an error and continue. Signed-off-by: Ravi Bangoria Acked-by: Naveen N. Rao >>> >>> Do we have a Fixes: tag for this? >> >> Since this is an additional check we are adding, I don't think we should >> add a Fixes: tag. Nothing is broken per-se -- we're just adding more >> checks to catch simple mistakes. Also, like Oleg pointed out, there are >> still many other ways for users to shoot themselves in the foot with >> uprobes and prefixed instructions, if they so desire. >> >> However, if you still think we should add a Fixes: tag, we can perhaps >> use the below commit since I didn't see any specific commit adding >> support for prefixed instructions for uprobes: >> >> Fixes: 650b55b707fdfa ("powerpc: Add prefixed instructions to >> instruction data type") > > True. IMO, It doesn't really need any Fixes tag. Yep OK, I'm happy without a Fixes tag based on that explanation. 
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index e8a63713e655..4cbfff6e94a3 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, if (addr & 0x03) return -EINVAL; + if (cpu_has_feature(CPU_FTR_ARCH_31) && + ppc_inst_prefixed(auprobe->insn) && + (addr & (SZ_64 - 4)) == SZ_64 - 4) { + pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); + return -EINVAL; >>> >>> I realise we already did the 0x03 check above, but I still think this >>> would be clearer simply as: >>> >>> ((addr & 0x3f) == 60) >> >> Indeed, I like the use of `60' there -- hex is overrated ;) > > Sure. Will resend. Thanks. cheers
Re: PowerPC64 future proof kernel toc, revised for lld
On Wed, Mar 10, 2021 at 03:44:44PM +1100, Alexey Kardashevskiy wrote: > For my own education, is .got for prom_init.o still generated by ld or gcc? .got is generated by ld. > In other words, should "objdump -D -s -j .got" ever dump .got for any .o > file, like below? No. "objdump -r prom_init.o | grep GOT" will tell you whether prom_init.o *may* cause ld to generate .got entries. (Linker optimisations or --gc-sections might remove the need for those .got entries.) > objdump: section '.got' mentioned in a -j option, but not found in any input > file Right, expected. -- Alan Modra Australia Development Lab, IBM
Re: Errant readings on LM81 with T2080 SoC
On 3/9/21 6:19 PM, Chris Packham wrote: > On 9/03/21 9:27 am, Chris Packham wrote: >> On 8/03/21 5:59 pm, Guenter Roeck wrote: >>> Other than that, the only other real idea I have would be to monitor >>> the i2c bus. >> I am in the fortunate position of being able to go into the office and >> even happen to have the expensive scope at the moment. Now I just need >> to find a tame HW engineer so I don't burn myself trying to attach the >> probes. > One thing I see on the scope is that when there is a CPU load there > appears to be some clock stretching going on (SCL is held low some > times). I don't see it without the CPU load. It's hard to correlate a > clock stretching event with a bad read or error but it is one area where > the SMBUS spec has a maximum that might cause the device to give up waiting. > Do you have CONFIG_PREEMPT enabled in your kernel ? But even without that it is possible that the hot loops at the beginning and end of each operation mess up the driver and cause it to sleep longer than intended. Did you try usleep_range() ? On a side note, can you send me a register dump for the lm81 ? It would be useful for my module test code. Thanks, Guenter
Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel
Rob Herring writes: > On Fri, Feb 19, 2021 at 6:52 PM Thiago Jung Bauermann > wrote: >> >> Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump >> kernel") fixed how elf64_load() estimates the FDT size needed by the >> crashdump kernel. >> >> At the same time, commit 130b2d59cec0 ("powerpc: Use common >> of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic >> function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That >> change made the code overestimate it a bit by counting twice the space >> required for the kernel command line and /chosen properties. >> >> Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra >> space needed by the kdump kernel, and change the function name so that it >> better reflects what the function is now doing. >> >> Signed-off-by: Thiago Jung Bauermann >> Reviewed-by: Lakshmi Ramasubramanian >> --- >> arch/powerpc/include/asm/kexec.h | 2 +- >> arch/powerpc/kexec/elf_64.c | 2 +- >> arch/powerpc/kexec/file_load_64.c | 26 -- >> 3 files changed, 10 insertions(+), 20 deletions(-) > > I ended up delaying the referenced series til 5.13, but have applied > it now. Can I get an ack from the powerpc maintainers on this one? > I'll fixup the commit log to make sense given the commit id's aren't > valid. Thanks for handling it. Acked-by: Michael Ellerman cheers
Re: [PATCH 2/6] mm: Generalize SYS_SUPPORTS_HUGETLBFS (rename as ARCH_SUPPORTS_HUGETLBFS)
On Tue, 09 Mar 2021 00:33:06 PST (-0800), anshuman.khand...@arm.com wrote: SYS_SUPPORTS_HUGETLBFS config has duplicate definitions on platforms that subscribe it. Instead, just make it a generic option which can be selected on applicable platforms. Also rename it as ARCH_SUPPORTS_HUGETLBFS instead. This reduces code duplication and makes it cleaner. Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Yoshinori Sato Cc: Rich Felker Cc: Alexander Viro Cc: linux-arm-ker...@lists.infradead.org Cc: linux-m...@vger.kernel.org Cc: linux-par...@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-ri...@lists.infradead.org Cc: linux...@vger.kernel.org Cc: linux-fsde...@vger.kernel.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Anshuman Khandual --- arch/arm/Kconfig | 5 + arch/arm64/Kconfig | 4 +--- arch/mips/Kconfig | 6 +- arch/parisc/Kconfig| 5 + arch/powerpc/Kconfig | 3 --- arch/powerpc/platforms/Kconfig.cputype | 6 +++--- arch/riscv/Kconfig | 5 + arch/sh/Kconfig| 5 + fs/Kconfig | 5 - 9 files changed, 13 insertions(+), 31 deletions(-) [...] diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 85d626b8ce5e..69954db3aca9 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -30,6 +30,7 @@ config RISCV select ARCH_HAS_STRICT_KERNEL_RWX if MMU select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT @@ -165,10 +166,6 @@ config ARCH_WANT_GENERAL_HUGETLB config ARCH_SUPPORTS_UPROBES def_bool y -config SYS_SUPPORTS_HUGETLBFS - depends on MMU - def_bool y - config STACKTRACE_SUPPORT def_bool y Acked-by: Palmer Dabbelt
Re: make alloc_anon_inode more useful
On Tue, Mar 09, 2021 at 04:53:39PM +0100, Christoph Hellwig wrote: > this series first renames the existing alloc_anon_inode to > alloc_anon_inode_sb to clearly mark it as requiring a superblock. > > It then adds a new alloc_anon_inode that works on the anon_inode > file system super block, thus removing tons of boilerplate code. > > The few remaining callers of alloc_anon_inode_sb all use alloc_file_pseudo > later, but might also be ripe for some cleanup. On a somewhat related note, could I get you to look at drivers/video/fbdev/core/fb_defio.c? As far as I can tell, there's no need for fb_deferred_io_aops to exist. We could just set file->f_mapping->a_ops to NULL, and set_page_dirty() would do the exact same thing this code does (except it would get the return value correct). But maybe that would make something else go wrong that distinguishes between page->mapping being NULL and page->mapping->a_ops->foo being NULL? Completely untested patch ... diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index a591d291b231..441ec31d3e4d 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -151,17 +151,6 @@ static const struct vm_operations_struct fb_deferred_io_vm_ops = { .page_mkwrite = fb_deferred_io_mkwrite, }; -static int fb_deferred_io_set_page_dirty(struct page *page) -{ - if (!PageDirty(page)) - SetPageDirty(page); - return 0; -} - -static const struct address_space_operations fb_deferred_io_aops = { - .set_page_dirty = fb_deferred_io_set_page_dirty, -}; - int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma) { vma->vm_ops = &fb_deferred_io_vm_ops; @@ -212,14 +201,6 @@ void fb_deferred_io_init(struct fb_info *info) } EXPORT_SYMBOL_GPL(fb_deferred_io_init); -void fb_deferred_io_open(struct fb_info *info, -struct inode *inode, -struct file *file) -{ - file->f_mapping->a_ops = &fb_deferred_io_aops; -} -EXPORT_SYMBOL_GPL(fb_deferred_io_open); - void fb_deferred_io_cleanup(struct 
fb_info *info) { struct fb_deferred_io *fbdefio = info->fbdefio; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 06f5805de2de..c4ba76359f22 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1415,10 +1415,7 @@ __releases(&info->lock) if (res) module_put(info->fbops->owner); } -#ifdef CONFIG_FB_DEFERRED_IO - if (info->fbdefio) - fb_deferred_io_open(info, inode, file); -#endif + file->f_mapping->a_ops = NULL; out: unlock_fb_info(info); if (res) diff --git a/include/linux/fb.h b/include/linux/fb.h index ecfbcc0553a5..a8dccd23c249 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -659,9 +659,6 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, /* drivers/video/fb_defio.c */ int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma); extern void fb_deferred_io_init(struct fb_info *info); -extern void fb_deferred_io_open(struct fb_info *info, - struct inode *inode, - struct file *file); extern void fb_deferred_io_cleanup(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync);
Re: PowerPC64 future proof kernel toc, revised for lld
This patch future-proofs the kernel against linker changes that might put the toc pointer at some location other than .got+0x8000, by replacing __toc_start+0x8000 with .TOC. throughout. If the kernel's idea of the toc pointer doesn't agree with the linker, bad things happen. prom_init.c code relocating its toc is also changed so that a symbolic __prom_init_toc_start toc-pointer relative address is calculated rather than assuming that it is always at toc-pointer - 0x8000. The length calculations loading values from the toc are also avoided. It's a little incestuous to do that with unreloc_toc picking up adjusted values (which is fine in practice, they both adjust by the same amount if all goes well). I've also changed the way .got is aligned in vmlinux.lds and zImage.lds, mostly so that dumping out section info by objdump or readelf plainly shows the alignment is 256. This linker script feature was added 2005-09-27, available in FSF binutils releases from 2.17 onwards. Should be safe to use in the kernel, I think. Finally, put *(.got) before the prom_init.o entry which only needs *(.toc), so that the GOT header goes in the correct place. I don't believe this makes any difference for the kernel as it would for dynamic objects being loaded by ld.so. That change is just to stop lusers who blindly copy kernel scripts being led astray. Of course, this change needs the prom_init.c changes. Some notes on .toc and .got. .toc is a compiler generated section of addresses. .got is a linker generated section of addresses, generally built when the linker sees R_*_*GOT* relocations. In the case of powerpc64 ld.bfd, there are multiple generated .got sections, one per input object file. So you can somewhat reasonably write in a linker script an input section statement like *prom_init.o(.got .toc) to mean "the .got and .toc section for files matching *prom_init.o". On other architectures that doesn't make sense, because the linker generally has just one .got section. 
Even on powerpc64, note well that the GOT entries for prom_init.o may be merged with GOT entries from other objects. That means that if prom_init.o references, say, _end via some GOT relocation, and some other object also references _end via a GOT relocation, the GOT entry for _end may be in the range __prom_init_toc_start to __prom_init_toc_end and if the kernel does something special to GOT/TOC entries in that range then the value of _end as seen by objects other than prom_init.o will be affected. On the other hand the GOT entry for _end may not be in the range __prom_init_toc_start to __prom_init_toc_end. Which way it turns out is deterministic but a detail of linker operation that should not be relied on. A feature of ld.bfd is that input .toc (and .got) sections matching one linker input section statement may be sorted, to put entries used by small-model code first, near the toc base. This is why scripts for powerpc64 normally use *(.got .toc) rather than *(.got) *(.toc), since the first form allows more freedom to sort. Another feature of ld.bfd is that indirect addressing sequences using the GOT/TOC may be edited by the linker to relative addressing. In many cases relative addressing would be emitted by gcc for -mcmodel=medium if you appropriately decorate variable declarations with non-default visibility. Signed-off-by: Alan Modra diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 1d83966f5ef6..e45907fe468f 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -28,7 +28,7 @@ p_etext: .8byte _etext p_bss_start: .8byte __bss_start p_end: .8byte _end -p_toc: .8byte __toc_start + 0x8000 - p_base +p_toc: .8byte .TOC. 
- p_base p_dyn: .8byte __dynamic_start - p_base p_rela:.8byte __rela_dyn_start - p_base p_prom:.8byte 0 diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index d6f072865627..d65cd55a6f38 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -36,12 +36,9 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER - . = ALIGN(256); - .got : + .got : ALIGN(256) { -__toc_start = .; -*(.got) -*(.toc) +*(.got .toc) } #endif diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 324d7b298ec3..e5a1eae11ed5 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -48,14 +48,18 @@ static inline int in_kernel_text(unsigned long addr) static inline unsigned long kernel_toc_addr(void) { - /* Defined by the linker, see vmlinux.lds.S */ - extern unsigned long __toc_start; - - /* -* The TOC register (r2) points 32kB into the TOC, so that 64kB of -* the TOC can be addressed using a single machine instruction. -*/ - return (unsigned long)(&__toc_start) + 0x8000UL; +#if 0 + /* This version is appropriate if
Re: Errant readings on LM81 with T2080 SoC
On 3/9/21 3:35 PM, Chris Packham wrote: > > On 8/03/21 1:31 pm, Guenter Roeck wrote: >> On 3/7/21 2:52 PM, Chris Packham wrote: >>> Fundamentally I think this is a problem with the fact that the LM81 is >>> an SMBus device but the T2080 (and other Freescale SoCs) uses i2c and we >>> emulate SMBus. I suspect the errant readings are when we don't get round >>> to completing the read within the timeout specified by the SMBus >>> specification. Depending on when that happens we either fail the >>> transfer or interpret the result as all-1s. >> That is quite unlikely. Many sensor chips are SMBus chips connected to >> i2c busses. It is much more likely that there is a bug in the T2080 i2c >> driver, >> that the chip doesn't like the bulk read command issued through regmap, that >> the chip has problems with the i2c bus speed, or that the i2c bus is noisy. > I have noticed that with the switch to regmap we end up using plain i2c > instead of SMBUS. There appears to be no way of saying use SMBUS > semantics if the i2c adapter reports I2C_FUNC_I2C. > The driver only really supports I2C; SMBUS functions are emulated. I don't think that makes a real difference. Guenter
Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel
On Tue, Mar 9, 2021 at 7:31 PM Lakshmi Ramasubramanian wrote: > > On 3/9/21 6:08 PM, Rob Herring wrote: > > Hi Rob, > > > On Fri, Feb 19, 2021 at 6:52 PM Thiago Jung Bauermann > > wrote: > >> > >> Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump > >> kernel") fixed how elf64_load() estimates the FDT size needed by the > >> crashdump kernel. > >> > >> At the same time, commit 130b2d59cec0 ("powerpc: Use common > >> of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic > >> function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That > >> change made the code overestimate it a bit by counting twice the space > >> required for the kernel command line and /chosen properties. > >> > >> Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra > >> space needed by the kdump kernel, and change the function name so that it > >> better reflects what the function is now doing. > >> > >> Signed-off-by: Thiago Jung Bauermann > >> Reviewed-by: Lakshmi Ramasubramanian > >> --- > >> arch/powerpc/include/asm/kexec.h | 2 +- > >> arch/powerpc/kexec/elf_64.c | 2 +- > >> arch/powerpc/kexec/file_load_64.c | 26 -- > >> 3 files changed, 10 insertions(+), 20 deletions(-) > > > > I ended up delaying the referenced series til 5.13, but have applied > > it now. Can I get an ack from the powerpc maintainers on this one? > > I'll fixup the commit log to make sense given the commit id's aren't > > valid. > > I checked the change applied in linux-next branch and also Device Tree's > for-next branch - it looks like v1 of Thiago's patch has been applied. > Could you please pick up the v2 patch? Huh? This patch (v2) hasn't been applied to any tree AFAICT. Rob
Re: [PATCH v4 3/6] ASoC: dt-bindings: fsl_rpmsg: Add binding doc for rpmsg cpu dai driver
On Mon, Mar 08, 2021 at 09:22:27PM +0800, Shengjiu Wang wrote: > fsl_rpmsg cpu dai driver is driver for rpmsg audio, which is mainly used Bindings describe h/w blocks, not drivers. > for getting the user's configuration from device tree and configure the > clocks which is used by Cortex-M core. So in this document define the > needed property. > > Signed-off-by: Shengjiu Wang > --- > .../devicetree/bindings/sound/fsl,rpmsg.yaml | 118 ++ > 1 file changed, 118 insertions(+) > create mode 100644 Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml > > diff --git a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml > b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml > new file mode 100644 > index ..5731c1fbc0a6 > --- /dev/null > +++ b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml > @@ -0,0 +1,118 @@ > +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) > +%YAML 1.2 > +--- > +$id: http://devicetree.org/schemas/sound/fsl,rpmsg.yaml# > +$schema: http://devicetree.org/meta-schemas/core.yaml# > + > +title: NXP Audio RPMSG CPU DAI Controller > + > +maintainers: > + - Shengjiu Wang > + > +description: | > + fsl_rpmsg cpu dai driver is virtual driver for rpmsg audio, which doesn't > + touch hardware. It is mainly used for getting the user's configuration > + from device tree and configure the clocks which is used by Cortex-M core. > + So in this document define the needed property. 
> + > +properties: > + compatible: > +enum: > + - fsl,imx7ulp-rpmsg > + - fsl,imx8mn-rpmsg > + - fsl,imx8mm-rpmsg > + - fsl,imx8mp-rpmsg > + > + model: > +$ref: /schemas/types.yaml#/definitions/string > +description: User specified audio sound card name > + > + clocks: > +items: > + - description: Peripheral clock for register access > + - description: Master clock > + - description: DMA clock for DMA register access > + - description: Parent clock for multiple of 8kHz sample rates > + - description: Parent clock for multiple of 11kHz sample rates > +minItems: 5 If this doesn't touch hardware, what are these clocks for? You don't need 'minItems' unless it's less than the number of 'items'. > + > + clock-names: > +items: > + - const: ipg > + - const: mclk > + - const: dma > + - const: pll8k > + - const: pll11k > +minItems: 5 > + > + power-domains: > +maxItems: 1 > + > + fsl,audioindex: > +$ref: /schemas/types.yaml#/definitions/uint32 > +enum: [0, 1] > +default: 0 > +description: Instance index for sound card in > + M core side, which share one rpmsg > + channel. We don't do indexes in DT. What's this numbering tied to? > + > + fsl,version: version of what? This seems odd at best. > +$ref: /schemas/types.yaml#/definitions/uint32 > +enum: [1, 2] You're going to update this with every new firmware version? > +default: 2 > +description: The version of M core image, which is > + to make driver compatible with different image. > + > + fsl,buffer-size: > +$ref: /schemas/types.yaml#/definitions/uint32 > +description: pre allocate dma buffer size How can you have DMA, this doesn't touch h/w? > + > + fsl,enable-lpa: > +$ref: /schemas/types.yaml#/definitions/flag > +description: enable low power audio path. > + > + fsl,rpmsg-out: > +$ref: /schemas/types.yaml#/definitions/flag > +description: | > + This is a boolean property. If present, the transmitting function > + will be enabled. 
> + > + fsl,rpmsg-in: > +$ref: /schemas/types.yaml#/definitions/flag > +description: | > + This is a boolean property. If present, the receiving function > + will be enabled. > + > + fsl,codec-type: > +$ref: /schemas/types.yaml#/definitions/uint32 > +enum: [0, 1, 2] > +default: 0 > +description: Sometimes the codec is registered by > + driver not by the device tree, this items > + can be used to distinguish codecs. How does one decide what value to use? > + > + audio-codec: > +$ref: /schemas/types.yaml#/definitions/phandle > +description: The phandle of the audio codec The codec is controlled from the Linux side? > + > + memory-region: > +$ref: /schemas/types.yaml#/definitions/phandle > +description: phandle to the reserved memory nodes > + > +required: > + - compatible > + - fsl,audioindex > + - fsl,version > + - fsl,buffer-size > + > +additionalProperties: false > + > +examples: > + - | > +rpmsg_audio: rpmsg_audio { > +compatible = "fsl,imx8mn-rpmsg"; > +fsl,audioindex = <0> ; > +fsl,version = <2>; > +fsl,buffer-size = <0x600>; > +fsl,enable-lpa; How does this work? Don't you need somewhere to put the 'rpmsg' data? > +}; > -- > 2.27.0 >
Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel
On 3/9/21 6:08 PM, Rob Herring wrote: Hi Rob, On Fri, Feb 19, 2021 at 6:52 PM Thiago Jung Bauermann wrote: Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump kernel") fixed how elf64_load() estimates the FDT size needed by the crashdump kernel. At the same time, commit 130b2d59cec0 ("powerpc: Use common of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That change made the code overestimate it a bit by counting twice the space required for the kernel command line and /chosen properties. Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra space needed by the kdump kernel, and change the function name so that it better reflects what the function is now doing. Signed-off-by: Thiago Jung Bauermann Reviewed-by: Lakshmi Ramasubramanian --- arch/powerpc/include/asm/kexec.h | 2 +- arch/powerpc/kexec/elf_64.c | 2 +- arch/powerpc/kexec/file_load_64.c | 26 -- 3 files changed, 10 insertions(+), 20 deletions(-) I ended up delaying the referenced series til 5.13, but have applied it now. Can I get an ack from the powerpc maintainers on this one? I'll fixup the commit log to make sense given the commit id's aren't valid. I checked the change applied in linux-next branch and also Device Tree's for-next branch - it looks like v1 of Thiago's patch has been applied. Could you please pick up the v2 patch? thanks, -lakshmi
Re: Errant readings on LM81 with T2080 SoC
On 9/03/21 9:27 am, Chris Packham wrote: > On 8/03/21 5:59 pm, Guenter Roeck wrote: >> Other than that, the only other real idea I have would be to monitor >> the i2c bus. > I am in the fortunate position of being able to go into the office and > even happen to have the expensive scope at the moment. Now I just need > to find a tame HW engineer so I don't burn myself trying to attach the > probes. One thing I see on the scope is that when there is a CPU load there appears to be some clock stretching going on (SCL is held low sometimes). I don't see it without the CPU load. It's hard to correlate a clock stretching event with a bad read or error but it is one area where the SMBUS spec has a maximum that might cause the device to give up waiting.
Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel
On Fri, Feb 19, 2021 at 6:52 PM Thiago Jung Bauermann wrote: > > Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump > kernel") fixed how elf64_load() estimates the FDT size needed by the > crashdump kernel. > > At the same time, commit 130b2d59cec0 ("powerpc: Use common > of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic > function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That > change made the code overestimate it a bit by counting twice the space > required for the kernel command line and /chosen properties. > > Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra > space needed by the kdump kernel, and change the function name so that it > better reflects what the function is now doing. > > Signed-off-by: Thiago Jung Bauermann > Reviewed-by: Lakshmi Ramasubramanian > --- > arch/powerpc/include/asm/kexec.h | 2 +- > arch/powerpc/kexec/elf_64.c | 2 +- > arch/powerpc/kexec/file_load_64.c | 26 -- > 3 files changed, 10 insertions(+), 20 deletions(-) I ended up delaying the referenced series til 5.13, but have applied it now. Can I get an ack from the powerpc maintainers on this one? I'll fixup the commit log to make sense given the commit id's aren't valid. Rob
Re: [PATCH v2 40/43] powerpc/64s: Make kuap_check_amr() and kuap_get_and_check_amr() generic
Excerpts from Christophe Leroy's message of March 9, 2021 10:10 pm: > In preparation of porting powerpc32 to C syscall entry/exit, > rename kuap_check_amr() and kuap_get_and_check_amr() as kuap_check() > and kuap_get_and_check(), and move in the generic asm/kup.h the stub > for when CONFIG_PPC_KUAP is not selected. Looks pretty straightforward to me. While you're renaming things, could kuap_check_amr() be changed to kuap_assert_locked() or similar? Otherwise, Reviewed-by: Nicholas Piggin > > Signed-off-by: Christophe Leroy > --- > arch/powerpc/include/asm/book3s/64/kup.h | 24 ++-- > arch/powerpc/include/asm/kup.h | 10 +- > arch/powerpc/kernel/interrupt.c | 12 ++-- > arch/powerpc/kernel/irq.c| 2 +- > 4 files changed, 18 insertions(+), 30 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/kup.h > b/arch/powerpc/include/asm/book3s/64/kup.h > index 8bd905050896..d9b07e9998be 100644 > --- a/arch/powerpc/include/asm/book3s/64/kup.h > +++ b/arch/powerpc/include/asm/book3s/64/kup.h > @@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs > *regs, >*/ > } > > -static inline unsigned long kuap_get_and_check_amr(void) > +static inline unsigned long kuap_get_and_check(void) > { > if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { > unsigned long amr = mfspr(SPRN_AMR); > @@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void) > return 0; > } > > -#else /* CONFIG_PPC_PKEY */ > - > -static inline void kuap_user_restore(struct pt_regs *regs) > -{ > -} > - > -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long > amr) > -{ > -} > - > -static inline unsigned long kuap_get_and_check_amr(void) > -{ > - return 0; > -} > - > -#endif /* CONFIG_PPC_PKEY */ > - > - > -#ifdef CONFIG_PPC_KUAP > - > -static inline void kuap_check_amr(void) > +static inline void kuap_check(void) > { > if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && > mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) > WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); > diff 
--git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h > index 25671f711ec2..b7efa46b3109 100644 > --- a/arch/powerpc/include/asm/kup.h > +++ b/arch/powerpc/include/asm/kup.h > @@ -74,7 +74,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long > address, bool is_write) > return false; > } > > -static inline void kuap_check_amr(void) { } > +static inline void kuap_check(void) { } > +static inline void kuap_save_and_lock(struct pt_regs *regs) { } > +static inline void kuap_user_restore(struct pt_regs *regs) { } > +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long > amr) { } > + > +static inline unsigned long kuap_get_and_check(void) > +{ > + return 0; > +} > > /* > * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush > diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c > index 727b7848c9cc..40ed55064e54 100644 > --- a/arch/powerpc/kernel/interrupt.c > +++ b/arch/powerpc/kernel/interrupt.c > @@ -76,7 +76,7 @@ notrace long system_call_exception(long r3, long r4, long > r5, > } else > #endif > #ifdef CONFIG_PPC64 > - kuap_check_amr(); > + kuap_check(); > #endif > > booke_restore_dbcr0(); > @@ -254,7 +254,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long > r3, > CT_WARN_ON(ct_state() == CONTEXT_USER); > > #ifdef CONFIG_PPC64 > - kuap_check_amr(); > + kuap_check(); > #endif > > regs->result = r3; > @@ -380,7 +380,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct > pt_regs *regs, unsigned >* AMR can only have been unlocked if we interrupted the kernel. 
>*/ > #ifdef CONFIG_PPC64 > - kuap_check_amr(); > + kuap_check(); > #endif > > local_irq_save(flags); > @@ -451,7 +451,7 @@ notrace unsigned long > interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign > unsigned long flags; > unsigned long ret = 0; > #ifdef CONFIG_PPC64 > - unsigned long amr; > + unsigned long kuap; > #endif > > if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && > @@ -467,7 +467,7 @@ notrace unsigned long > interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign > CT_WARN_ON(ct_state() == CONTEXT_USER); > > #ifdef CONFIG_PPC64 > - amr = kuap_get_and_check_amr(); > + kuap = kuap_get_and_check(); > #endif > > if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { > @@ -511,7 +511,7 @@ notrace unsigned long > interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign >* value from the check above. >*/ > #ifdef CONFIG_PPC64 > - kuap_kernel_restore(regs, amr); > + kuap_kernel_restore(regs, kuap); > #endif > > return ret; > diff --git a/arch/powerpc/kernel/irq.c
Re: [PATCH v2 36/43] powerpc/32: Set current->thread.regs in C interrupt entry
Excerpts from Christophe Leroy's message of March 9, 2021 10:10 pm: > No need to do that in assembly, do it in C. Hmm. No issues with the patch as such, but why does ppc32 need this but not 64? AFAIKS 64 sets this when a thread is created. Thanks, Nick > > Signed-off-by: Christophe Leroy > --- > arch/powerpc/include/asm/interrupt.h | 4 +++- > arch/powerpc/kernel/entry_32.S | 3 +-- > 2 files changed, 4 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/interrupt.h > b/arch/powerpc/include/asm/interrupt.h > index 861e6eadc98c..e6d71c2e3aa2 100644 > --- a/arch/powerpc/include/asm/interrupt.h > +++ b/arch/powerpc/include/asm/interrupt.h > @@ -33,8 +33,10 @@ static inline void interrupt_enter_prepare(struct pt_regs > *regs, struct interrup > if (!arch_irq_disabled_regs(regs)) > trace_hardirqs_off(); > > - if (user_mode(regs)) > + if (user_mode(regs)) { > + current->thread.regs = regs; > account_cpu_user_entry(); > + } > #endif > /* >* Book3E reconciles irq soft mask in asm > diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S > index 8fe1c3fdfa6e..815a4ff1ba76 100644 > --- a/arch/powerpc/kernel/entry_32.S > +++ b/arch/powerpc/kernel/entry_32.S > @@ -52,8 +52,7 @@ > prepare_transfer_to_handler: > andi. r0,r9,MSR_PR > addir12, r2, THREAD > - beq 2f /* if from user, fix up THREAD.regs */ > - stw r3,PT_REGS(r12) > + beq 2f > #ifdef CONFIG_PPC_BOOK3S_32 > kuep_lock r11, r12 > #endif > -- > 2.25.0 > >
Re: [PATCH v2 28/43] powerpc/64e: Call bad_page_fault() from do_page_fault()
Excerpts from Christophe Leroy's message of March 9, 2021 10:09 pm: > book3e/64 is the last one calling __bad_page_fault() > from assembly. > > Save non volatile registers before calling do_page_fault() > and modify do_page_fault() to call __bad_page_fault() > for all platforms. > > Then it can be refactored by the call of bad_page_fault() > which avoids the duplication of the exception table search. This can go in with the 64e change after your series. I think it should be ready for the next merge window as well. Thanks, Nick > > Signed-off-by: Christophe Leroy > --- > arch/powerpc/kernel/exceptions-64e.S | 8 +--- > arch/powerpc/mm/fault.c | 17 - > 2 files changed, 5 insertions(+), 20 deletions(-) > > diff --git a/arch/powerpc/kernel/exceptions-64e.S > b/arch/powerpc/kernel/exceptions-64e.S > index e8eb9992a270..b60f89078a3f 100644 > --- a/arch/powerpc/kernel/exceptions-64e.S > +++ b/arch/powerpc/kernel/exceptions-64e.S > @@ -1010,15 +1010,9 @@ storage_fault_common: > addir3,r1,STACK_FRAME_OVERHEAD > ld r14,PACA_EXGEN+EX_R14(r13) > ld r15,PACA_EXGEN+EX_R15(r13) > + bl save_nvgprs > bl do_page_fault > - cmpdi r3,0 > - bne-1f > b ret_from_except_lite > -1: bl save_nvgprs > - mr r4,r3 > - addir3,r1,STACK_FRAME_OVERHEAD > - bl __bad_page_fault > - b ret_from_except > > /* > * Alignment exception doesn't fit entirely in the 0x100 bytes so it > diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c > index 2e54bac99a22..7bcff3fca110 100644 > --- a/arch/powerpc/mm/fault.c > +++ b/arch/powerpc/mm/fault.c > @@ -541,24 +541,15 @@ NOKPROBE_SYMBOL(___do_page_fault); > > static long __do_page_fault(struct pt_regs *regs) > { > - const struct exception_table_entry *entry; > long err; > > err = ___do_page_fault(regs, regs->dar, regs->dsisr); > if (likely(!err)) > - return err; > - > - entry = search_exception_tables(regs->nip); > - if (likely(entry)) { > - instruction_pointer_set(regs, extable_fixup(entry)); > return 0; > - } else if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { 
> - __bad_page_fault(regs, err); > - return 0; > - } else { > - /* 32 and 64e handle the bad page fault in asm */ > - return err; > - } > + > + bad_page_fault(regs, err); > + > + return 0; > } > NOKPROBE_SYMBOL(__do_page_fault); > > -- > 2.25.0 > >
Re: [PATCH v2 02/43] powerpc/traps: Declare unrecoverable_exception() as __noreturn
Excerpts from Christophe Leroy's message of March 9, 2021 10:09 pm: > unrecoverable_exception() is never expected to return, most callers > have an infinite loop in case it returns. > > Ensure it really never returns by terminating it with a BUG(), and > declare it __noreturn. > > It allows GCC to really simplify functions calling it. In the example > below, it avoids the stack frame in the likely fast path and avoids > code duplication for the exit. > > With this patch: [snip] Nice. > diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c > index a44a30b0688c..d5c9d9ddd186 100644 > --- a/arch/powerpc/kernel/traps.c > +++ b/arch/powerpc/kernel/traps.c > @@ -2170,11 +2170,15 @@ > DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) > * in the MSR is 0. This indicates that SRR0/1 are live, and that > * we therefore lost state by taking this exception. > */ > -void unrecoverable_exception(struct pt_regs *regs) > +void __noreturn unrecoverable_exception(struct pt_regs *regs) > { > pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", >regs->trap, regs->nip, regs->msr); > die("Unrecoverable exception", regs, SIGABRT); > + /* die() should not return */ > + WARN(true, "die() unexpectedly returned"); > + for (;;) > + ; > } I don't think the WARN should be added because that will cause another interrupt after something is already badly wrong, so this might just make it harder to debug. For example if die() is falling through for some reason, we warn and cause a program check here, and that might also be unrecoverable so it might come through here and fall through again and warn again, etc. Putting the infinite loop is good enough I think (and better than there was previously). Otherwise Reviewed-by: Nicholas Piggin Thanks, Nick
Re: [PATCH v2 01/43] powerpc/traps: unrecoverable_exception() is not an interrupt handler
Excerpts from Christophe Leroy's message of March 9, 2021 10:09 pm: > unrecoverable_exception() is called from interrupt handlers or > after an interrupt handler has failed. > > Make it a standard function to avoid doubling the actions > performed on interrupt entry (e.g.: user time accounting). > > Fixes: 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers") > Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin This should go in as a fix for this release I think. > --- > arch/powerpc/include/asm/interrupt.h | 3 ++- > arch/powerpc/kernel/interrupt.c | 1 - > arch/powerpc/kernel/traps.c | 2 +- > 3 files changed, 3 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/interrupt.h > b/arch/powerpc/include/asm/interrupt.h > index aedfba29e43a..e8d09a841373 100644 > --- a/arch/powerpc/include/asm/interrupt.h > +++ b/arch/powerpc/include/asm/interrupt.h > @@ -410,7 +410,6 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); > DECLARE_INTERRUPT_HANDLER(CacheLockingException); > DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); > DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); > -DECLARE_INTERRUPT_HANDLER(unrecoverable_exception); > DECLARE_INTERRUPT_HANDLER(WatchdogException); > DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); > > @@ -437,6 +436,8 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); > > DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); > > +void unrecoverable_exception(struct pt_regs *regs); > + > void replay_system_reset(void); > void replay_soft_interrupts(void); > > diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c > index 398cd86b6ada..b8e7d25be31b 100644 > --- a/arch/powerpc/kernel/interrupt.c > +++ b/arch/powerpc/kernel/interrupt.c > @@ -436,7 +436,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct > pt_regs *regs, unsigned > return ret; > } > > -void unrecoverable_exception(struct pt_regs *regs); > void preempt_schedule_irq(void); > > notrace unsigned 
long interrupt_exit_kernel_prepare(struct pt_regs *regs, > unsigned long msr) > diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c > index 1583fd1c6010..a44a30b0688c 100644 > --- a/arch/powerpc/kernel/traps.c > +++ b/arch/powerpc/kernel/traps.c > @@ -2170,7 +2170,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) > * in the MSR is 0. This indicates that SRR0/1 are live, and that > * we therefore lost state by taking this exception. > */ > -DEFINE_INTERRUPT_HANDLER(unrecoverable_exception) > +void unrecoverable_exception(struct pt_regs *regs) > { > pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", >regs->trap, regs->nip, regs->msr); > -- > 2.25.0 > >
Re: Errant readings on LM81 with T2080 SoC
On 8/03/21 1:31 pm, Guenter Roeck wrote: > On 3/7/21 2:52 PM, Chris Packham wrote: >> Fundamentally I think this is a problem with the fact that the LM81 is >> an SMBus device but the T2080 (and other Freescale SoCs) uses i2c and we >> emulate SMBus. I suspect the errant readings are when we don't get round >> to completing the read within the timeout specified by the SMBus >> specification. Depending on when that happens we either fail the >> transfer or interpret the result as all-1s. > That is quite unlikely. Many sensor chips are SMBus chips connected to > i2c busses. It is much more likely that there is a bug in the T2080 i2c > driver, > that the chip doesn't like the bulk read command issued through regmap, that > the chip has problems with the i2c bus speed, or that the i2c bus is noisy. I have noticed that with the switch to regmap we end up using plain i2c instead of SMBUS. There appears to be no way of saying use SMBUS semantics if the i2c adapter reports I2C_FUNC_I2C.
Re: [PATCH v1] powerpc: Include running function as first entry in save_stack_trace() and friends
Hi! On Tue, Mar 09, 2021 at 04:05:23PM +, Mark Rutland wrote: > On Thu, Mar 04, 2021 at 03:54:48PM -0600, Segher Boessenkool wrote: > > On Thu, Mar 04, 2021 at 02:57:30PM +, Mark Rutland wrote: > > > It looks like GCC is happy to give us the function-entry-time FP if we use > > > __builtin_frame_address(1), > > > > From the GCC manual: > > Calling this function with a nonzero argument can have > > unpredictable effects, including crashing the calling program. As > > a result, calls that are considered unsafe are diagnosed when the > > '-Wframe-address' option is in effect. Such calls should only be > > made in debugging situations. > > > > It *does* warn (the warning is in -Wall btw), on both powerpc and > > aarch64. Furthermore, using this builtin causes lousy code (it forces > > the use of a frame pointer, which we normally try very hard to optimise > > away, for good reason). > > > > And, that warning is not an idle warning. Non-zero arguments to > > __builtin_frame_address can crash the program. It won't on simpler > > functions, but there is no real definition of what a simpler function > > *is*. It is meant for debugging, not for production use (this is also > > why no one has bothered to make it faster). > > > > On Power it should work, but on pretty much any other arch it won't. > > I understand this is true generally, and cannot be relied upon in > portable code. However as you hint here for Power, I believe that on > arm64 __builtin_frame_address(1) shouldn't crash the program due to the > way frame records work on arm64, but I'll go check with some local > compiler folk. I agree that __builtin_frame_address(2) and beyond > certainly can, e.g. by NULL dereference and similar. I still do not know the aarch64 ABI well enough. If only I had time! > For context, why do you think this would work on power specifically? I > wonder if our rationale is similar. 
On most 64-bit Power ABIs all stack frames are connected together as a linked list (which is updated atomically, importantly). This makes it possible to always find all previous stack frames. > Are you aware of anything in particular that breaks using > __builtin_frame_address(1) in non-portable code, or is this just a > general sentiment of this not being a supported use-case? It is not supported, and trying to do it anyway can crash: it can use random stack contents as pointer! Not really "random" of course, but where it thinks to find a pointer into the previous frame, which is not something it can rely on (unless the ABI guarantees it somehow). See gcc.gnu.org/PR60109 for example. > > > Unless we can get some strong guarantees from compiler folk such that we > > > can guarantee a specific function acts boundary for unwinding (and > > > doesn't itself get split, etc), the only reliable way I can think to > > > solve this requires an assembly trampoline. Whatever we do is liable to > > > need some invasive rework. > > > > You cannot get such a guarantee, other than not letting the compiler > > see into the routine at all, like with assembler code (not inline asm, > > real assembler code). > > If we cannot reliably ensure this then I'm happy to go write an assembly > trampoline to snapshot the state at a function call boundary (where our > procedure call standard mandates the state of the LR, FP, and frame > records pointed to by the FP). Is the frame pointer required?! > This'll require reworking a reasonable > amount of code cross-architecture, so I'll need to get some more > concrete justification (e.g. examples of things that can go wrong in > practice). Say you have a function that does dynamic stack allocation, then there is usually no way to find the previous stack frame (without function- specific knowledge). So __builtin_frame_address cannot work (it knows nothing about frames further up). 
Dynamic stack allocation (alloca, or variable length automatic arrays) is just the most common and most convenient example; it is not the only case you have problems here. > > The real way forward is to bite the bullet and to no longer pretend you > > can do a full backtrace from just the stack contents. You cannot. > > I think what you mean here is that there's no reliable way to handle the > current/leaf function, right? If so I do agree. No, I meant what I said. There is the separate issue that you do not know where the return address (etc.) is stored in a function that has not yet done a call itself, sure. You cannot assume anything the ABI does not tell you you can depend on. > Beyond that I believe that arm64's frame records should be sufficient. Do you have a simple linked list connecting all frames? The aarch64 GCC port does not define anything special here (DYNAMIC_CHAIN_ADDRESS), so the default will be used: every frame pointer has to point to the previous one, no exceptions whatsoever. Segher
Re: [PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb
On Tue, Mar 09, 2021 at 04:53:40PM +0100, Christoph Hellwig wrote: > Rename alloc_inode to free the name for a new variant that does not > need boilerplate to create a super_block first. > > Signed-off-by: Christoph Hellwig > --- That is a nice idea as well to avoid sb by introducing a unique fs... Reviewed-by: Gao Xiang Thanks, Gao Xiang
Re: [PATCH 2/9] fs: add an argument-less alloc_anon_inode
On Tue, Mar 09, 2021 at 04:53:41PM +0100, Christoph Hellwig wrote: > Add a new alloc_anon_inode helper that allocates an inode on > the anon_inode file system. > > Signed-off-by: Christoph Hellwig Reviewed-by: Gao Xiang Thanks, Gao Xiang
Re: [PATCH v2 1/8] powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property
On 3/9/21 12:33 PM, Cédric Le Goater wrote: On 3/8/21 6:13 PM, Greg Kurz wrote: On Wed, 3 Mar 2021 18:48:50 +0100 Cédric Le Goater wrote: The 'chip_id' field of the XIVE CPU structure is used to choose a target for a source located on the same chip when possible. This field is assigned on the PowerNV platform using the "ibm,chip-id" property on pSeries under KVM when NUMA nodes are defined but it is undefined This sentence seems to have a syntax problem... like it is missing an 'and' before 'on pSeries'. ah yes, or simply a comma. under PowerVM. The XIVE source structure has a similar field 'src_chip' which is only assigned on the PowerNV platform. cpu_to_node() returns a compatible value on all platforms, 0 being the default node. It will also give us the opportunity to set the affinity of a source on pSeries when we can localize them. IIUC this relies on the fact that the NUMA node id is == to chip id on PowerNV, i.e. xc->chip_id which is passed to OPAL remain stable with this change. Linux sets the NUMA node in numa_setup_cpu(). On pseries, the hcall H_HOME_NODE_ASSOCIATIVITY returns the node id if I am correct (Daniel in Cc:) That's correct. H_HOME_NODE_ASSOCIATIVITY returns not only the node_id, but a list with the ibm,associativity domains of the CPU that "proc-no" (processor identifier) is mapped to inside QEMU. node_id in this case, considering that we're working with a reference-points of size 4, is the 4th element of the returned list. The last element is "procno" itself. On PowerNV, Linux uses "ibm,associativity" property of the CPU to find the node id. This value is built from the chip id in OPAL, so the value returned by cpu_to_node(cpu) and the value of the "ibm,chip-id" property are unlikely to be different. cpu_to_node(cpu) is used in many places to allocate the structures locally to the owning node. XIVE is not an exception (see below in the same patch), it is better to be consistent and get the same information (node id) using the same routine. 
In Linux, "ibm,chip-id" is only used in low level PowerNV drivers : LPC, XSCOM, RNG, VAS, NX. XIVE should be in that list also but skiboot unifies the controllers of the system to only expose one the OS. This is problematic and should be changed but it's another topic. On the other hand, you have the pSeries case under PowerVM that doesn't xc->chip_id, which isn't passed to any hcall AFAICT. yes "ibm,chip-id" is an OPAL concept unfortunately and it has no meaning under PAPR. xc->chip_id on pseries (PowerVM) will contains an invalid chip id. QEMU/KVM exposes "ibm,chip-id" but it's not used. (its value is not always correct btw) If you have a way to reliably reproduce this, let me know and I'll fix it up in QEMU. Thanks, DHB It looks like the chip id is only used for localization purpose in this case, right ? Yes and PAPR sources are not localized. So it's not used. MSI sources could be if we rewrote the MSI driver. In this case, what about doing this change for pSeries only, somewhere in spapr.c ? The IPI code is common to all platforms and all have the same issue. I rather not. Thanks, C. Signed-off-by: Cédric Le Goater --- arch/powerpc/sysdev/xive/common.c | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 595310e056f4..b8e456da28aa 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1335,16 +1335,11 @@ static int xive_prepare_cpu(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); if (!xc) { - struct device_node *np; - xc = kzalloc_node(sizeof(struct xive_cpu), GFP_KERNEL, cpu_to_node(cpu)); if (!xc) return -ENOMEM; - np = of_get_cpu_node(cpu, NULL); - if (np) - xc->chip_id = of_get_ibm_chip_id(np); - of_node_put(np); + xc->chip_id = cpu_to_node(cpu); xc->hw_ipi = XIVE_BAD_IRQ; per_cpu(xive_cpu, cpu) = xc;
Re: [PATCH] ethernet: ucc_geth: Use kmemdup instead of kmalloc and memcpy
On 05/03/2021 15.27, angkery wrote: > From: Junlin Yang > > Fixes coccicheck warnings: > ./drivers/net/ethernet/freescale/ucc_geth.c:3594:11-18: > WARNING opportunity for kmemdup > > Signed-off-by: Junlin Yang > --- > drivers/net/ethernet/freescale/ucc_geth.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/drivers/net/ethernet/freescale/ucc_geth.c > b/drivers/net/ethernet/freescale/ucc_geth.c > index ef4e2fe..2c079ad 100644 > --- a/drivers/net/ethernet/freescale/ucc_geth.c > +++ b/drivers/net/ethernet/freescale/ucc_geth.c > @@ -3591,10 +3591,9 @@ static int ucc_geth_probe(struct platform_device* > ofdev) > if ((ucc_num < 0) || (ucc_num > 7)) > return -ENODEV; > > - ug_info = kmalloc(sizeof(*ug_info), GFP_KERNEL); > + ug_info = kmemdup(_primary_info, sizeof(*ug_info), GFP_KERNEL); > if (ug_info == NULL) > return -ENOMEM; > - memcpy(ug_info, _primary_info, sizeof(*ug_info)); > > ug_info->uf_info.ucc_num = ucc_num; > > Ah, yes, of course, I should have used that. Acked-by: Rasmus Villemoes
Re: [PATCH v2 4/7] CMDLINE: powerpc: convert to generic builtin command line
On Tue, Mar 09, 2021 at 08:56:47AM +0100, Christophe Leroy wrote: > > > Le 09/03/2021 à 01:02, Daniel Walker a écrit : > > This updates the powerpc code to use the CONFIG_GENERIC_CMDLINE > > option. > > > > Cc: xe-linux-exter...@cisco.com > > Signed-off-by: Ruslan Ruslichenko > > Signed-off-by: Ruslan Bilovol > > Signed-off-by: Daniel Walker > > --- > > arch/powerpc/Kconfig| 37 + > > arch/powerpc/kernel/prom.c | 1 + > > arch/powerpc/kernel/prom_init.c | 35 ++- > > 3 files changed, 23 insertions(+), 50 deletions(-) > > > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > > index 107bb4319e0e..276b06d5c961 100644 > > --- a/arch/powerpc/Kconfig > > +++ b/arch/powerpc/Kconfig > > @@ -167,6 +167,7 @@ config PPC > > select EDAC_SUPPORT > > select GENERIC_ATOMIC64 if PPC32 > > select GENERIC_CLOCKEVENTS_BROADCASTif SMP > > + select GENERIC_CMDLINE > > select GENERIC_CMOS_UPDATE > > select GENERIC_CPU_AUTOPROBE > > select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC > > @@ -906,42 +907,6 @@ config PPC_DENORMALISATION > > Add support for handling denormalisation of single precision > > values. Useful for bare metal only. If unsure say Y here. > > -config CMDLINE > > - string "Initial kernel command string" > > - default "" > > - help > > - On some platforms, there is currently no way for the boot loader to > > - pass arguments to the kernel. For these platforms, you can supply > > - some command-line options at build time by entering them here. In > > - most cases you will need to specify the root device here. > > - > > -choice > > - prompt "Kernel command line type" if CMDLINE != "" > > - default CMDLINE_FROM_BOOTLOADER > > - > > -config CMDLINE_FROM_BOOTLOADER > > - bool "Use bootloader kernel arguments if available" > > - help > > - Uses the command-line options passed by the boot loader. If > > - the boot loader doesn't provide any, the default kernel command > > - string provided in CMDLINE will be used. 
> > - > > -config CMDLINE_EXTEND > > - bool "Extend bootloader kernel arguments" > > - help > > - The command-line arguments provided by the boot loader will be > > - appended to the default kernel command string. > > - > > -config CMDLINE_FORCE > > - bool "Always use the default kernel command string" > > - help > > - Always use the default kernel command string, even if the boot > > - loader passes other arguments to the kernel. > > - This is useful if you cannot or don't want to change the > > - command-line options your boot loader passes to the kernel. > > - > > -endchoice > > - > > config EXTRA_TARGETS > > string "Additional default image types" > > help > > diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c > > index ae3c41730367..96d0a01be1b4 100644 > > --- a/arch/powerpc/kernel/prom.c > > +++ b/arch/powerpc/kernel/prom.c > > @@ -27,6 +27,7 @@ > > #include > > #include > > #include > > +#include > > Why is this needed in prom.c ? Must have been a mistake, I don't think it's needed. > > #include > > #include > > #include > > diff --git a/arch/powerpc/kernel/prom_init.c > > b/arch/powerpc/kernel/prom_init.c > > index e9d4eb6144e1..657241534d69 100644 > > --- a/arch/powerpc/kernel/prom_init.c > > +++ b/arch/powerpc/kernel/prom_init.c > > @@ -27,6 +27,7 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > #include > > @@ -242,15 +243,6 @@ static int __init prom_strcmp(const char *cs, const > > char *ct) > > return 0; > > } > > -static char __init *prom_strcpy(char *dest, const char *src) > > -{ > > - char *tmp = dest; > > - > > - while ((*dest++ = *src++) != '\0') > > - /* nothing */; > > - return tmp; > > -} > > - > > This game with prom_strcpy() should go a separate preceeding patch. > > Also, it looks like checkpatch.pl recommends to use strscpy() instead of > strlcpy(). strscpy() is very large. I'm not sure it's compatible with this prom_init.c environment. 
> > static int __init prom_strncmp(const char *cs, const char *ct, size_t > > count) > > { > > unsigned char c1, c2; > > @@ -276,6 +268,20 @@ static size_t __init prom_strlen(const char *s) > > return sc - s; > > } > > +static size_t __init prom_strlcpy(char *dest, const char *src, size_t size) > > +{ > > + size_t ret = prom_strlen(src); > > + > > + if (size) { > > + size_t len = (ret >= size) ? size - 1 : ret; > > + > > + memcpy(dest, src, len); > > + dest[len] = '\0'; > > + } > > + return ret; > > +} > > + > > + > > static int __init prom_memcmp(const void *cs, const void *ct, size_t > > count) > > { > > const unsigned char *su1, *su2; > > @@ -304,6 +310,7 @@ static char __init *prom_strstr(const char *s1, const
Re: [PATCH v2 3/7] powerpc: convert config files to generic cmdline
On Tue, Mar 09, 2021 at 08:47:09AM +0100, Christophe Leroy wrote: > > > Le 09/03/2021 à 01:02, Daniel Walker a écrit : > > This is a scripted mass convert of the config files to use > > the new generic cmdline. There is a bit of a trim effect here. > > It would seem that some of the config haven't been trimmed in > > a while. > > If you do that in a separate patch, you lose bisectability. > > I think it would have been better to do things in a different way, more or > less like I did in my series: > 1/ Provide GENERIC cmdline at the same functionality level as what is > spread in the different architectures > 2/ Convert architectures to the generic with least churn. > 3/ Add new features to the generic You have to have the churn eventually, no matter how you do it. The only way you don't have churn is if you never upgrade the feature set. > > > > The bash script used to convert is as follows, > > > > if [[ -z "$1" || -z "$2" ]]; then > > echo "Two arguments are needed." > > exit 1 > > fi > > mkdir $1 > > cp $2 $1/.config > > sed -i 's/CONFIG_CMDLINE=/CONFIG_CMDLINE_BOOL=y\nCONFIG_CMDLINE_PREPEND=/g' > > $1/.config > > This is not correct. > > By default, on powerpc the provided command line is used only if the > bootloader doesn't provide one. > > Otherwise: > - the builtin command line is appended to the one provided by the bootloader > if CONFIG_CMDLINE_EXTEND is selected > - the builtin command line replaces the one provided by the bootloader if > CONFIG_CMDLINE_FORCE is selected I think my changes maintain most of this due to the override of CONFIG_CMDLINE_PREPEND. This is an upgrade and the inflexibility in powerpc is an example of why these changes were created in the first place. For example, say the default command line is "root=/dev/issblk0" from iss476 platform. And the bootloader adds "root=/dev/sda1" The result is . Then you have, root=/dev/issblk0 root=/dev/sda1 and the bootloader has precedence over the default command line.
So root= in the above cases is defined by the bootloader. The only issue would be if a person wants to override the default command line with an unrelated bootloader command line. I don't know how many people do this, but I doubt it's many. Can you think of any use cases like this? I would imagine there are many more people who have to entirely duplicate the default command line in the boot loader when they really just want to change a single part of it like the root= device or console device or speed. Daniel
Re: [PATCH v2 1/8] powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property
On 3/9/21 6:08 PM, Daniel Henrique Barboza wrote: > > > On 3/9/21 12:33 PM, Cédric Le Goater wrote: >> On 3/8/21 6:13 PM, Greg Kurz wrote: >>> On Wed, 3 Mar 2021 18:48:50 +0100 >>> Cédric Le Goater wrote: >>> The 'chip_id' field of the XIVE CPU structure is used to choose a target for a source located on the same chip when possible. This field is assigned on the PowerNV platform using the "ibm,chip-id" property on pSeries under KVM when NUMA nodes are defined but it is undefined >>> >>> This sentence seems to have a syntax problem... like it is missing an >>> 'and' before 'on pSeries'. >> >> ah yes, or simply a comma. >> under PowerVM. The XIVE source structure has a similar field 'src_chip' which is only assigned on the PowerNV platform. cpu_to_node() returns a compatible value on all platforms, 0 being the default node. It will also give us the opportunity to set the affinity of a source on pSeries when we can localize them. >>> >>> IIUC this relies on the fact that the NUMA node id is == to chip id >>> on PowerNV, i.e. xc->chip_id which is passed to OPAL remain stable >>> with this change. >> >> Linux sets the NUMA node in numa_setup_cpu(). On pseries, the hcall >> H_HOME_NODE_ASSOCIATIVITY returns the node id if I am correct (Daniel >> in Cc:) > > That's correct. H_HOME_NODE_ASSOCIATIVITY returns not only the node_id, but > a list with the ibm,associativity domains of the CPU that "proc-no" (processor > identifier) is mapped to inside QEMU. > > node_id in this case, considering that we're working with a reference-points > of size 4, is the 4th element of the returned list. The last element is > "procno" itself. > > >> >> On PowerNV, Linux uses "ibm,associativity" property of the CPU to find >> the node id. This value is built from the chip id in OPAL, so the >> value returned by cpu_to_node(cpu) and the value of the "ibm,chip-id" >> property are unlikely to be different. 
>> >> cpu_to_node(cpu) is used in many places to allocate the structures >> locally to the owning node. XIVE is not an exception (see below in the >> same patch), it is better to be consistent and get the same information >> (node id) using the same routine. >> >> >> In Linux, "ibm,chip-id" is only used in low level PowerNV drivers: >> LPC, XSCOM, RNG, VAS, NX. XIVE should be in that list also but skiboot >> unifies the controllers of the system to only expose one to the OS. This >> is problematic and should be changed but it's another topic. >> >> >>> On the other hand, you have the pSeries case under PowerVM that >>> doesn't xc->chip_id, which isn't passed to any hcall AFAICT. >> >> yes "ibm,chip-id" is an OPAL concept unfortunately and it has no meaning >> under PAPR. xc->chip_id on pseries (PowerVM) will contain an invalid >> chip id. >> >> QEMU/KVM exposes "ibm,chip-id" but it's not used. (its value is not >> always correct btw) > > > If you have a way to reliably reproduce this, let me know and I'll fix it > up in QEMU. 
with : -smp 4,cores=1,maxcpus=8 -object memory-backend-ram,id=ram-node0,size=2G -numa node,nodeid=0,cpus=0-1,cpus=4-5,memdev=ram-node0 -object memory-backend-ram,id=ram-node1,size=2G -numa node,nodeid=1,cpus=2-3,cpus=6-7,memdev=ram-node1 # dmesg | grep numa [0.013106] numa: Node 0 CPUs: 0-1 [0.013136] numa: Node 1 CPUs: 2-3 # dtc -I fs /proc/device-tree/cpus/ -f | grep ibm,chip-id ibm,chip-id = <0x01>; ibm,chip-id = <0x02>; ibm,chip-id = <0x00>; ibm,chip-id = <0x03>; with : -smp 4,cores=4,maxcpus=8,threads=1 -object memory-backend-ram,id=ram-node0,size=2G -numa node,nodeid=0,cpus=0-1,cpus=4-5,memdev=ram-node0 -object memory-backend-ram,id=ram-node1,size=2G -numa node,nodeid=1,cpus=2-3,cpus=6-7,memdev=ram-node1 # dmesg | grep numa [0.013106] numa: Node 0 CPUs: 0-1 [0.013136] numa: Node 1 CPUs: 2-3 # dtc -I fs /proc/device-tree/cpus/ -f | grep ibm,chip-id ibm,chip-id = <0x00>; ibm,chip-id = <0x00>; ibm,chip-id = <0x00>; ibm,chip-id = <0x00>; I think we should simply remove "ibm,chip-id" since it's not used and not in the PAPR spec. Thanks, C. > > Thanks, > > > DHB > > >> >>> It looks like the chip id is only used for localization purpose in >>> this case, right ? >> >> Yes and PAPR sources are not localized. So it's not used. MSI sources >> could be if we rewrote the MSI driver. >> >>> In this case, what about doing this change for pSeries only, >>> somewhere in spapr.c ? >> >> The IPI code is common to all platforms and all have the same issue. >> I rather not. >> >> Thanks, >> >> C. >> Signed-off-by: Cédric Le Goater --- arch/powerpc/sysdev/xive/common.c | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 595310e056f4..b8e456da28aa
[PATCH] powerpc/xmon: Check cpu id in commands "c#", "dp#" and "dx#"
All these commands end up peeking into the PACA using the user-originated cpu id as an index. Check the cpu id is valid in order to prevent xmon from crashing. Instead of printing an error, this follows the same behavior as the "lp s #" command: ignore the buggy cpu id parameter and fall back to the #-less version of the command. Signed-off-by: Greg Kurz --- arch/powerpc/xmon/xmon.c |6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 80fbf8968f77..d3d6e044228e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1248,7 +1248,7 @@ static int cpu_cmd(void) unsigned long cpu, first_cpu, last_cpu; int timeout; - if (!scanhex()) { + if (!scanhex() || cpu >= num_possible_cpus()) { /* print cpus waiting or in xmon */ printf("cpus stopped:"); last_cpu = first_cpu = NR_CPUS; @@ -2678,7 +2678,7 @@ static void dump_pacas(void) termch = c; /* Put c back, it wasn't 'a' */ - if (scanhex()) + if (scanhex() && num < num_possible_cpus()) dump_one_paca(num); else dump_one_paca(xmon_owner); @@ -2751,7 +2751,7 @@ static void dump_xives(void) termch = c; /* Put c back, it wasn't 'a' */ - if (scanhex()) + if (scanhex() && num < num_possible_cpus()) dump_one_xive(num); else dump_one_xive(xmon_owner);
Re: [PATCH 3/3] powerpc/qspinlock: Use generic smp_cond_load_relaxed
On Tue, Mar 09, 2021 at 07:46:11AM -0800, Davidlohr Bueso wrote: > On Tue, 09 Mar 2021, Michal Suchánek wrote: > > > On Mon, Mar 08, 2021 at 05:59:50PM -0800, Davidlohr Bueso wrote: > > > 49a7d46a06c3 (powerpc: Implement smp_cond_load_relaxed()) added > > > busy-waiting pausing with a preferred SMT priority pattern, lowering > > > the priority (reducing decode cycles) during the whole loop slowpath. > > > > > > However, data shows that while this pattern works well with simple > > ^^ > > > spinlocks, queued spinlocks benefit more being kept in medium priority, > > > with a cpu_relax() instead, being a low+medium combo on powerpc. > > ... > > > > > > diff --git a/arch/powerpc/include/asm/barrier.h > > > b/arch/powerpc/include/asm/barrier.h > > > index aecfde829d5d..7ae29cfb06c0 100644 > > > --- a/arch/powerpc/include/asm/barrier.h > > > +++ b/arch/powerpc/include/asm/barrier.h > > > @@ -80,22 +80,6 @@ do { > > > \ > > > ___p1; \ > > > }) > > > > > > -#ifdef CONFIG_PPC64 > > Maybe it should be kept for the simple spinlock case then? > > It is kept, note that simple spinlocks don't use smp_cond_load_relaxed, > but instead deal with the priorities in arch_spin_lock(), so it will > spin in low priority until it sees a chance to take the lock, where > it switches back to medium. Indeed, thanks for the clarification. Michal
Re: [PATCH v2 8/8] powerpc/xive: Map one IPI interrupt per node
On 3/9/21 2:23 PM, Greg Kurz wrote: > On Wed, 3 Mar 2021 18:48:57 +0100 > Cédric Le Goater wrote: > >> ipistorm [*] can be used to benchmark the raw interrupt rate of an >> interrupt controller by measuring the number of IPIs a system can >> sustain. When applied to the XIVE interrupt controller of POWER9 and >> POWER10 systems, a significant drop of the interrupt rate can be >> observed when crossing the second node boundary. >> >> This is due to the fact that a single IPI interrupt is used for all >> CPUs of the system. The structure is shared and the cache line updates >> impact greatly the traffic between nodes and the overall IPI >> performance. >> >> As a workaround, the impact can be reduced by deactivating the IRQ >> lockup detector ("noirqdebug") which does a lot of accounting in the >> Linux IRQ descriptor structure and is responsible for most of the >> performance penalty. >> >> As a fix, this proposal allocates an IPI interrupt per node, to be >> shared by all CPUs of that node. It solves the scaling issue, the IRQ >> lockup detector still has an impact but the XIVE interrupt rate scales >> linearly. It also improves the "noirqdebug" case as showed in the >> tables below. 
>> >> * P9 DD2.2 - 2s * 64 threads >> >>"noirqdebug" >> Mint/sMint/s >> chips cpus IPI/sys IPI/chip IPI/chipIPI/sys >> -- >> 1 0-15 4.984023 4.875405 4.996536 5.048892 >> 0-3110.879164 10.544040 10.757632 11.037859 >> 0-4715.345301 14.688764 14.926520 15.310053 >> 0-6317.064907 17.066812 17.613416 17.874511 >> 2 0-7911.768764 21.650749 22.689120 22.566508 >> 0-9510.616812 26.878789 28.434703 28.320324 >> 0-111 10.151693 31.397803 31.771773 32.388122 >> 0-1279.948502 33.139336 34.875716 35.224548 >> >> * P10 DD1 - 4s (not homogeneous) 352 threads >> >>"noirqdebug" >> Mint/sMint/s >> chips cpus IPI/sys IPI/chip IPI/chipIPI/sys >> -- >> 1 0-15 2.409402 2.364108 2.383303 2.395091 >> 0-31 6.028325 6.046075 6.08 6.073750 >> 0-47 8.655178 8.644531 8.712830 8.724702 >> 0-6311.629652 11.735953 12.088203 12.055979 >> 0-7914.392321 14.729959 14.986701 14.973073 >> 0-9512.604158 13.004034 17.528748 17.568095 >> 2 0-1119.767753 13.719831 19.968606 20.024218 >> 0-1276.744566 16.418854 22.898066 22.995110 >> 0-1436.005699 19.174421 25.425622 25.417541 >> 0-1595.649719 21.938836 27.952662 28.059603 >> 0-1755.441410 24.109484 31.133915 31.127996 >> 3 0-1915.318341 24.405322 33.999221 33.775354 >> 0-2075.191382 26.449769 36.050161 35.867307 >> 0-2235.102790 29.356943 39.544135 39.508169 >> 0-2395.035295 31.933051 42.135075 42.071975 >> 0-2554.969209 34.477367 44.655395 44.757074 >> 4 0-2714.907652 35.887016 47.080545 47.318537 >> 0-2874.839581 38.076137 50.464307 50.636219 >> 0-3034.786031 40.881319 53.478684 53.310759 >> 0-3194.743750 43.448424 56.388102 55.973969 >> 0-3354.709936 45.623532 59.400930 58.926857 >> 0-3514.681413 45.646151 62.035804 61.830057 >> >> [*] https://github.com/antonblanchard/ipistorm >> >> Signed-off-by: Cédric Le Goater >> --- >> arch/powerpc/sysdev/xive/xive-internal.h | 2 -- >> arch/powerpc/sysdev/xive/common.c| 39 ++-- >> 2 files changed, 30 insertions(+), 11 deletions(-) >> >> diff --git a/arch/powerpc/sysdev/xive/xive-internal.h >> 
b/arch/powerpc/sysdev/xive/xive-internal.h >> index 9cf57c722faa..b3a456fdd3a5 100644 >> --- a/arch/powerpc/sysdev/xive/xive-internal.h >> +++ b/arch/powerpc/sysdev/xive/xive-internal.h >> @@ -5,8 +5,6 @@ >> #ifndef __XIVE_INTERNAL_H >> #define __XIVE_INTERNAL_H >> >> -#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */ >> - >> /* >> * A "disabled" interrupt should never fire, to catch problems >> * we set its logical number to this >> diff --git a/arch/powerpc/sysdev/xive/common.c >> b/arch/powerpc/sysdev/xive/common.c >> index 8eefd152b947..c27f7bb0494b 100644 >> --- a/arch/powerpc/sysdev/xive/common.c >> +++ b/arch/powerpc/sysdev/xive/common.c >> @@ -65,8 +65,16 @@ static struct irq_domain *xive_irq_domain; >> #ifdef CONFIG_SMP >> static struct irq_domain *xive_ipi_irq_domain; >> >> -/* The IPIs all use the same
Re: make alloc_anon_inode more useful
On Tue, Mar 09, 2021 at 04:53:39PM +0100, Christoph Hellwig wrote: > Hi all, > > this series first renames the existing alloc_anon_inode to > alloc_anon_inode_sb to clearly mark it as requiring a superblock. > > It then adds a new alloc_anon_inode that works on the anon_inode > file system super block, thus removing tons of boilerplate code. > > The few remaining callers of alloc_anon_inode_sb all use alloc_file_pseudo > later, but might also be ripe for some cleanup. I like it. For a submission plan, can we have this on a git branch please? I will need a copy for RDMA and Alex will need one for vfio.. Thanks, Jason
Re: [PATCH 3/9] powerpc/pseries: remove the ppc-cmm file system
On Tue, Mar 09, 2021 at 04:53:42PM +0100, Christoph Hellwig wrote: > Just use the generic anon_inode file system. > > Signed-off-by: Christoph Hellwig > arch/powerpc/platforms/pseries/cmm.c | 27 ++- > 1 file changed, 2 insertions(+), 25 deletions(-) > > diff --git a/arch/powerpc/platforms/pseries/cmm.c > b/arch/powerpc/platforms/pseries/cmm.c > index 6d36b858b14df1..9d07e6bea7126c 100644 > +++ b/arch/powerpc/platforms/pseries/cmm.c > @@ -6,6 +6,7 @@ > * Author(s): Brian King (brk...@linux.vnet.ibm.com), > */ > > +#include > #include > #include > #include > @@ -502,19 +503,6 @@ static struct notifier_block cmm_mem_nb = { > }; > > #ifdef CONFIG_BALLOON_COMPACTION > -static struct vfsmount *balloon_mnt; > - > -static int cmm_init_fs_context(struct fs_context *fc) > -{ > - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM; Should we clean up these unused magic constants too? include/uapi/linux/magic.h:#define PPC_CMM_MAGIC 0xc7571590 Jason
Re: [PATCH 5/9] vmw_balloon: remove the balloon-vmware file system
On 09.03.21 16:53, Christoph Hellwig wrote: Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- drivers/misc/vmw_balloon.c | 24 ++-- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 5d057a05ddbee8..be4be32f858253 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -16,6 +16,7 @@ //#define DEBUG #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1735,20 +1736,6 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b) #ifdef CONFIG_BALLOON_COMPACTION - -static int vmballoon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type vmballoon_fs = { - .name = "balloon-vmware", - .init_fs_context= vmballoon_init_fs_context, - .kill_sb= kill_anon_super, -}; - -static struct vfsmount *vmballoon_mnt; - /** * vmballoon_migratepage() - migrates a balloon page. * @b_dev_info: balloon device information descriptor. @@ -1878,8 +1865,6 @@ static void vmballoon_compaction_deinit(struct vmballoon *b) iput(b->b_dev_info.inode); b->b_dev_info.inode = NULL; - kern_unmount(vmballoon_mnt); - vmballoon_mnt = NULL; } /** @@ -1895,13 +1880,8 @@ static void vmballoon_compaction_deinit(struct vmballoon *b) */ static __init int vmballoon_compaction_init(struct vmballoon *b) { - vmballoon_mnt = kern_mount(_fs); - if (IS_ERR(vmballoon_mnt)) - return PTR_ERR(vmballoon_mnt); - b->b_dev_info.migratepage = vmballoon_migratepage; - b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb); - + b->b_dev_info.inode = alloc_anon_inode(); if (IS_ERR(b->b_dev_info.inode)) return PTR_ERR(b->b_dev_info.inode); Same comment regarding BALLOON_VMW_MAGIC and includes (mount.h, pseudo_fs.h). Apart from that looks good. -- Thanks, David / dhildenb
Re: [PATCH 6/9] virtio_balloon: remove the balloon-kvm file system
On 09.03.21 16:53, Christoph Hellwig wrote: Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- drivers/virtio/virtio_balloon.c | 30 +++--- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index cae76ee5bdd688..1efb890cd3ff09 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -6,6 +6,7 @@ * Copyright 2008 Rusty Russell IBM Corporation */ +#include #include #include #include @@ -42,10 +43,6 @@ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER) -#ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; -#endif - enum virtio_balloon_vq { VIRTIO_BALLOON_VQ_INFLATE, VIRTIO_BALLOON_VQ_DEFLATE, @@ -805,18 +802,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, return MIGRATEPAGE_SUCCESS; } - -static int balloon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 
0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "balloon-kvm", - .init_fs_context = balloon_init_fs_context, - .kill_sb= kill_anon_super, -}; - #endif /* CONFIG_BALLOON_COMPACTION */ static unsigned long shrink_free_pages(struct virtio_balloon *vb, @@ -909,17 +894,11 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_free_vb; #ifdef CONFIG_BALLOON_COMPACTION - balloon_mnt = kern_mount(_fs); - if (IS_ERR(balloon_mnt)) { - err = PTR_ERR(balloon_mnt); - goto out_del_vqs; - } - vb->vb_dev_info.migratepage = virtballoon_migratepage; - vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); + vb->vb_dev_info.inode = alloc_anon_inode(); if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); - goto out_kern_unmount; + goto out_del_vqs; } vb->vb_dev_info.inode->i_mapping->a_ops = _aops; #endif @@ -1016,8 +995,6 @@ static int virtballoon_probe(struct virtio_device *vdev) out_iput: #ifdef CONFIG_BALLOON_COMPACTION iput(vb->vb_dev_info.inode); -out_kern_unmount: - kern_unmount(balloon_mnt); out_del_vqs: #endif vdev->config->del_vqs(vdev); @@ -1070,7 +1047,6 @@ static void virtballoon_remove(struct virtio_device *vdev) if (vb->vb_dev_info.inode) iput(vb->vb_dev_info.inode); - kern_unmount(balloon_mnt); #endif kfree(vb); } ... you might know what I am going to say :) Apart from that LGTM. -- Thanks, David / dhildenb
Re: [PATCH 2/9] fs: add an argument-less alloc_anon_inode
On 09.03.21 16:53, Christoph Hellwig wrote: Add a new alloc_anon_inode helper that allocates an inode on the anon_inode file system. Signed-off-by: Christoph Hellwig --- fs/anon_inodes.c| 15 +-- include/linux/anon_inodes.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 4745fc37014332..b6a8ea71920bc3 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -63,7 +63,7 @@ static struct inode *anon_inode_make_secure_inode( const struct qstr qname = QSTR_INIT(name, strlen(name)); int error; - inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb); + inode = alloc_anon_inode(); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; @@ -225,13 +225,24 @@ int anon_inode_getfd_secure(const char *name, const struct file_operations *fops } EXPORT_SYMBOL_GPL(anon_inode_getfd_secure); +/** + * alloc_anon_inode - create a new anonymous inode + * + * Create an inode on the anon_inode file system and return it. + */ +struct inode *alloc_anon_inode(void) +{ + return alloc_anon_inode_sb(anon_inode_mnt->mnt_sb); +} +EXPORT_SYMBOL_GPL(alloc_anon_inode); + static int __init anon_inode_init(void) { anon_inode_mnt = kern_mount(_inode_fs_type); if (IS_ERR(anon_inode_mnt)) panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt)); - anon_inode_inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb); + anon_inode_inode = alloc_anon_inode(); if (IS_ERR(anon_inode_inode)) panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 71881a2b6f7860..b5ae9a6eda9923 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -21,6 +21,7 @@ int anon_inode_getfd_secure(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode); +struct inode *alloc_anon_inode(void); #endif /* _LINUX_ANON_INODES_H */ Reviewed-by: David Hildenbrand -- Thanks, 
David / dhildenb
Re: [PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb
On 09.03.21 16:53, Christoph Hellwig wrote: Rename alloc_inode to free the name for a new variant that does not need boilerplate to create a super_block first. Signed-off-by: Christoph Hellwig --- arch/powerpc/platforms/pseries/cmm.c | 2 +- drivers/dma-buf/dma-buf.c| 2 +- drivers/gpu/drm/drm_drv.c| 2 +- drivers/misc/cxl/api.c | 2 +- drivers/misc/vmw_balloon.c | 2 +- drivers/scsi/cxlflash/ocxl_hw.c | 2 +- drivers/virtio/virtio_balloon.c | 2 +- fs/aio.c | 2 +- fs/anon_inodes.c | 4 ++-- fs/libfs.c | 2 +- include/linux/fs.h | 2 +- kernel/resource.c| 2 +- mm/z3fold.c | 2 +- mm/zsmalloc.c| 2 +- 14 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 45a3a3022a85c9..6d36b858b14df1 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -580,7 +580,7 @@ static int cmm_balloon_compaction_init(void) return rc; } - b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); + b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); if (IS_ERR(b_dev_info.inode)) { rc = PTR_ERR(b_dev_info.inode); b_dev_info.inode = NULL; diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index f264b70c383eb4..dedcc9483352dc 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -445,7 +445,7 @@ static inline int is_dma_buf_file(struct file *file) static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) { struct file *file; - struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); + struct inode *inode = alloc_anon_inode_sb(dma_buf_mnt->mnt_sb); if (IS_ERR(inode)) return ERR_CAST(inode); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 20d22e41d7ce74..87e7214a8e3565 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -519,7 +519,7 @@ static struct inode *drm_fs_inode_new(void) return ERR_PTR(r); } - inode = alloc_anon_inode(drm_fs_mnt->mnt_sb); + inode = 
alloc_anon_inode_sb(drm_fs_mnt->mnt_sb); if (IS_ERR(inode)) simple_release_fs(_fs_mnt, _fs_cnt); diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index b493de962153ba..2efbf6c98028ef 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -73,7 +73,7 @@ static struct file *cxl_getfile(const char *name, goto err_module; } - inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb); + inode = alloc_anon_inode_sb(cxl_vfs_mount->mnt_sb); if (IS_ERR(inode)) { file = ERR_CAST(inode); goto err_fs; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index b837e7eba5f7dc..5d057a05ddbee8 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1900,7 +1900,7 @@ static __init int vmballoon_compaction_init(struct vmballoon *b) return PTR_ERR(vmballoon_mnt); b->b_dev_info.migratepage = vmballoon_migratepage; - b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb); + b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb); if (IS_ERR(b->b_dev_info.inode)) return PTR_ERR(b->b_dev_info.inode); diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index 244fc27215dc79..40184ed926b557 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -88,7 +88,7 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name, goto err2; } - inode = alloc_anon_inode(ocxlflash_vfs_mount->mnt_sb); + inode = alloc_anon_inode_sb(ocxlflash_vfs_mount->mnt_sb); if (IS_ERR(inode)) { rc = PTR_ERR(inode); dev_err(dev, "%s: alloc_anon_inode failed rc=%d\n", diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8985fc2cea8615..cae76ee5bdd688 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -916,7 +916,7 @@ static int virtballoon_probe(struct virtio_device *vdev) } vb->vb_dev_info.migratepage = virtballoon_migratepage; - vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); + vb->vb_dev_info.inode 
= alloc_anon_inode_sb(balloon_mnt->mnt_sb); if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); goto out_kern_unmount; diff --git a/fs/aio.c b/fs/aio.c index 1f32da13d39ee6..d1c2aa7fd6de7c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -234,7 +234,7 @@ static const struct
Re: [PATCH 3/9] powerpc/pseries: remove the ppc-cmm file system
On 09.03.21 16:53, Christoph Hellwig wrote: Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- arch/powerpc/platforms/pseries/cmm.c | 27 ++- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 6d36b858b14df1..9d07e6bea7126c 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -6,6 +6,7 @@ * Author(s): Brian King (brk...@linux.vnet.ibm.com), */ +#include #include #include #include @@ -502,19 +503,6 @@ static struct notifier_block cmm_mem_nb = { }; #ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; - -static int cmm_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "ppc-cmm", - .init_fs_context = cmm_init_fs_context, - .kill_sb = kill_anon_super, -}; - static int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage, struct page *page, enum migrate_mode mode) @@ -573,19 +561,10 @@ static int cmm_balloon_compaction_init(void) balloon_devinfo_init(_dev_info); b_dev_info.migratepage = cmm_migratepage; - balloon_mnt = kern_mount(_fs); - if (IS_ERR(balloon_mnt)) { - rc = PTR_ERR(balloon_mnt); - balloon_mnt = NULL; - return rc; - } - - b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); + b_dev_info.inode = alloc_anon_inode(); if (IS_ERR(b_dev_info.inode)) { rc = PTR_ERR(b_dev_info.inode); b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; return rc; } @@ -597,8 +576,6 @@ static void cmm_balloon_compaction_deinit(void) if (b_dev_info.inode) iput(b_dev_info.inode); b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; } #else /* CONFIG_BALLOON_COMPACTION */ static int cmm_balloon_compaction_init(void) I always wondered why that was necessary after all (with my limited fs knowledge :) ). 
a) I assume you want to remove PPC_CMM_MAGIC from include/uapi/linux/magic.h as well? b) Do we still need #include <linux/magic.h>, #include <linux/mount.h> and #include <linux/pseudo_fs.h>? Apart from that looks much cleaner. -- Thanks, David / dhildenb
Re: [PATCH v1] powerpc: Include running function as first entry in save_stack_trace() and friends
On Thu, Mar 04, 2021 at 03:54:48PM -0600, Segher Boessenkool wrote: > Hi! Hi Segher, > On Thu, Mar 04, 2021 at 02:57:30PM +, Mark Rutland wrote: > > It looks like GCC is happy to give us the function-entry-time FP if we use > > __builtin_frame_address(1), > > From the GCC manual: > Calling this function with a nonzero argument can have > unpredictable effects, including crashing the calling program. As > a result, calls that are considered unsafe are diagnosed when the > '-Wframe-address' option is in effect. Such calls should only be > made in debugging situations. > > It *does* warn (the warning is in -Wall btw), on both powerpc and > aarch64. Furthermore, using this builtin causes lousy code (it forces > the use of a frame pointer, which we normally try very hard to optimise > away, for good reason). > > And, that warning is not an idle warning. Non-zero arguments to > __builtin_frame_address can crash the program. It won't on simpler > functions, but there is no real definition of what a simpler function > *is*. It is meant for debugging, not for production use (this is also > why no one has bothered to make it faster). > > On Power it should work, but on pretty much any other arch it won't. I understand this is true generally, and cannot be relied upon in portable code. However as you hint here for Power, I believe that on arm64 __builtin_frame_address(1) shouldn't crash the program due to the way frame records work on arm64, but I'll go check with some local compiler folk. I agree that __builtin_frame_address(2) and beyond certainly can, e.g. by NULL dereference and similar. For context, why do you think this would work on power specifically? I wonder if our rationale is similar. Are you aware of anything in particular that breaks using __builtin_frame_address(1) in non-portable code, or is this just a general sentiment of this not being a supported use-case? 
> > Unless we can get some strong guarantees from compiler folk such that we > > can guarantee a specific function acts as a boundary for unwinding (and > > doesn't itself get split, etc), the only reliable way I can think of to > > solve this requires an assembly trampoline. Whatever we do is liable to > > need some invasive rework. > > You cannot get such a guarantee, other than not letting the compiler > see into the routine at all, like with assembler code (not inline asm, > real assembler code). If we cannot reliably ensure this then I'm happy to go write an assembly trampoline to snapshot the state at a function call boundary (where our procedure call standard mandates the state of the LR, FP, and frame records pointed to by the FP). This'll require reworking a reasonable amount of code cross-architecture, so I'll need to get some more concrete justification (e.g. examples of things that can go wrong in practice). > The real way forward is to bite the bullet and to no longer pretend you > can do a full backtrace from just the stack contents. You cannot. I think what you mean here is that there's no reliable way to handle the current/leaf function, right? If so I do agree. Beyond that I believe that arm64's frame records should be sufficient. Thanks, Mark.
Re: [PATCH v3] powerpc/32: remove bogus ppc_select syscall
Le 05/03/2021 à 13:03, Arnd Bergmann a écrit : On Fri, Mar 5, 2021 at 11:15 AM Christophe Leroy wrote: Le 05/03/2021 à 11:06, Arnd Bergmann a écrit : On Fri, Mar 5, 2021 at 9:40 AM Christophe Leroy wrote: - glibc support for ppc32 gets merged during the linux-2.5 days, supporting only #142 with the new behavior. It turns out to be older than I said. This was actually in glibc-1.94 from 1997, so during the linux-2.1 days, not 2.5! Whaou, nice archeology, thanks. Do you mind if I copy the history you established ? That's fine, please copy it. In your commit, you said 2.3.48. Here in the history you say 2.1.48. Which one is correct ? 2.1.48 is correct. Regardless of whether binaries are broken or not for other reasons, is it worth expecting an almost 25 yr old binary to run on future kernels ? If one is able to put the necessary effort to port your hardware to the latest kernel, can't he really port the binary as well ? I think the questions of supporting old hardware with new software and supporting old binaries on modern kernels are largely orthogonal. The policy we have is that we don't break existing user setups, and it really seems unlikely that anyone still uses pre-1997 executables for anything that requires a modern kernel! I now checked the oldest mklinux I could find (DR2.1 from 1997), and it even has the modern glibc and linux-2.0.28 kernel patched to provide the modern semantics at syscall #142 for glibc, with the same (already unused) compatibility hack at #82 that we still have for ppc32 today. This made mklinux DR2.1 binaries incompatible with mainline linux-2.0 kernels, but they might still work with modern kernels, regardless of whether we remove support for binaries that worked with mainline linux-2.0. I had another look. In fact x86, arm and m68k still have the #82 syscall, but they don't have the hack we have on powerpc to "guess" that something is calling the old select with the arguments of the new select. 
As part of my series of user accesses cleanup, I'll replace the open coded stuff by a call to sys_old_select(), see below. Maybe at the end we should keep the #82 syscall, but do we need to keep the powerpc hack really ? Maybe the best is to drop ppc_select() function but mention sys_old_select() instead of ni_syscall for entry #82 in the syscall table ? Christophe --- diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 700fcdac2e3c..b541c690a31c 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -40,6 +40,7 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #ifdef CONFIG_PPC32 #define __ARCH_WANT_OLD_STAT +#define __ARCH_WANT_SYS_OLD_SELECT #endif #ifdef CONFIG_PPC64 #define __ARCH_WANT_SYS_TIME diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 078608ec2e92..a552c9e68d7e 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -82,16 +82,8 @@ int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp) { if ( (unsigned long)n >= 4096 ) - { - unsigned long __user *buffer = (unsigned long __user *)n; - if (!access_ok(buffer, 5*sizeof(unsigned long)) - || __get_user(n, buffer) - || __get_user(inp, ((fd_set __user * __user *)(buffer+1))) - || __get_user(outp, ((fd_set __user * __user *)(buffer+2))) - || __get_user(exp, ((fd_set __user * __user *)(buffer+3))) - || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4 - return -EFAULT; - } + return sys_old_select((void __user *)n); + return sys_select(n, inp, outp, exp, tvp); } #endif
[PATCH 9/9] zsmalloc: remove the zsmalloc file system
Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- mm/zsmalloc.c | 48 +++- 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index a6449a2ad861de..a7d2f471935447 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -176,10 +177,6 @@ struct zs_size_stat { static struct dentry *zs_stat_root; #endif -#ifdef CONFIG_COMPACTION -static struct vfsmount *zsmalloc_mnt; -#endif - /* * We assign a page to ZS_ALMOST_EMPTY fullness group when: * n <= N / f, where @@ -308,8 +305,6 @@ static void kick_deferred_free(struct zs_pool *pool); static void init_deferred_free(struct zs_pool *pool); static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage); #else -static int zsmalloc_mount(void) { return 0; } -static void zsmalloc_unmount(void) {} static int zs_register_migration(struct zs_pool *pool) { return 0; } static void zs_unregister_migration(struct zs_pool *pool) {} static void migrate_lock_init(struct zspage *zspage) {} @@ -1751,33 +1746,6 @@ static void lock_zspage(struct zspage *zspage) } while ((page = get_next_page(page)) != NULL); } -static int zs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, ZSMALLOC_MAGIC) ? 
0 : -ENOMEM; -} - -static struct file_system_type zsmalloc_fs = { - .name = "zsmalloc", - .init_fs_context = zs_init_fs_context, - .kill_sb= kill_anon_super, -}; - -static int zsmalloc_mount(void) -{ - int ret = 0; - - zsmalloc_mnt = kern_mount(_fs); - if (IS_ERR(zsmalloc_mnt)) - ret = PTR_ERR(zsmalloc_mnt); - - return ret; -} - -static void zsmalloc_unmount(void) -{ - kern_unmount(zsmalloc_mnt); -} - static void migrate_lock_init(struct zspage *zspage) { rwlock_init(>lock); @@ -2086,7 +2054,7 @@ static const struct address_space_operations zsmalloc_aops = { static int zs_register_migration(struct zs_pool *pool) { - pool->inode = alloc_anon_inode_sb(zsmalloc_mnt->mnt_sb); + pool->inode = alloc_anon_inode(); if (IS_ERR(pool->inode)) { pool->inode = NULL; return 1; @@ -2506,14 +2474,10 @@ static int __init zs_init(void) { int ret; - ret = zsmalloc_mount(); - if (ret) - goto out; - ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare", zs_cpu_prepare, zs_cpu_dead); if (ret) - goto hp_setup_fail; + return ret; #ifdef CONFIG_ZPOOL zpool_register_driver(_zpool_driver); @@ -2522,11 +2486,6 @@ static int __init zs_init(void) zs_stat_init(); return 0; - -hp_setup_fail: - zsmalloc_unmount(); -out: - return ret; } static void __exit zs_exit(void) @@ -2534,7 +2493,6 @@ static void __exit zs_exit(void) #ifdef CONFIG_ZPOOL zpool_unregister_driver(_zpool_driver); #endif - zsmalloc_unmount(); cpuhp_remove_state(CPUHP_MM_ZS_PREPARE); zs_stat_exit(); -- 2.30.1
[PATCH 8/9] z3fold: remove the z3fold file system
Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- mm/z3fold.c | 38 ++ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index e7cd9298b221f5..e0749a3d8987de 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -23,6 +23,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -345,38 +346,10 @@ static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) } } -static int z3fold_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type z3fold_fs = { - .name = "z3fold", - .init_fs_context = z3fold_init_fs_context, - .kill_sb= kill_anon_super, -}; - -static struct vfsmount *z3fold_mnt; -static int z3fold_mount(void) -{ - int ret = 0; - - z3fold_mnt = kern_mount(_fs); - if (IS_ERR(z3fold_mnt)) - ret = PTR_ERR(z3fold_mnt); - - return ret; -} - -static void z3fold_unmount(void) -{ - kern_unmount(z3fold_mnt); -} - static const struct address_space_operations z3fold_aops; static int z3fold_register_migration(struct z3fold_pool *pool) { - pool->inode = alloc_anon_inode_sb(z3fold_mnt->mnt_sb); + pool->inode = alloc_anon_inode(); if (IS_ERR(pool->inode)) { pool->inode = NULL; return 1; @@ -1787,22 +1760,15 @@ MODULE_ALIAS("zpool-z3fold"); static int __init init_z3fold(void) { - int ret; - /* Make sure the z3fold header is not larger than the page size */ BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE); - ret = z3fold_mount(); - if (ret) - return ret; zpool_register_driver(_zpool_driver); - return 0; } static void __exit exit_z3fold(void) { - z3fold_unmount(); zpool_unregister_driver(_zpool_driver); } -- 2.30.1
[PATCH 7/9] iomem: remove the iomem file system
Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- kernel/resource.c | 30 -- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 0fd091a3f2fc66..12560553c26796 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1838,37 +1839,14 @@ static int __init strict_iomem(char *str) return 1; } -static int iomem_fs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type iomem_fs_type = { - .name = "iomem", - .owner = THIS_MODULE, - .init_fs_context = iomem_fs_init_fs_context, - .kill_sb= kill_anon_super, -}; - static int __init iomem_init_inode(void) { - static struct vfsmount *iomem_vfs_mount; - static int iomem_fs_cnt; struct inode *inode; - int rc; - - rc = simple_pin_fs(_fs_type, _vfs_mount, _fs_cnt); - if (rc < 0) { - pr_err("Cannot mount iomem pseudo filesystem: %d\n", rc); - return rc; - } - inode = alloc_anon_inode_sb(iomem_vfs_mount->mnt_sb); + inode = alloc_anon_inode(); if (IS_ERR(inode)) { - rc = PTR_ERR(inode); - pr_err("Cannot allocate inode for iomem: %d\n", rc); - simple_release_fs(_vfs_mount, _fs_cnt); - return rc; + pr_err("Cannot allocate inode for iomem: %zd\n", PTR_ERR(inode)); + return PTR_ERR(inode); } /* -- 2.30.1
[PATCH 6/9] virtio_balloon: remove the balloon-kvm file system
Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- drivers/virtio/virtio_balloon.c | 30 +++--- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index cae76ee5bdd688..1efb890cd3ff09 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -6,6 +6,7 @@ * Copyright 2008 Rusty Russell IBM Corporation */ +#include #include #include #include @@ -42,10 +43,6 @@ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER) -#ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; -#endif - enum virtio_balloon_vq { VIRTIO_BALLOON_VQ_INFLATE, VIRTIO_BALLOON_VQ_DEFLATE, @@ -805,18 +802,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, return MIGRATEPAGE_SUCCESS; } - -static int balloon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 
0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "balloon-kvm", - .init_fs_context = balloon_init_fs_context, - .kill_sb= kill_anon_super, -}; - #endif /* CONFIG_BALLOON_COMPACTION */ static unsigned long shrink_free_pages(struct virtio_balloon *vb, @@ -909,17 +894,11 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_free_vb; #ifdef CONFIG_BALLOON_COMPACTION - balloon_mnt = kern_mount(_fs); - if (IS_ERR(balloon_mnt)) { - err = PTR_ERR(balloon_mnt); - goto out_del_vqs; - } - vb->vb_dev_info.migratepage = virtballoon_migratepage; - vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); + vb->vb_dev_info.inode = alloc_anon_inode(); if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); - goto out_kern_unmount; + goto out_del_vqs; } vb->vb_dev_info.inode->i_mapping->a_ops = _aops; #endif @@ -1016,8 +995,6 @@ static int virtballoon_probe(struct virtio_device *vdev) out_iput: #ifdef CONFIG_BALLOON_COMPACTION iput(vb->vb_dev_info.inode); -out_kern_unmount: - kern_unmount(balloon_mnt); out_del_vqs: #endif vdev->config->del_vqs(vdev); @@ -1070,7 +1047,6 @@ static void virtballoon_remove(struct virtio_device *vdev) if (vb->vb_dev_info.inode) iput(vb->vb_dev_info.inode); - kern_unmount(balloon_mnt); #endif kfree(vb); } -- 2.30.1
[PATCH 5/9] vmw_balloon: remove the balloon-vmware file system
Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- drivers/misc/vmw_balloon.c | 24 ++-- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 5d057a05ddbee8..be4be32f858253 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -16,6 +16,7 @@ //#define DEBUG #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1735,20 +1736,6 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b) #ifdef CONFIG_BALLOON_COMPACTION - -static int vmballoon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type vmballoon_fs = { - .name = "balloon-vmware", - .init_fs_context= vmballoon_init_fs_context, - .kill_sb= kill_anon_super, -}; - -static struct vfsmount *vmballoon_mnt; - /** * vmballoon_migratepage() - migrates a balloon page. * @b_dev_info: balloon device information descriptor. @@ -1878,8 +1865,6 @@ static void vmballoon_compaction_deinit(struct vmballoon *b) iput(b->b_dev_info.inode); b->b_dev_info.inode = NULL; - kern_unmount(vmballoon_mnt); - vmballoon_mnt = NULL; } /** @@ -1895,13 +1880,8 @@ static void vmballoon_compaction_deinit(struct vmballoon *b) */ static __init int vmballoon_compaction_init(struct vmballoon *b) { - vmballoon_mnt = kern_mount(_fs); - if (IS_ERR(vmballoon_mnt)) - return PTR_ERR(vmballoon_mnt); - b->b_dev_info.migratepage = vmballoon_migratepage; - b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb); - + b->b_dev_info.inode = alloc_anon_inode(); if (IS_ERR(b->b_dev_info.inode)) return PTR_ERR(b->b_dev_info.inode); -- 2.30.1
[PATCH 4/9] drm: remove the drm file system
Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- drivers/gpu/drm/drm_drv.c | 64 ++- 1 file changed, 3 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 87e7214a8e3565..af293d76f979e5 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -26,6 +26,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include #include #include #include @@ -475,65 +476,6 @@ void drm_dev_unplug(struct drm_device *dev) } EXPORT_SYMBOL(drm_dev_unplug); -/* - * DRM internal mount - * We want to be able to allocate our own "struct address_space" to control - * memory-mappings in VRAM (or stolen RAM, ...). However, core MM does not allow - * stand-alone address_space objects, so we need an underlying inode. As there - * is no way to allocate an independent inode easily, we need a fake internal - * VFS mount-point. - * - * The drm_fs_inode_new() function allocates a new inode, drm_fs_inode_free() - * frees it again. You are allowed to use iget() and iput() to get references to - * the inode. But each drm_fs_inode_new() call must be paired with exactly one - * drm_fs_inode_free() call (which does not have to be the last iput()). - * We use drm_fs_inode_*() to manage our internal VFS mount-point and share it - * between multiple inode-users. You could, technically, call - * iget() + drm_fs_inode_free() directly after alloc and sometime later do an - * iput(), but this way you'd end up with a new vfsmount for each inode. - */ - -static int drm_fs_cnt; -static struct vfsmount *drm_fs_mnt; - -static int drm_fs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, 0x010203ff) ? 
0 : -ENOMEM; -} - -static struct file_system_type drm_fs_type = { - .name = "drm", - .owner = THIS_MODULE, - .init_fs_context = drm_fs_init_fs_context, - .kill_sb= kill_anon_super, -}; - -static struct inode *drm_fs_inode_new(void) -{ - struct inode *inode; - int r; - - r = simple_pin_fs(_fs_type, _fs_mnt, _fs_cnt); - if (r < 0) { - DRM_ERROR("Cannot mount pseudo fs: %d\n", r); - return ERR_PTR(r); - } - - inode = alloc_anon_inode_sb(drm_fs_mnt->mnt_sb); - if (IS_ERR(inode)) - simple_release_fs(_fs_mnt, _fs_cnt); - - return inode; -} - -static void drm_fs_inode_free(struct inode *inode) -{ - if (inode) { - iput(inode); - simple_release_fs(_fs_mnt, _fs_cnt); - } -} - /** * DOC: component helper usage recommendations * @@ -563,7 +505,7 @@ static void drm_dev_init_release(struct drm_device *dev, void *res) { drm_legacy_ctxbitmap_cleanup(dev); drm_legacy_remove_map_hash(dev); - drm_fs_inode_free(dev->anon_inode); + iput(dev->anon_inode); put_device(dev->dev); /* Prevent use-after-free in drm_managed_release when debugging is @@ -616,7 +558,7 @@ static int drm_dev_init(struct drm_device *dev, if (ret) return ret; - dev->anon_inode = drm_fs_inode_new(); + dev->anon_inode = alloc_anon_inode(); if (IS_ERR(dev->anon_inode)) { ret = PTR_ERR(dev->anon_inode); DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret); -- 2.30.1
[PATCH 3/9] powerpc/pseries: remove the ppc-cmm file system
Just use the generic anon_inode file system. Signed-off-by: Christoph Hellwig --- arch/powerpc/platforms/pseries/cmm.c | 27 ++- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 6d36b858b14df1..9d07e6bea7126c 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -6,6 +6,7 @@ * Author(s): Brian King (brk...@linux.vnet.ibm.com), */ +#include #include #include #include @@ -502,19 +503,6 @@ static struct notifier_block cmm_mem_nb = { }; #ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; - -static int cmm_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "ppc-cmm", - .init_fs_context = cmm_init_fs_context, - .kill_sb = kill_anon_super, -}; - static int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage, struct page *page, enum migrate_mode mode) @@ -573,19 +561,10 @@ static int cmm_balloon_compaction_init(void) balloon_devinfo_init(_dev_info); b_dev_info.migratepage = cmm_migratepage; - balloon_mnt = kern_mount(_fs); - if (IS_ERR(balloon_mnt)) { - rc = PTR_ERR(balloon_mnt); - balloon_mnt = NULL; - return rc; - } - - b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); + b_dev_info.inode = alloc_anon_inode(); if (IS_ERR(b_dev_info.inode)) { rc = PTR_ERR(b_dev_info.inode); b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; return rc; } @@ -597,8 +576,6 @@ static void cmm_balloon_compaction_deinit(void) if (b_dev_info.inode) iput(b_dev_info.inode); b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; } #else /* CONFIG_BALLOON_COMPACTION */ static int cmm_balloon_compaction_init(void) -- 2.30.1
[PATCH 2/9] fs: add an argument-less alloc_anon_inode
Add a new alloc_anon_inode helper that allocates an inode on the anon_inode file system. Signed-off-by: Christoph Hellwig --- fs/anon_inodes.c| 15 +-- include/linux/anon_inodes.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 4745fc37014332..b6a8ea71920bc3 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -63,7 +63,7 @@ static struct inode *anon_inode_make_secure_inode( const struct qstr qname = QSTR_INIT(name, strlen(name)); int error; - inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb); + inode = alloc_anon_inode(); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; @@ -225,13 +225,24 @@ int anon_inode_getfd_secure(const char *name, const struct file_operations *fops } EXPORT_SYMBOL_GPL(anon_inode_getfd_secure); +/** + * alloc_anon_inode - create a new anonymous inode + * + * Create an inode on the anon_inode file system and return it. + */ +struct inode *alloc_anon_inode(void) +{ + return alloc_anon_inode_sb(anon_inode_mnt->mnt_sb); +} +EXPORT_SYMBOL_GPL(alloc_anon_inode); + static int __init anon_inode_init(void) { anon_inode_mnt = kern_mount(_inode_fs_type); if (IS_ERR(anon_inode_mnt)) panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt)); - anon_inode_inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb); + anon_inode_inode = alloc_anon_inode(); if (IS_ERR(anon_inode_inode)) panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 71881a2b6f7860..b5ae9a6eda9923 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -21,6 +21,7 @@ int anon_inode_getfd_secure(const char *name, const struct file_operations *fops, void *priv, int flags, const struct inode *context_inode); +struct inode *alloc_anon_inode(void); #endif /* _LINUX_ANON_INODES_H */ -- 2.30.1
make alloc_anon_inode more useful
Hi all, this series first renames the existing alloc_anon_inode to alloc_anon_inode_sb to clearly mark it as requiring a superblock. It then adds a new alloc_anon_inode that works on the anon_inode file system super block, thus removing tons of boilerplate code. The few remaining callers of alloc_anon_inode_sb all use alloc_file_pseudo later, but might also be ripe for some cleanup. Diffstat: arch/powerpc/platforms/pseries/cmm.c | 27 +- drivers/dma-buf/dma-buf.c|2 - drivers/gpu/drm/drm_drv.c| 64 +-- drivers/misc/cxl/api.c |2 - drivers/misc/vmw_balloon.c | 24 + drivers/scsi/cxlflash/ocxl_hw.c |2 - drivers/virtio/virtio_balloon.c | 30 +--- fs/aio.c |2 - fs/anon_inodes.c | 15 +++- fs/libfs.c |2 - include/linux/anon_inodes.h |1 include/linux/fs.h |2 - kernel/resource.c| 30 ++-- mm/z3fold.c | 38 +--- mm/zsmalloc.c| 48 +- 15 files changed, 39 insertions(+), 250 deletions(-)
[PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb
Rename alloc_anon_inode to free the name for a new variant that does not need boilerplate to create a super_block first. Signed-off-by: Christoph Hellwig --- arch/powerpc/platforms/pseries/cmm.c | 2 +- drivers/dma-buf/dma-buf.c| 2 +- drivers/gpu/drm/drm_drv.c| 2 +- drivers/misc/cxl/api.c | 2 +- drivers/misc/vmw_balloon.c | 2 +- drivers/scsi/cxlflash/ocxl_hw.c | 2 +- drivers/virtio/virtio_balloon.c | 2 +- fs/aio.c | 2 +- fs/anon_inodes.c | 4 ++-- fs/libfs.c | 2 +- include/linux/fs.h | 2 +- kernel/resource.c| 2 +- mm/z3fold.c | 2 +- mm/zsmalloc.c| 2 +- 14 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 45a3a3022a85c9..6d36b858b14df1 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -580,7 +580,7 @@ static int cmm_balloon_compaction_init(void) return rc; } - b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); + b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); if (IS_ERR(b_dev_info.inode)) { rc = PTR_ERR(b_dev_info.inode); b_dev_info.inode = NULL; diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index f264b70c383eb4..dedcc9483352dc 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -445,7 +445,7 @@ static inline int is_dma_buf_file(struct file *file) static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) { struct file *file; - struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); + struct inode *inode = alloc_anon_inode_sb(dma_buf_mnt->mnt_sb); if (IS_ERR(inode)) return ERR_CAST(inode); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 20d22e41d7ce74..87e7214a8e3565 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -519,7 +519,7 @@ static struct inode *drm_fs_inode_new(void) return ERR_PTR(r); } - inode = alloc_anon_inode(drm_fs_mnt->mnt_sb); + inode = alloc_anon_inode_sb(drm_fs_mnt->mnt_sb); if 
(IS_ERR(inode)) simple_release_fs(_fs_mnt, _fs_cnt); diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index b493de962153ba..2efbf6c98028ef 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -73,7 +73,7 @@ static struct file *cxl_getfile(const char *name, goto err_module; } - inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb); + inode = alloc_anon_inode_sb(cxl_vfs_mount->mnt_sb); if (IS_ERR(inode)) { file = ERR_CAST(inode); goto err_fs; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index b837e7eba5f7dc..5d057a05ddbee8 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1900,7 +1900,7 @@ static __init int vmballoon_compaction_init(struct vmballoon *b) return PTR_ERR(vmballoon_mnt); b->b_dev_info.migratepage = vmballoon_migratepage; - b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb); + b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb); if (IS_ERR(b->b_dev_info.inode)) return PTR_ERR(b->b_dev_info.inode); diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index 244fc27215dc79..40184ed926b557 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -88,7 +88,7 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name, goto err2; } - inode = alloc_anon_inode(ocxlflash_vfs_mount->mnt_sb); + inode = alloc_anon_inode_sb(ocxlflash_vfs_mount->mnt_sb); if (IS_ERR(inode)) { rc = PTR_ERR(inode); dev_err(dev, "%s: alloc_anon_inode failed rc=%d\n", diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8985fc2cea8615..cae76ee5bdd688 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -916,7 +916,7 @@ static int virtballoon_probe(struct virtio_device *vdev) } vb->vb_dev_info.migratepage = virtballoon_migratepage; - vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); + vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb); 
if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); goto out_kern_unmount; diff --git a/fs/aio.c b/fs/aio.c index 1f32da13d39ee6..d1c2aa7fd6de7c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -234,7 +234,7 @@ static const struct address_space_operations aio_ctx_aops; static
Re: [PATCH v2 7/8] powerpc/xive: Fix xmon command "dxi"
On 3/9/21 11:23 AM, Greg Kurz wrote: > On Wed, 3 Mar 2021 18:48:56 +0100 > Cédric Le Goater wrote: > >> When under xmon, the "dxi" command dumps the state of the XIVE >> interrupts. If an interrupt number is specified, only the state of >> the associated XIVE interrupt is dumped. This form of the command >> lacks an irq_data parameter which is nevertheless used by >> xmon_xive_get_irq_config(), leading to an xmon crash. >> >> Fix that by doing a lookup in the system IRQ mapping to query the IRQ >> descriptor data. Invalid interrupt numbers, or not belonging to the >> XIVE IRQ domain, OPAL event interrupt number for instance, should be >> caught by the previous query done at the firmware level. >> >> Reported-by: kernel test robot >> Reported-by: Dan Carpenter >> Fixes: 97ef27507793 ("powerpc/xive: Fix xmon support on the PowerNV >> platform") >> Signed-off-by: Cédric Le Goater >> --- > > I've tested this in a KVM guest and it seems to do the job. > > 6:mon> dxi 1201 > IRQ 0x1201 : target=0xfc00 prio=ff lirq=0x0 flags= LH PQ=-Q > > Bad HW irq numbers are filtered by the hypervisor: > > 6:mon> dxi bad > [ 696.390577] xive: H_INT_GET_SOURCE_CONFIG lisn=2989 failed -55 > IRQ 0x0bad : no config rc=-6 > > Note that this also allows to show IPIs: > > 6:mon> dxi 0 > IRQ 0x : target=0x0 prio=06 lirq=0x10 > > This is a bit inconsistent with output of the 0-argument form of "dxi", It's a hidden feature ! :) Yes. You can query at the FW level the configuration of any valid HW interrupt number whereas "dxi" without an argument only loops on the XIVE IRQ domain which does not include the XIVE CPU IPIs which are special. You should use "dxa" for these. > which filters them out for a reason that isn't obvious to me. For historical reasons. XIVE support for PowerNV was the first to reach Linux. If you run the same xmon commands on a PowerNV machine (you could use QEMU), the output is different. It has more low-level details. 
> No big deal though, this should be addressed in another patch anyway. We could simplify the xmon helpers to be sync with the debugfs one and the QEMU/KVM "info pic" command. I agree. Thanks, C. > Reviewed-and-tested-by: Greg Kurz > >> arch/powerpc/sysdev/xive/common.c | 14 ++ >> 1 file changed, 10 insertions(+), 4 deletions(-) >> >> diff --git a/arch/powerpc/sysdev/xive/common.c >> b/arch/powerpc/sysdev/xive/common.c >> index f6b7b15bbb3a..8eefd152b947 100644 >> --- a/arch/powerpc/sysdev/xive/common.c >> +++ b/arch/powerpc/sysdev/xive/common.c >> @@ -255,17 +255,20 @@ notrace void xmon_xive_do_dump(int cpu) >> xmon_printf("\n"); >> } >> >> +static struct irq_data *xive_get_irq_data(u32 hw_irq) >> +{ >> +unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq); >> + >> +return irq ? irq_get_irq_data(irq) : NULL; >> +} >> + >> int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) >> { >> -struct irq_chip *chip = irq_data_get_irq_chip(d); >> int rc; >> u32 target; >> u8 prio; >> u32 lirq; >> >> -if (!is_xive_irq(chip)) >> -return -EINVAL; >> - >> rc = xive_ops->get_irq_config(hw_irq, , , ); >> if (rc) { >> xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); >> @@ -275,6 +278,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data >> *d) >> xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", >> hw_irq, target, prio, lirq); >> >> +if (!d) >> +d = xive_get_irq_data(hw_irq); >> + >> if (d) { >> struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); >> u64 val = xive_esb_read(xd, XIVE_ESB_GET); >
Re: [PATCH 3/3] powerpc/qspinlock: Use generic smp_cond_load_relaxed
On Tue, 09 Mar 2021, Michal Suchánek wrote: On Mon, Mar 08, 2021 at 05:59:50PM -0800, Davidlohr Bueso wrote: 49a7d46a06c3 (powerpc: Implement smp_cond_load_relaxed()) added busy-waiting pausing with a preferred SMT priority pattern, lowering the priority (reducing decode cycles) during the whole loop slowpath. However, data shows that while this pattern works well with simple ^^ spinlocks, queued spinlocks benefit more being kept in medium priority, with a cpu_relax() instead, being a low+medium combo on powerpc. ... diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index aecfde829d5d..7ae29cfb06c0 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -80,22 +80,6 @@ do { \ ___p1; \ }) -#ifdef CONFIG_PPC64 Maybe it should be kept for the simple spinlock case then? It is kept, note that simple spinlocks don't use smp_cond_load_relaxed, but instead deal with the priorities in arch_spin_lock(), so it will spin in low priority until it sees a chance to take the lock, where it switches back to medium. Thanks, Davidlohr
Re: [PATCH v2 4/8] powerpc/xive: Simplify xive_core_debug_show()
On 3/9/21 10:42 AM, Greg Kurz wrote: > On Tue, 9 Mar 2021 10:13:39 +0100 > Greg Kurz wrote: > >> On Mon, 8 Mar 2021 19:11:11 +0100 >> Cédric Le Goater wrote: >> >>> On 3/8/21 7:07 PM, Greg Kurz wrote: On Wed, 3 Mar 2021 18:48:53 +0100 Cédric Le Goater wrote: > Now that the IPI interrupt has its own domain, the checks on the HW > interrupt number XIVE_IPI_HW_IRQ and on the chip can be replaced by a > check on the domain. > > Signed-off-by: Cédric Le Goater > --- Shouldn't this have the following tags ? Reported-by: kernel test robot Reported-by: Dan Carpenter Fixes: 930914b7d528 ("powerpc/xive: Add a debugfs file to dump internal XIVE state") >>> >>> The next patch has because it removes the useless check on irq_data. >>> >> >> Ok I get it. This report isn't about an actual crash. Just a false >> positive because of the not needed check in the caller. >> > > Hrm... I meant because of the check in xive_debug_show_irq(). On the > contrary, the check removed by this patch in xive_core_debug_show() > was rather an explicit hint that xive_debug_show_irq() couldn't be > called with d being NULL. yes. irq_desc_get_irq_data() does not return a NULL value and xive_debug_show_irq() is only called from the for_each_irq_desc() loop. C. > >>> C. 
>>> Anyway, Reviewed-by: Greg Kurz > arch/powerpc/sysdev/xive/common.c | 18 -- > 1 file changed, 4 insertions(+), 14 deletions(-) > > diff --git a/arch/powerpc/sysdev/xive/common.c > b/arch/powerpc/sysdev/xive/common.c > index 678680531d26..7581cb12bb53 100644 > --- a/arch/powerpc/sysdev/xive/common.c > +++ b/arch/powerpc/sysdev/xive/common.c > @@ -1579,17 +1579,14 @@ static void xive_debug_show_cpu(struct seq_file > *m, int cpu) > seq_puts(m, "\n"); > } > > -static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct > irq_data *d) > +static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) > { > - struct irq_chip *chip = irq_data_get_irq_chip(d); > + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); > int rc; > u32 target; > u8 prio; > u32 lirq; > > - if (!is_xive_irq(chip)) > - return; > - > rc = xive_ops->get_irq_config(hw_irq, , , ); > if (rc) { > seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); > @@ -1627,16 +1624,9 @@ static int xive_core_debug_show(struct seq_file > *m, void *private) > > for_each_irq_desc(i, desc) { > struct irq_data *d = irq_desc_get_irq_data(desc); > - unsigned int hw_irq; > - > - if (!d) > - continue; > - > - hw_irq = (unsigned int)irqd_to_hwirq(d); > > - /* IPIs are special (HW number 0) */ > - if (hw_irq != XIVE_IPI_HW_IRQ) > - xive_debug_show_irq(m, hw_irq, d); > + if (d->domain == xive_irq_domain) > + xive_debug_show_irq(m, d); > } > return 0; > } >>> >> >
Re: [PATCH v2 1/8] powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property
On 3/8/21 6:13 PM, Greg Kurz wrote: > On Wed, 3 Mar 2021 18:48:50 +0100 > Cédric Le Goater wrote: > >> The 'chip_id' field of the XIVE CPU structure is used to choose a >> target for a source located on the same chip when possible. This field >> is assigned on the PowerNV platform using the "ibm,chip-id" property >> on pSeries under KVM when NUMA nodes are defined but it is undefined > > This sentence seems to have a syntax problem... like it is missing an > 'and' before 'on pSeries'. Ah yes, or simply a comma. >> under PowerVM. The XIVE source structure has a similar field >> 'src_chip' which is only assigned on the PowerNV platform. >> >> cpu_to_node() returns a compatible value on all platforms, 0 being the >> default node. It will also give us the opportunity to set the affinity >> of a source on pSeries when we can localize them. >> > > IIUC this relies on the fact that the NUMA node id is == to chip id > on PowerNV, i.e. xc->chip_id which is passed to OPAL remain stable > with this change. Linux sets the NUMA node in numa_setup_cpu(). On pseries, the hcall H_HOME_NODE_ASSOCIATIVITY returns the node id if I am correct (Daniel in Cc:) On PowerNV, Linux uses the "ibm,associativity" property of the CPU to find the node id. This value is built from the chip id in OPAL, so the value returned by cpu_to_node(cpu) and the value of the "ibm,chip-id" property are unlikely to be different. cpu_to_node(cpu) is used in many places to allocate the structures locally to the owning node. XIVE is not an exception (see below in the same patch), it is better to be consistent and get the same information (node id) using the same routine. In Linux, "ibm,chip-id" is only used in low level PowerNV drivers : LPC, XSCOM, RNG, VAS, NX. XIVE should be in that list also but skiboot unifies the controllers of the system to only expose one to the OS. This is problematic and should be changed but it's another topic. 
> On the other hand, you have the pSeries case under PowerVM that > doesn't xc->chip_id, which isn't passed to any hcall AFAICT. Yes, "ibm,chip-id" is an OPAL concept unfortunately and it has no meaning under PAPR. xc->chip_id on pseries (PowerVM) will contain an invalid chip id. QEMU/KVM exposes "ibm,chip-id" but it's not used. (its value is not always correct btw) > It looks like the chip id is only used for localization purpose in > this case, right ? Yes and PAPR sources are not localized. So it's not used. MSI sources could be if we rewrote the MSI driver. > In this case, what about doing this change for pSeries only, > somewhere in spapr.c ? The IPI code is common to all platforms and all have the same issue. I'd rather not. Thanks, C. >> Signed-off-by: Cédric Le Goater >> --- >> arch/powerpc/sysdev/xive/common.c | 7 +-- >> 1 file changed, 1 insertion(+), 6 deletions(-) >> >> diff --git a/arch/powerpc/sysdev/xive/common.c >> b/arch/powerpc/sysdev/xive/common.c >> index 595310e056f4..b8e456da28aa 100644 >> --- a/arch/powerpc/sysdev/xive/common.c >> +++ b/arch/powerpc/sysdev/xive/common.c >> @@ -1335,16 +1335,11 @@ static int xive_prepare_cpu(unsigned int cpu) >> >> xc = per_cpu(xive_cpu, cpu); >> if (!xc) { >> -struct device_node *np; >> - >> xc = kzalloc_node(sizeof(struct xive_cpu), >>GFP_KERNEL, cpu_to_node(cpu)); >> if (!xc) >> return -ENOMEM; >> -np = of_get_cpu_node(cpu, NULL); >> -if (np) >> -xc->chip_id = of_get_ibm_chip_id(np); >> -of_node_put(np); >> +xc->chip_id = cpu_to_node(cpu); >> xc->hw_ipi = XIVE_BAD_IRQ; >> >> per_cpu(xive_cpu, cpu) = xc; >
[PATCH 4/4] tools/perf: Support pipeline stage cycles for powerpc
The pipeline stage cycles details can be recorded on powerpc from the contents of Performance Monitor Unit (PMU) registers. On ISA v3.1 platform, sampling registers exposes the cycles spent in different pipeline stages. Patch adds perf tools support to present two of the cycle counter information along with memory latency (weight). Re-use the field 'ins_lat' for storing the first pipeline stage cycle. This is stored in 'var2_w' field of 'perf_sample_weight'. Add a new field 'p_stage_cyc' to store the second pipeline stage cycle which is stored in 'var3_w' field of perf_sample_weight. Add new sort function 'Pipeline Stage Cycle' and include this in default_mem_sort_order[]. This new sort function may be used to denote some other pipeline stage in another architecture. So add this to list of sort entries that can have dynamic header string. Signed-off-by: Athira Rajeev --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/arch/powerpc/util/event.c | 18 -- tools/perf/util/event.h | 1 + tools/perf/util/hist.c | 11 --- tools/perf/util/hist.h | 1 + tools/perf/util/session.c| 4 +++- tools/perf/util/sort.c | 24 ++-- tools/perf/util/sort.h | 2 ++ 8 files changed, 54 insertions(+), 8 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f546b5e9db05..9691d9c227ba 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -112,6 +112,7 @@ OPTIONS - ins_lat: Instruction latency in core cycles. This is the global instruction latency - local_ins_lat: Local instruction latency version + - p_stage_cyc: Number of cycles spent in a pipeline stage. By default, comm, dso and symbol keys are used. (i.e. 
--sort comm,dso,symbol) diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index f49d32c2c8ae..b80fbee83b6e 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -18,8 +18,11 @@ void arch_perf_parse_sample_weight(struct perf_sample *data, weight.full = *array; if (type & PERF_SAMPLE_WEIGHT) data->weight = weight.full; - else + else { data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + data->p_stage_cyc = weight.var3_w; + } } void arch_perf_synthesize_sample_weight(const struct perf_sample *data, @@ -27,6 +30,17 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data, { *array = data->weight; - if (type & PERF_SAMPLE_WEIGHT_STRUCT) + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { *array &= 0x; + *array |= ((u64)data->ins_lat << 32); + } +} + +const char *arch_perf_header_entry__add(const char *se_header) +{ + if (!strcmp(se_header, "Local INSTR Latency")) + return "Finish Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Dispatch Cyc"; + return se_header; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 89b149e2e70a..65f89e80916f 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -147,6 +147,7 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; + u16 p_stage_cyc; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index c82f5fc26af8..9299ee535518 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); + hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else @@ -289,13 +290,14 @@ 
static long hist_time(unsigned long htime) } static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat) + u64 weight, u64 ins_lat, u64 p_stage_cyc) { he_stat->period += period; he_stat->weight += weight; he_stat->nr_events += 1; he_stat->ins_lat+= ins_lat; + he_stat->p_stage_cyc+= p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->nr_events += src->nr_events;
[PATCH 3/4] tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT
Add arch specific arch_evsel__set_sample_weight() to set the new sample type for powerpc. Add arch specific arch_perf_parse_sample_weight() to store the sample->weight values depending on the sample type applied. if the new sample type (PERF_SAMPLE_WEIGHT_STRUCT) is applied, store only the lower 32 bits to sample->weight. If sample type is 'PERF_SAMPLE_WEIGHT', store the full 64-bit to sample->weight. Signed-off-by: Athira Rajeev --- tools/perf/arch/powerpc/util/Build | 2 ++ tools/perf/arch/powerpc/util/event.c | 32 tools/perf/arch/powerpc/util/evsel.c | 8 3 files changed, 42 insertions(+) create mode 100644 tools/perf/arch/powerpc/util/event.c create mode 100644 tools/perf/arch/powerpc/util/evsel.c diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index b7945e5a543b..8a79c4126e5b 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -4,6 +4,8 @@ perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o perf-y += sym-handling.o +perf-y += evsel.o +perf-y += event.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c new file mode 100644 index ..f49d32c2c8ae --- /dev/null +++ b/tools/perf/arch/powerpc/util/event.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" + +void arch_perf_parse_sample_weight(struct perf_sample *data, + const __u64 *array, u64 type) +{ + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT) + data->weight = weight.full; + else + data->weight = weight.var1_dw; +} + +void arch_perf_synthesize_sample_weight(const struct perf_sample *data, + __u64 *array, u64 type) 
+{ + *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) + *array &= 0xffffffff; +} diff --git a/tools/perf/arch/powerpc/util/evsel.c b/tools/perf/arch/powerpc/util/evsel.c new file mode 100644 index ..2f733cdc8dbb --- /dev/null +++ b/tools/perf/arch/powerpc/util/evsel.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "util/evsel.h" + +void arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT_STRUCT); +} -- 1.8.3.1
[PATCH 2/4] tools/perf: Add dynamic headers for perf report columns
Currently the header string for different columns in perf report is fixed. Some fields of perf sample could have different meaning for different architectures than the meaning conveyed by the header string. An example is the new field 'var2_w' of perf_sample_weight structure. This is presently captured as 'Local INSTR Latency' in perf mem report. But this could be used to denote a different latency cycle in another architecture. Introduce a weak function arch_perf_header_entry__add() to set the arch specific header string for the fields which can contain dynamic header. If the architecture do not have this function, fall back to the default header string value. Signed-off-by: Athira Rajeev --- tools/perf/util/event.h | 1 + tools/perf/util/sort.c | 19 ++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f603edbbbc6f..89b149e2e70a 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -427,5 +427,6 @@ void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct per void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); +const char *arch_perf_header_entry__add(const char *se_header); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0d5ad42812b9..741a6df29fa0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -25,6 +25,7 @@ #include #include "mem-events.h" #include "annotate.h" +#include "event.h" #include "time-utils.h" #include "cgroup.h" #include "machine.h" @@ -45,6 +46,7 @@ regex_tignore_callees_regex; inthave_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; +const char *dynamic_headers[] = {"local_ins_lat"}; /* * Replaces all occurrences of a char used with the: @@ -1816,6 +1818,16 @@ struct sort_dimension { int taken; }; +const char * __weak 
arch_perf_header_entry__add(const char *se_header) +{ + return se_header; +} + +static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) +{ + sd->entry->se_header = arch_perf_header_entry__add(sd->entry->se_header); +} + #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } static struct sort_dimension common_sort_dimensions[] = { @@ -2739,11 +2751,16 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, struct evlist *evlist, int level) { - unsigned int i; + unsigned int i, j; for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = _sort_dimensions[i]; + for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { + if (!strcmp(dynamic_headers[j], sd->name)) + sort_dimension_add_dynamic_header(sd); + } + if (strncasecmp(tok, sd->name, strlen(tok))) continue; -- 1.8.3.1
[PATCH 1/4] powerpc/perf: Expose processor pipeline stage cycles using PERF_SAMPLE_WEIGHT_STRUCT
Performance Monitoring Unit (PMU) registers in powerpc provide information on cycles elapsed between different stages in the pipeline. This can be used for application tuning. On ISA v3.1 platform, this information is exposed by sampling registers. Patch adds kernel support to capture two of the cycle counters as part of perf sample using the sample type: PERF_SAMPLE_WEIGHT_STRUCT. The power PMU function 'get_mem_weight' currently uses 64 bit weight field of perf_sample_data to capture memory latency. But following the introduction of PERF_SAMPLE_WEIGHT_TYPE, weight field could contain 64-bit or 32-bit value depending on the architecture support for PERF_SAMPLE_WEIGHT_STRUCT. Patch uses WEIGHT_STRUCT to expose the pipeline stage cycles info. Hence update the ppmu functions to work for 64-bit and 32-bit weight values. If the sample type is PERF_SAMPLE_WEIGHT, use the 64-bit weight field. If the sample type is PERF_SAMPLE_WEIGHT_STRUCT, memory subsystem latency is stored in the low 32 bits of perf_sample_weight structure. Also for CPU_FTR_ARCH_31, capture the two cycle counter values in two 16 bit fields of perf_sample_weight structure.
Signed-off-by: Athira Rajeev --- arch/powerpc/include/asm/perf_event_server.h | 2 +- arch/powerpc/perf/core-book3s.c | 4 ++-- arch/powerpc/perf/isa207-common.c| 29 +--- arch/powerpc/perf/isa207-common.h| 6 +- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 00e7e671bb4b..112cf092d7b3 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -43,7 +43,7 @@ struct power_pmu { u64 alt[]); void(*get_mem_data_src)(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); - void(*get_mem_weight)(u64 *weight); + void(*get_mem_weight)(u64 *weight, u64 type); unsigned long group_constraint_mask; unsigned long group_constraint_val; u64 (*bhrb_filter_map)(u64 branch_sample_type); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6817331e22ff..57ff2494880c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2206,9 +2206,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ppmu->get_mem_data_src) ppmu->get_mem_data_src(_src, ppmu->flags, regs); - if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && + if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE && ppmu->get_mem_weight) - ppmu->get_mem_weight(); + ppmu->get_mem_weight(, event->attr.sample_type); if (perf_event_overflow(event, , regs)) power_pmu_stop(event, 0); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index e4f577da33d8..5dcbdbd54598 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -284,8 +284,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, } } -void isa207_get_mem_weight(u64 *weight) +void isa207_get_mem_weight(u64 *weight, u64 type) { + union perf_sample_weight *weight_fields; + u64 weight_lat; u64 mmcra = mfspr(SPRN_MMCRA); u64 exp = 
MMCRA_THR_CTR_EXP(mmcra); u64 mantissa = MMCRA_THR_CTR_MANT(mmcra); @@ -296,9 +298,30 @@ void isa207_get_mem_weight(u64 *weight) mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); if (val == 0 || val == 7) - *weight = 0; + weight_lat = 0; else - *weight = mantissa << (2 * exp); + weight_lat = mantissa << (2 * exp); + + /* +* Use 64 bit weight field (full) if sample type is +* WEIGHT. +* +* if sample type is WEIGHT_STRUCT: +* - store memory latency in the lower 32 bits. +* - For ISA v3.1, use remaining two 16 bit fields of +* perf_sample_weight to store cycle counter values +* from sier2. +*/ + weight_fields = (union perf_sample_weight *)weight; + if (type & PERF_SAMPLE_WEIGHT) + weight_fields->full = weight_lat; + else { + weight_fields->var1_dw = (u32)weight_lat; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2)); + weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2)); + } + } } int isa207_get_constraint(u64 event, unsigned long *maskp,
[PATCH 0/4] powerpc/perf: Export processor pipeline stage cycles information
Performance Monitoring Unit (PMU) registers in powerpc exports number of cycles elapsed between different stages in the pipeline. Example, sampling registers in ISA v3.1. This patchset implements kernel and perf tools support to expose these pipeline stage cycles using the sample type PERF_SAMPLE_WEIGHT_TYPE. Patch 1/4 adds kernel side support to store the cycle counter values as part of 'var2_w' and 'var3_w' fields of perf_sample_weight structure. Patch 2/4 adds support to make the perf report column header strings as dynamic. Patch 3/4 adds powerpc support in perf tools for PERF_SAMPLE_WEIGHT_STRUCT in sample type: PERF_SAMPLE_WEIGHT_TYPE. Patch 4/4 adds support to present pipeline stage cycles as part of mem-mode. Sample output on powerpc: # perf mem record ls # perf mem report # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 11 of event 'cpu/mem-loads/' # Total weight : 1332 # Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,stall_cyc # # Overhead Samples Local Weight Memory access Symbol Shared Object Data Symbol Data ObjectSnoop TLB access Locked Blocked Finish Cyc Dispatch Cyc # .. . . .. .. .. . . # 44.14% 1 588 L1 hit[k] rcu_nmi_exit[kernel.vmlinux] [k] 0xc007ffdd21b0 [unknown] N/A N/A No N/A7 5 22.22% 1 296 L1 hit[k] copypage_power7 [kernel.vmlinux] [k] 0xc000ff6a1780 [unknown] N/A N/A No N/A2933 6.98% 1 93L1 hit[.] _dl_addr libc-2.31.so [.] 0x7fff86fa5058 libc-2.31.so N/A N/A No N/A7 1 6.61% 1 88L2 hit[.] new_do_writelibc-2.31.so [.] _IO_2_1_stdout_+0x0 libc-2.31.so N/A N/A No N/A84 1 5.93% 1 79L1 hit[k] printk_nmi_exit [kernel.vmlinux] [k] 0xc006085df6b0 [unknown] N/A N/A No N/A7 1 4.05% 1 54L2 hit[.] __alloc_dir libc-2.31.so [.] 0x7fffdb70a640 [stack]N/A N/A No N/A18 1 3.60% 1 48L1 hit[.] _init ls[.] 
0x00016ca82118 [heap] N/A N/A No N/A7 6 2.40% 1 32L1 hit[k] desc_read [kernel.vmlinux] [k] _printk_rb_static_descs+0x1ea10 [kernel.vmlinux].data N/A N/A No N/A7 1 1.65% 1 22L2 hit[k] perf_iterate_ctx.constprop.139 [kernel.vmlinux] [k] 0xc0064d79e8a8 [unknown] N/A N/A No N/A16 1 1.58% 1 21L1 hit[k] perf_event_interrupt[kernel.vmlinux] [k] 0xc006085df6b0 [unknown] N/A N/A No N/A7 1 0.83% 1 11L1 hit[k] perf_event_exec [kernel.vmlinux] [k] 0xc007ffdd3288 [unknown] N/A N/A No N/A7 4 Athira Rajeev (4): powerpc/perf: Expose processor pipeline stage cycles using PERF_SAMPLE_WEIGHT_STRUCT tools/perf: Add dynamic headers for perf report columns tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT tools/perf: Support
Re: [PATCH v2 8/8] powerpc/xive: Map one IPI interrupt per node
On Wed, 3 Mar 2021 18:48:57 +0100 Cédric Le Goater wrote: > ipistorm [*] can be used to benchmark the raw interrupt rate of an > interrupt controller by measuring the number of IPIs a system can > sustain. When applied to the XIVE interrupt controller of POWER9 and > POWER10 systems, a significant drop of the interrupt rate can be > observed when crossing the second node boundary. > > This is due to the fact that a single IPI interrupt is used for all > CPUs of the system. The structure is shared and the cache line updates > impact greatly the traffic between nodes and the overall IPI > performance. > > As a workaround, the impact can be reduced by deactivating the IRQ > lockup detector ("noirqdebug") which does a lot of accounting in the > Linux IRQ descriptor structure and is responsible for most of the > performance penalty. > > As a fix, this proposal allocates an IPI interrupt per node, to be > shared by all CPUs of that node. It solves the scaling issue, the IRQ > lockup detector still has an impact but the XIVE interrupt rate scales > linearly. It also improves the "noirqdebug" case as showed in the > tables below. 
> > * P9 DD2.2 - 2s * 64 threads > >"noirqdebug" > Mint/sMint/s > chips cpus IPI/sys IPI/chip IPI/chipIPI/sys > -- > 1 0-15 4.984023 4.875405 4.996536 5.048892 > 0-3110.879164 10.544040 10.757632 11.037859 > 0-4715.345301 14.688764 14.926520 15.310053 > 0-6317.064907 17.066812 17.613416 17.874511 > 2 0-7911.768764 21.650749 22.689120 22.566508 > 0-9510.616812 26.878789 28.434703 28.320324 > 0-111 10.151693 31.397803 31.771773 32.388122 > 0-1279.948502 33.139336 34.875716 35.224548 > > * P10 DD1 - 4s (not homogeneous) 352 threads > >"noirqdebug" > Mint/sMint/s > chips cpus IPI/sys IPI/chip IPI/chipIPI/sys > -- > 1 0-15 2.409402 2.364108 2.383303 2.395091 > 0-31 6.028325 6.046075 6.08 6.073750 > 0-47 8.655178 8.644531 8.712830 8.724702 > 0-6311.629652 11.735953 12.088203 12.055979 > 0-7914.392321 14.729959 14.986701 14.973073 > 0-9512.604158 13.004034 17.528748 17.568095 > 2 0-1119.767753 13.719831 19.968606 20.024218 > 0-1276.744566 16.418854 22.898066 22.995110 > 0-1436.005699 19.174421 25.425622 25.417541 > 0-1595.649719 21.938836 27.952662 28.059603 > 0-1755.441410 24.109484 31.133915 31.127996 > 3 0-1915.318341 24.405322 33.999221 33.775354 > 0-2075.191382 26.449769 36.050161 35.867307 > 0-2235.102790 29.356943 39.544135 39.508169 > 0-2395.035295 31.933051 42.135075 42.071975 > 0-2554.969209 34.477367 44.655395 44.757074 > 4 0-2714.907652 35.887016 47.080545 47.318537 > 0-2874.839581 38.076137 50.464307 50.636219 > 0-3034.786031 40.881319 53.478684 53.310759 > 0-3194.743750 43.448424 56.388102 55.973969 > 0-3354.709936 45.623532 59.400930 58.926857 > 0-3514.681413 45.646151 62.035804 61.830057 > > [*] https://github.com/antonblanchard/ipistorm > > Signed-off-by: Cédric Le Goater > --- > arch/powerpc/sysdev/xive/xive-internal.h | 2 -- > arch/powerpc/sysdev/xive/common.c| 39 ++-- > 2 files changed, 30 insertions(+), 11 deletions(-) > > diff --git a/arch/powerpc/sysdev/xive/xive-internal.h > b/arch/powerpc/sysdev/xive/xive-internal.h > index 
9cf57c722faa..b3a456fdd3a5 100644 > --- a/arch/powerpc/sysdev/xive/xive-internal.h > +++ b/arch/powerpc/sysdev/xive/xive-internal.h > @@ -5,8 +5,6 @@ > #ifndef __XIVE_INTERNAL_H > #define __XIVE_INTERNAL_H > > -#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */ > - > /* > * A "disabled" interrupt should never fire, to catch problems > * we set its logical number to this > diff --git a/arch/powerpc/sysdev/xive/common.c > b/arch/powerpc/sysdev/xive/common.c > index 8eefd152b947..c27f7bb0494b 100644 > --- a/arch/powerpc/sysdev/xive/common.c > +++ b/arch/powerpc/sysdev/xive/common.c > @@ -65,8 +65,16 @@ static struct irq_domain *xive_irq_domain; > #ifdef CONFIG_SMP > static struct irq_domain *xive_ipi_irq_domain; > > -/* The IPIs all use the same logical irq number */ > -static u32 xive_ipi_irq; > +/* The IPIs use the same logical irq number when on the same chip */ >
Re: [PATCH v4] powerpc/uprobes: Validation for prefixed instruction
On 3/9/21 4:51 PM, Naveen N. Rao wrote: On 2021/03/09 08:54PM, Michael Ellerman wrote: Ravi Bangoria writes: As per ISA 3.1, prefixed instruction should not cross 64-byte boundary. So don't allow Uprobe on such prefixed instruction. There are two ways probed instruction is changed in mapped pages. First, when Uprobe is activated, it searches for all the relevant pages and replace instruction in them. In this case, if that probe is on the 64-byte unaligned prefixed instruction, error out directly. Second, when Uprobe is already active and user maps a relevant page via mmap(), instruction is replaced via mmap() code path. But because Uprobe is invalid, entire mmap() operation can not be stopped. In this case just print an error and continue. Signed-off-by: Ravi Bangoria Acked-by: Naveen N. Rao Do we have a Fixes: tag for this? Since this is an additional check we are adding, I don't think we should add a Fixes: tag. Nothing is broken per-se -- we're just adding more checks to catch simple mistakes. Also, like Oleg pointed out, there are still many other ways for users to shoot themselves in the foot with uprobes and prefixed instructions, if they so desire. However, if you still think we should add a Fixes: tag, we can perhaps use the below commit since I didn't see any specific commit adding support for prefixed instructions for uprobes: Fixes: 650b55b707fdfa ("powerpc: Add prefixed instructions to instruction data type") True. IMO, It doesn't really need any Fixes tag. --- v3: https://lore.kernel.org/r/20210304050529.59391-1-ravi.bango...@linux.ibm.com v3->v4: - CONFIG_PPC64 check was not required, remove it. - Use SZ_ macros instead of hardcoded numbers. 
arch/powerpc/kernel/uprobes.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index e8a63713e655..4cbfff6e94a3 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, if (addr & 0x03) return -EINVAL; + if (cpu_has_feature(CPU_FTR_ARCH_31) && + ppc_inst_prefixed(auprobe->insn) && + (addr & (SZ_64 - 4)) == SZ_64 - 4) { + pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); + return -EINVAL; I realise we already did the 0x03 check above, but I still think this would be clearer simply as: (addr & 0x3f == 60) Indeed, I like the use of `60' there -- hex is overrated ;) Sure. Will resend. Ravi
[PATCH v2 43/43] powerpc/32: Manage KUAP in C
Move all KUAP management in C. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/book3s/32/kup.h | 50 +--- arch/powerpc/include/asm/interrupt.h | 2 + arch/powerpc/include/asm/kup.h | 9 arch/powerpc/include/asm/nohash/32/kup-8xx.h | 25 +- arch/powerpc/kernel/entry_32.S | 6 --- arch/powerpc/kernel/interrupt.c | 19 ++-- arch/powerpc/kernel/process.c| 3 ++ 7 files changed, 11 insertions(+), 103 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index c9d6c28bcd10..27991e0d2cf9 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -5,55 +5,7 @@ #include #include -#ifdef __ASSEMBLY__ - -#ifdef CONFIG_PPC_KUAP - -.macro kuap_update_sr gpr1, gpr2, gpr3/* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi\gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000/* address of next segment */ - cmplw \gpr2, \gpr3 - blt-101b - isync -.endm - -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 - lwz \gpr2, KUAP(\thread) - rlwinm. \gpr3, \gpr2, 28, 0xf000 - stw \gpr2, STACK_REGS_KUAP(\sp) - beq+102f - li \gpr1, 0 - stw \gpr1, KUAP(\thread) - mfsrin \gpr1, \gpr2 - oris\gpr1, \gpr1, SR_KS@h /* set Ks */ - kuap_update_sr \gpr1, \gpr2, \gpr3 -102: -.endm - -.macro kuap_restoresp, current, gpr1, gpr2, gpr3 - lwz \gpr2, STACK_REGS_KUAP(\sp) - rlwinm. 
\gpr3, \gpr2, 28, 0xf000 - stw \gpr2, THREAD + KUAP(\current) - beq+102f - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */ - kuap_update_sr \gpr1, \gpr2, \gpr3 -102: -.endm - -.macro kuap_check current, gpr -#ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr, THREAD + KUAP(\current) -999: twnei \gpr, 0 - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) -#endif -.endm - -#endif /* CONFIG_PPC_KUAP */ - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_KUAP diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index d4bfe94b4a68..b41cb4e014b2 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -37,6 +37,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup kuep_lock(); current->thread.regs = regs; account_cpu_user_entry(); + } else { + kuap_save_and_lock(regs); } #endif /* diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index b7efa46b3109..5bbe8f28d26b 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -28,15 +28,6 @@ #ifdef __ASSEMBLY__ #ifndef CONFIG_PPC_KUAP -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 -.endm - -.macro kuap_restoresp, current, gpr1, gpr2, gpr3 -.endm - -.macro kuap_check current, gpr -.endm - .macro kuap_check_amr gpr1, gpr2 .endm diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index c74f5704bc47..fb294dbca102 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -7,30 +7,7 @@ #ifdef CONFIG_PPC_KUAP -#ifdef __ASSEMBLY__ - -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 - lis \gpr2, MD_APG_KUAP@h/* only APG0 and APG1 are used */ - mfspr \gpr1, SPRN_MD_AP - mtspr SPRN_MD_AP, \gpr2 - stw \gpr1, STACK_REGS_KUAP(\sp) -.endm - -.macro kuap_restoresp, current, gpr1, gpr2, gpr3 - lwz \gpr1, 
STACK_REGS_KUAP(\sp) - mtspr SPRN_MD_AP, \gpr1 -.endm - -.macro kuap_check current, gpr -#ifdef CONFIG_PPC_KUAP_DEBUG - mfspr \gpr, SPRN_MD_AP - rlwinm \gpr, \gpr, 16, 0x -999: twnei \gpr, MD_APG_KUAP@h - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) -#endif -.endm - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 850cb17a937f..f5ac021ff9ed 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -52,11 +52,9 @@ .globl prepare_transfer_to_handler prepare_transfer_to_handler: andi. r0,r9,MSR_PR - addir12, r2, THREAD bnelr /* if from kernel, check interrupted DOZE/NAP mode */ - kuap_save_and_lock r11, r12, r9, r5, r6 lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f
[PATCH v2 41/43] powerpc/32s: Create C version of kuap save/restore/check helpers
In preparation of porting PPC32 to C syscall entry/exit, create C version of kuap_save_and_lock() and kuap_user_restore() and kuap_kernel_restore() and kuap_check() and kuap_get_and_check() on book3s/32. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/book3s/32/kup.h | 45 1 file changed, 45 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index b97ea60f6fa3..c9d6c28bcd10 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -72,6 +72,51 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync();/* Context sync required after mtsr() */ } +static inline void kuap_save_and_lock(struct pt_regs *regs) +{ + unsigned long kuap = current->thread.kuap; + u32 addr = kuap & 0xf0000000; + u32 end = kuap << 28; + + regs->kuap = kuap; + if (unlikely(!kuap)) + return; + + current->thread.kuap = 0; + kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* Set Ks */ +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + u32 addr = regs->kuap & 0xf0000000; + u32 end = regs->kuap << 28; + + current->thread.kuap = regs->kuap; + + if (unlikely(regs->kuap == kuap)) + return; + + kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ +} + +static inline unsigned long kuap_get_and_check(void) +{ + unsigned long kuap = current->thread.kuap; + + WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0); + + return kuap; +} + +static inline void kuap_check(void) +{ + kuap_get_and_check(); +} + static __always_inline void allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) { -- 2.25.0
[PATCH v2 42/43] powerpc/8xx: Create C version of kuap save/restore/check helpers
In preparation of porting PPC32 to C syscall entry/exit, create C version of kuap_save_and_lock() and kuap_user_restore() and kuap_kernel_restore() and kuap_check() and kuap_get_and_check() on 8xx. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 31 1 file changed, 31 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 17a4a616436f..c74f5704bc47 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -34,6 +34,37 @@ #include +static inline void kuap_save_and_lock(struct pt_regs *regs) +{ + regs->kuap = mfspr(SPRN_MD_AP); + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + mtspr(SPRN_MD_AP, regs->kuap); +} + +static inline unsigned long kuap_get_and_check(void) +{ + unsigned long kuap = mfspr(SPRN_MD_AP); + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16); + + return kuap; +} + +static inline void kuap_check(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + kuap_get_and_check(); +} + static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { -- 2.25.0
[PATCH v2 40/43] powerpc/64s: Make kuap_check_amr() and kuap_get_and_check_amr() generic
In preparation of porting powerpc32 to C syscall entry/exit, rename kuap_check_amr() and kuap_get_and_check_amr() as kuap_check() and kuap_get_and_check(), and move in the generic asm/kup.h the stub for when CONFIG_PPC_KUAP is not selected. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/book3s/64/kup.h | 24 ++-- arch/powerpc/include/asm/kup.h | 10 +- arch/powerpc/kernel/interrupt.c | 12 ++-- arch/powerpc/kernel/irq.c| 2 +- 4 files changed, 18 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 8bd905050896..d9b07e9998be 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, */ } -static inline unsigned long kuap_get_and_check_amr(void) +static inline unsigned long kuap_get_and_check(void) { if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { unsigned long amr = mfspr(SPRN_AMR); @@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void) return 0; } -#else /* CONFIG_PPC_PKEY */ - -static inline void kuap_user_restore(struct pt_regs *regs) -{ -} - -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) -{ -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0; -} - -#endif /* CONFIG_PPC_PKEY */ - - -#ifdef CONFIG_PPC_KUAP - -static inline void kuap_check_amr(void) +static inline void kuap_check(void) { if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 25671f711ec2..b7efa46b3109 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -74,7 +74,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return false; } -static inline void kuap_check_amr(void) { } +static inline void 
kuap_check(void) { } +static inline void kuap_save_and_lock(struct pt_regs *regs) { } +static inline void kuap_user_restore(struct pt_regs *regs) { } +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } + +static inline unsigned long kuap_get_and_check(void) +{ + return 0; +} /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 727b7848c9cc..40ed55064e54 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -76,7 +76,7 @@ notrace long system_call_exception(long r3, long r4, long r5, } else #endif #ifdef CONFIG_PPC64 - kuap_check_amr(); + kuap_check(); #endif booke_restore_dbcr0(); @@ -254,7 +254,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, CT_WARN_ON(ct_state() == CONTEXT_USER); #ifdef CONFIG_PPC64 - kuap_check_amr(); + kuap_check(); #endif regs->result = r3; @@ -380,7 +380,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned * AMR can only have been unlocked if we interrupted the kernel. */ #ifdef CONFIG_PPC64 - kuap_check_amr(); + kuap_check(); #endif local_irq_save(flags); @@ -451,7 +451,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign unsigned long flags; unsigned long ret = 0; #ifdef CONFIG_PPC64 - unsigned long amr; + unsigned long kuap; #endif if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && @@ -467,7 +467,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign CT_WARN_ON(ct_state() == CONTEXT_USER); #ifdef CONFIG_PPC64 - amr = kuap_get_and_check_amr(); + kuap = kuap_get_and_check(); #endif if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { @@ -511,7 +511,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign * value from the check above. 
*/ #ifdef CONFIG_PPC64 - kuap_kernel_restore(regs, amr); + kuap_kernel_restore(regs, kuap); #endif return ret; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index d71fd10a1dd4..3b18d2b2c702 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -282,7 +282,7 @@ static inline void replay_soft_interrupts_irqrestore(void) * and re-locking AMR but we shouldn't get here in the first place, * hence the warning. */ - kuap_check_amr(); + kuap_check(); if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(AMR_KUAP_BLOCKED); -- 2.25.0
[PATCH v2 39/43] powerpc/32s: Move KUEP locking/unlocking in C
This can be done in C, do it. Unrolling the loop gains approx. 15% performance. >From now on, prepare_transfer_to_handler() is only for interrupts from kernel. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/book3s/32/kup.h | 31 --- arch/powerpc/include/asm/interrupt.h | 3 ++ arch/powerpc/include/asm/kup.h | 8 + arch/powerpc/kernel/entry_32.S | 16 +- arch/powerpc/kernel/interrupt.c | 4 +++ arch/powerpc/mm/book3s32/Makefile| 1 + arch/powerpc/mm/book3s32/kuep.c | 38 7 files changed, 55 insertions(+), 46 deletions(-) create mode 100644 arch/powerpc/mm/book3s32/kuep.c diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 73bc5d2c431d..b97ea60f6fa3 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,37 +7,6 @@ #ifdef __ASSEMBLY__ -.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi\gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000/* address of next segment */ - bdnz101b - isync -.endm - -.macro kuep_lock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - oris\gpr1, \gpr1, SR_NX@h /* set Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - -.macro kuep_unlock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - #ifdef CONFIG_PPC_KUAP .macro kuap_update_sr gpr1, gpr2, gpr3/* NEVER use r0 as gpr2 due to addis */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index e6d71c2e3aa2..d4bfe94b4a68 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -34,6 +34,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct 
interrup trace_hardirqs_off(); if (user_mode(regs)) { + kuep_lock(); current->thread.regs = regs; account_cpu_user_entry(); } @@ -91,6 +92,8 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt exception_exit(state->ctx_state); #endif + if (user_mode(regs)) + kuep_unlock(); /* * Book3S exits to user via interrupt_exit_user_prepare(), which does * context tracking, which is a cleaner way to handle PREEMPT=y diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 7ec21af49a45..25671f711ec2 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -55,6 +55,14 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) +void kuep_lock(void); +void kuep_unlock(void); +#else +static inline void kuep_lock(void) { } +static inline void kuep_unlock(void) { } +#endif + #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); #else diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9c333e6db5fa..850cb17a937f 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -53,14 +53,9 @@ prepare_transfer_to_handler: andi. 
r0,r9,MSR_PR addir12, r2, THREAD - beq 2f -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif - blr + bnelr /* if from kernel, check interrupted DOZE/NAP mode */ -2: kuap_save_and_lock r11, r12, r9, r5, r6 lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 @@ -84,9 +79,6 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) .globl transfer_to_syscall transfer_to_syscall: SAVE_NVGPRS(r1) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif /* Calling convention has r9 = orig r0, r10 = regs */ addir10,r1,STACK_FRAME_OVERHEAD @@ -104,9 +96,6 @@ ret_from_syscall: cmplwi cr0,r5,0 bne-2f #endif /* CONFIG_PPC_47x */ -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r5, r7 -#endif kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -287,9 +276,6 @@ interrupt_return: bne-.Lrestore_nvgprs .Lfast_user_interrupt_return: -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r10, r11 -#endif kuap_check r2, r4 lwz r11,_NIP(r1) lwz r12,_MSR(r1) diff --git
[PATCH v2 38/43] powerpc/32: Only use prepare_transfer_to_handler function on book3s/32 and e500
Only book3s/32 and e500 have significant work to do in prepare_transfer_to_handler. Other 32 bit platforms have nothing to do at all. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 6 ++ arch/powerpc/kernel/head_32.h| 2 ++ arch/powerpc/kernel/head_booke.h | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 5cfa10816261..9c333e6db5fa 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -48,6 +48,7 @@ */ .align 12 +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) .globl prepare_transfer_to_handler prepare_transfer_to_handler: andi. r0,r9,MSR_PR @@ -61,15 +62,12 @@ prepare_transfer_to_handler: /* if from kernel, check interrupted DOZE/NAP mode */ 2: kuap_save_and_lock r11, r12, r9, r5, r6 -#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f bt- 31-TLF_SLEEPING,7f -#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ blr -#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING stw r12,TI_LOCAL_FLAGS(r2) b power_save_ppc32_restore @@ -80,8 +78,8 @@ prepare_transfer_to_handler: rlwinm r9,r9,0,~MSR_EE stw r9,_MSR(r11) b fast_exception_return -#endif _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) +#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ .globl transfer_to_syscall transfer_to_syscall: diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 267479072495..ca303762d8cc 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -132,7 +132,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm .macro prepare_transfer_to_handler +#ifdef CONFIG_PPC_BOOK3S_32 bl prepare_transfer_to_handler +#endif .endm .macro SYSCALL_ENTRY trapno diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 4d583fbef0b6..a2565023d2d0 100644 --- a/arch/powerpc/kernel/head_booke.h +++ 
b/arch/powerpc/kernel/head_booke.h @@ -92,7 +92,9 @@ END_BTB_FLUSH_SECTION .endm .macro prepare_transfer_to_handler +#ifdef CONFIG_E500 bl prepare_transfer_to_handler +#endif .endm .macro SYSCALL_ENTRY trapno intno srr1 -- 2.25.0
[PATCH v2 37/43] powerpc/32: Return directly from power_save_ppc32_restore()
transfer_to_handler_cont: is now just a blr. Directly perform blr in power_save_ppc32_restore(). Also remove useless setting of r11 in e500 version of power_save_ppc32_restore(). Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 3 --- arch/powerpc/kernel/idle_6xx.S | 2 +- arch/powerpc/kernel/idle_e500.S | 10 +- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 815a4ff1ba76..5cfa10816261 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -67,8 +67,6 @@ prepare_transfer_to_handler: bt- 31-TLF_NAPPING,4f bt- 31-TLF_SLEEPING,7f #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ - .globl transfer_to_handler_cont -transfer_to_handler_cont: blr #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) @@ -84,7 +82,6 @@ transfer_to_handler_cont: b fast_exception_return #endif _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) -_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) .globl transfer_to_syscall transfer_to_syscall: diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 153366e178c4..13cad9297d82 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -176,7 +176,7 @@ BEGIN_FTR_SECTION lwz r9,nap_save_hid1@l(r9) mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) - b transfer_to_handler_cont + blr _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) .data diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 7795727e7f08..9e1bc4502c50 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -81,13 +81,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) _GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in e500_idle */ stw r9,_NIP(r11)/* make it do a blr */ - -#ifdef CONFIG_SMP - lwz r11,TASK_CPU(r2)/* get cpu number * 4 */ - slwir11,r11,2 -#else - li r11,0 -#endif - - b transfer_to_handler_cont + blr 
_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) -- 2.25.0
[PATCH v2 36/43] powerpc/32: Set current->thread.regs in C interrupt entry
No need to do that in assembly, do it in C. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/interrupt.h | 4 +++- arch/powerpc/kernel/entry_32.S | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 861e6eadc98c..e6d71c2e3aa2 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -33,8 +33,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); - if (user_mode(regs)) + if (user_mode(regs)) { + current->thread.regs = regs; account_cpu_user_entry(); + } #endif /* * Book3E reconciles irq soft mask in asm diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8fe1c3fdfa6e..815a4ff1ba76 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -52,8 +52,7 @@ prepare_transfer_to_handler: andi. r0,r9,MSR_PR addir12, r2, THREAD - beq 2f /* if from user, fix up THREAD.regs */ - stw r3,PT_REGS(r12) + beq 2f #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif -- 2.25.0
[PATCH v2 35/43] powerpc/32: Save remaining registers in exception prolog
Save non volatile registers, XER, CTR, MSR and NIP in exception prolog. Also assign proper value to r2 and r3 there. For now, recalculate thread pointer in prepare_transfer_to_handler. It will disappear once KUAP is ported to C. And remove the comment which is now completely wrong. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 25 +++-- arch/powerpc/kernel/head_32.h | 12 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 289f111a5ac7..8fe1c3fdfa6e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -48,29 +48,11 @@ */ .align 12 -/* - * This code finishes saving the registers to the exception frame - * and jumps to the appropriate handler for the exception, turning - * on address translation. - * Note that we rely on the caller having set cr0.eq iff the exception - * occurred in kernel mode (i.e. MSR:PR = 0). - */ .globl prepare_transfer_to_handler prepare_transfer_to_handler: - SAVE_NVGPRS(r11) - addir3,r1,STACK_FRAME_OVERHEAD - stw r2,GPR2(r11) - stw r12,_NIP(r11) - stw r9,_MSR(r11) - andi. r2,r9,MSR_PR - mfctr r12 - mfspr r2,SPRN_XER - stw r12,_CTR(r11) - stw r2,_XER(r11) - mfspr r12,SPRN_SPRG_THREAD - tovirt(r12, r12) + andi. 
r0,r9,MSR_PR + addir12, r2, THREAD beq 2f /* if from user, fix up THREAD.regs */ - addir2, r12, -THREAD stw r3,PT_REGS(r12) #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 @@ -79,8 +61,7 @@ prepare_transfer_to_handler: /* if from kernel, check interrupted DOZE/NAP mode */ 2: - kuap_save_and_lock r11, r12, r9, r2, r6 - addir2, r12, -THREAD + kuap_save_and_lock r11, r12, r9, r5, r6 #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index ba20bfabdf63..267479072495 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -117,6 +117,18 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r10,_TRAP(r1) SAVE_4GPRS(3, r1) SAVE_2GPRS(7, r1) + SAVE_NVGPRS(r1) + stw r2,GPR2(r1) + stw r12,_NIP(r1) + stw r9,_MSR(r1) + mfctr r0 + mfspr r10,SPRN_XER + mfspr r2,SPRN_SPRG_THREAD + stw r0,_CTR(r1) + tovirt(r2, r2) + stw r10,_XER(r1) + addir2, r2, -THREAD + addir3,r1,STACK_FRAME_OVERHEAD .endm .macro prepare_transfer_to_handler -- 2.25.0
[PATCH v2 34/43] powerpc/32: Refactor saving of volatile registers in exception prologs
Exception prologs all do the same at the end: - Save trapno in stack - Mark stack with exception marker - Save r0 - Save r3 to r8 Refactor that into a COMMON_EXCEPTION_PROLOG_END macro. At the same time use r1 instead of r11. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h| 16 ++-- arch/powerpc/kernel/head_40x.S | 9 + arch/powerpc/kernel/head_booke.h | 26 +- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 84e6251622e8..ba20bfabdf63 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -104,15 +104,19 @@ li r10, MSR_KERNEL /* can take exceptions */ mtmsr r10 /* (except for mach check in rtas) */ #endif - stw r0,GPR0(r11) + COMMON_EXCEPTION_PROLOG_END \trapno +_ASM_NOKPROBE_SYMBOL(\name\()_virt) +.endm + +.macro COMMON_EXCEPTION_PROLOG_END trapno + stw r0,GPR0(r1) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addir10,r10,STACK_FRAME_REGS_MARKER@l - stw r10,8(r11) + stw r10,8(r1) li r10, \trapno - stw r10,_TRAP(r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) -_ASM_NOKPROBE_SYMBOL(\name\()_virt) + stw r10,_TRAP(r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 52b40bf529c6..e1360b88b6cb 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -157,14 +157,7 @@ _ENTRY(crit_esr) mfspr r12,SPRN_SRR2 mfspr r9,SPRN_SRR3 rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ - stw r0,GPR0(r11) - lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ - addir10, r10, STACK_FRAME_REGS_MARKER@l - stw r10, 8(r11) - li r10, \trapno + 2 - stw r10,_TRAP(r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END \trapno + 2 _ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fa566e89f18b..4d583fbef0b6 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -78,14 +78,18 @@ END_BTB_FLUSH_SECTION stw r1, 0(r11); \ mr r1, r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11);\ - lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ - addir10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - li r10, trapno; \ - stw r10,_TRAP(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END trapno + +.macro COMMON_EXCEPTION_PROLOG_END trapno + stw r0,GPR0(r1) + lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + addir10, r10, STACK_FRAME_REGS_MARKER@l + stw r10, 8(r1) + li r10, \trapno + stw r10,_TRAP(r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) +.endm .macro prepare_transfer_to_handler bl prepare_transfer_to_handler @@ -231,11 +235,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r1,0(r11); \ mr r1,r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - li r10, trapno; \ - stw r10,_TRAP(r11); \ - stw r0,GPR0(r11);\ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END trapno #define SAVE_xSRR(xSRR)\ mfspr r0,SPRN_##xSRR##0; \ -- 2.25.0
[PATCH v2 33/43] powerpc/32: Remove the xfer parameter in EXCEPTION() macro
The xfer parameter is not used anymore, remove it. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h| 2 +- arch/powerpc/kernel/head_40x.S | 42 arch/powerpc/kernel/head_44x.S | 10 ++-- arch/powerpc/kernel/head_8xx.S | 14 +++--- arch/powerpc/kernel/head_book3s_32.S | 72 ++-- arch/powerpc/kernel/head_booke.h | 2 +- arch/powerpc/kernel/head_fsl_booke.S | 28 +-- 7 files changed, 81 insertions(+), 89 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 412ede8610f7..84e6251622e8 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -186,7 +186,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #endif -#define EXCEPTION(n, label, hdlr, xfer)\ +#define EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label) \ EXCEPTION_PROLOG n label; \ prepare_transfer_to_handler;\ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7eb49ebd6000..52b40bf529c6 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -228,7 +228,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) b interrupt_return /* 0x0500 - External Interrupt Exception */ - EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, HardwareInterrupt, do_IRQ) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) @@ -246,19 +246,19 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) REST_NVGPRS(r1) b interrupt_return - EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0800, Trap_08, unknown_exception) + EXCEPTION(0x0900, Trap_09, unknown_exception) + EXCEPTION(0x0A00, Trap_0A, unknown_exception) + EXCEPTION(0x0B00, Trap_0B, unknown_exception) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) SYSCALL_ENTRY 0xc00 /* Trap_0D is commented out to get more 
space for system call exception */ -/* EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */ - EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) +/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */ + EXCEPTION(0x0E00, Trap_0E, unknown_exception) + EXCEPTION(0x0F00, Trap_0F, unknown_exception) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ START_EXCEPTION(0x1000, DecrementerTrap) @@ -433,19 +433,19 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) mfspr r10, SPRN_SPRG_SCRATCH5 b InstructionAccess - EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1300, Trap_13, unknown_exception) + EXCEPTION(0x1400, Trap_14, unknown_exception) + EXCEPTION(0x1500, Trap_15, unknown_exception) + EXCEPTION(0x1600, Trap_16, unknown_exception) + EXCEPTION(0x1700, Trap_17, unknown_exception) + EXCEPTION(0x1800, Trap_18, unknown_exception) + EXCEPTION(0x1900, Trap_19, unknown_exception) + EXCEPTION(0x1A00, Trap_1A, unknown_exception) + EXCEPTION(0x1B00, Trap_1B, unknown_exception) + EXCEPTION(0x1C00, Trap_1C, unknown_exception) + EXCEPTION(0x1D00, Trap_1D, unknown_exception) + EXCEPTION(0x1E00, Trap_1E, unknown_exception) + EXCEPTION(0x1F00, Trap_1F, 
unknown_exception) /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 813fa305c33b..5c106ac36626 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -263,8 +263,7 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input
[PATCH v2 32/43] powerpc/32: Dismantle EXC_XFER_STD/LITE/TEMPLATE
In order to get more control in exception prolog, dismantle all non standard exception macros, finishing with EXC_XFER_STD and EXC_XFER_LITE and EXC_XFER_TEMPLATE. Also remove transfer_to_handler_full and ret_from_except and ret_from_except_full as they are not used anymore. Last parameter of EXCEPTION() is now ignored, will be removed in a later patch to avoid too much churn. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 40 --- arch/powerpc/kernel/head_32.h| 21 arch/powerpc/kernel/head_40x.S | 33 --- arch/powerpc/kernel/head_8xx.S | 12 +-- arch/powerpc/kernel/head_book3s_32.S | 27 ++- arch/powerpc/kernel/head_booke.h | 49 +++- arch/powerpc/kernel/head_fsl_booke.S | 14 +--- 7 files changed, 91 insertions(+), 105 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ca14bc2f3418..289f111a5ac7 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -48,30 +48,6 @@ */ .align 12 -#ifdef CONFIG_BOOKE - .globl mcheck_transfer_to_handler -mcheck_transfer_to_handler: - /* fall through */ -_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) - - .globl debug_transfer_to_handler -debug_transfer_to_handler: - /* fall through */ -_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) - - .globl crit_transfer_to_handler -crit_transfer_to_handler: - /* fall through */ -_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) -#endif - -#ifdef CONFIG_40x - .globl crit_transfer_to_handler -crit_transfer_to_handler: - /* fall through */ -_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) -#endif - /* * This code finishes saving the registers to the exception frame * and jumps to the appropriate handler for the exception, turning @@ -79,13 +55,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) * Note that we rely on the caller having set cr0.eq iff the exception * occurred in kernel mode (i.e. MSR:PR = 0). 
*/ - .globl transfer_to_handler_full -transfer_to_handler_full: -_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) - /* fall through */ - - .globl transfer_to_handler -transfer_to_handler: .globl prepare_transfer_to_handler prepare_transfer_to_handler: SAVE_NVGPRS(r11) @@ -135,7 +104,6 @@ transfer_to_handler_cont: b fast_exception_return #endif _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) -_ASM_NOKPROBE_SYMBOL(transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) .globl transfer_to_syscall @@ -333,14 +301,6 @@ fast_exception_return: #endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) - .globl ret_from_except_full -ret_from_except_full: - /* fall through */ - - .globl ret_from_except -ret_from_except: -_ASM_NOKPROBE_SYMBOL(ret_from_except) - .globl interrupt_return interrupt_return: lwz r4,_MSR(r1) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 3ab0f3ad9a6a..412ede8610f7 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -189,20 +189,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define EXCEPTION(n, label, hdlr, xfer)\ START_EXCEPTION(n, label) \ EXCEPTION_PROLOG n label; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - bl tfer; \ - bl hdlr; \ - b ret - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \ - ret_from_except) + prepare_transfer_to_handler;\ + bl hdlr; \ + b interrupt_return .macro vmap_stack_overflow_exception __HEAD @@ -218,7 +207,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) lwz r1, emergency_ctx@l(r1) addir1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 0 vmap_stack_overflow - EXC_XFER_STD(0, stack_overflow_exception) + prepare_transfer_to_handler + bl stack_overflow_exception + b interrupt_return .endm #endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S 
b/arch/powerpc/kernel/head_40x.S index f3e5b462113f..7eb49ebd6000 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -187,8 +187,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \
[PATCH v2 31/43] powerpc/32: Only restore non volatile registers when required
Until now, non volatile registers were restored every time they were saved, i.e. using EXC_XFER_STD meant saving and restoring them while EXC_XFER_LITE meant neither saving nor restoring them. Now that they are always saved, EXC_XFER_STD means to restore them and EXC_XFER_LITE means to not restore them. Most of the users of EXC_XFER_STD only need to retrieve the non volatile registers. For them there is no need to restore the non volatile registers as they have not been modified. Only very few exceptions require non volatile registers restore. Opencode the few places which require saving of non volatile registers. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 1 - arch/powerpc/kernel/head_40x.S | 10 -- arch/powerpc/kernel/head_8xx.S | 24 arch/powerpc/kernel/head_book3s_32.S | 17 ++--- arch/powerpc/kernel/head_booke.h | 10 -- arch/powerpc/kernel/head_fsl_booke.S | 16 6 files changed, 62 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e2346662444d..ca14bc2f3418 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -335,7 +335,6 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) .globl ret_from_except_full ret_from_except_full: - REST_NVGPRS(r1) /* fall through */ .globl ret_from_except diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7270caff665c..f3e5b462113f 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -228,12 +228,18 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 - EXC_XFER_STD(0x600, alignment_exception) + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1 - EXC_XFER_STD(0x700, program_check_exception) + 
prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c48de97f42fc..86f844eb0e5a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -131,10 +131,18 @@ instruction_counter: /* Alignment exception */ START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 - EXC_XFER_STD(0x600, alignment_exception) + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + START_EXCEPTION(0x700, ProgramCheck) + EXCEPTION_PROLOG 0x700 ProgramCheck + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) @@ -149,7 +157,12 @@ instruction_counter: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. 
*/ - EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) + START_EXCEPTION(0x1000, SoftEmu) + EXCEPTION_PROLOG 0x1000 SoftEmu + prepare_transfer_to_handler + bl emulation_assist_interrupt + REST_NVGPRS(r1) + b interrupt_return /* * For the MPC8xx, this is a software tablewalk to load the instruction @@ -348,7 +361,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_2 0x1c00 DataBreakpoint handle_dar_dsisr=1 mfspr r4,SPRN_BAR stw r4,_DAR(r11) - EXC_XFER_STD(0x1c00, do_break) + prepare_transfer_to_handler + bl do_break + REST_NVGPRS(r1) + b interrupt_return #ifdef CONFIG_PERF_EVENTS START_EXCEPTION(0x1d00, InstructionBreakpoint) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 67dac65b8ec3..609b2eedd4f9 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -300,7 +300,10 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) andis. r0, r5, DSISR_DABRMATCH@h bne-1f EXC_XFER_LITE(0x300, do_page_fault) -1: EXC_XFER_STD(0x300, do_break) +1: prepare_transfer_to_handler + bl do_break + REST_NVGPRS(r1) + b interrupt_return
[PATCH v2 30/43] powerpc/32: Add a prepare_transfer_to_handler macro for exception prologs
In order to increase flexibility, add a macro that will for now call transfer_to_handler. As transfer_to_handler doesn't do the actual transfer anymore, also name it prepare_transfer_to_handler. The following patches will progressively remove the use of transfer_to_handler label. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 3 +++ arch/powerpc/kernel/head_32.h| 4 arch/powerpc/kernel/head_booke.h | 4 3 files changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index cb2fa00b8fc1..e2346662444d 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -86,6 +86,8 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) .globl transfer_to_handler transfer_to_handler: + .globl prepare_transfer_to_handler +prepare_transfer_to_handler: SAVE_NVGPRS(r11) addir3,r1,STACK_FRAME_OVERHEAD stw r2,GPR2(r11) @@ -132,6 +134,7 @@ transfer_to_handler_cont: stw r9,_MSR(r11) b fast_exception_return #endif +_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index bf4c288173ad..3ab0f3ad9a6a 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -115,6 +115,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm +.macro prepare_transfer_to_handler + bl prepare_transfer_to_handler +.endm + .macro SYSCALL_ENTRY trapno mfspr r9, SPRN_SRR1 mfspr r10, SPRN_SRR0 diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 27a7358c04bb..0f02b970e797 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -87,6 +87,10 @@ END_BTB_FLUSH_SECTION SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +.macro prepare_transfer_to_handler + bl prepare_transfer_to_handler +.endm + .macro SYSCALL_ENTRY trapno intno srr1 mfspr r10, SPRN_SPRG_THREAD #ifdef CONFIG_KVM_BOOKE_HV -- 2.25.0
[PATCH v2 29/43] powerpc/32: Save trap number on stack in exception prolog
Saving the trap number into the stack goes into the exception prolog, as EXC_XFER_xxx will soon disappear. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h| 14 - arch/powerpc/kernel/head_40x.S | 22 +++--- arch/powerpc/kernel/head_8xx.S | 14 - arch/powerpc/kernel/head_book3s_32.S | 14 - arch/powerpc/kernel/head_booke.h | 44 +++- arch/powerpc/kernel/head_fsl_booke.S | 4 +-- 6 files changed, 58 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 4d638d760a96..bf4c288173ad 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -10,10 +10,10 @@ * We assume sprg3 has the physical address of the current * task's thread_struct. */ -.macro EXCEPTION_PROLOGname handle_dar_dsisr=0 +.macro EXCEPTION_PROLOGtrapno name handle_dar_dsisr=0 EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 \name handle_dar_dsisr=\handle_dar_dsisr + EXCEPTION_PROLOG_2 \trapno \name handle_dar_dsisr=\handle_dar_dsisr .endm .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 @@ -56,7 +56,7 @@ #endif .endm -.macro EXCEPTION_PROLOG_2 name handle_dar_dsisr=0 +.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0 #ifdef CONFIG_PPC_8xx .if \handle_dar_dsisr li r11, RPN_PATTERN @@ -108,6 +108,8 @@ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addir10,r10,STACK_FRAME_REGS_MARKER@l stw r10,8(r11) + li r10, \trapno + stw r10,_TRAP(r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) _ASM_NOKPROBE_SYMBOL(\name\()_virt) @@ -182,12 +184,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define EXCEPTION(n, label, hdlr, xfer)\ START_EXCEPTION(n, label) \ - EXCEPTION_PROLOG label; \ + EXCEPTION_PROLOG n label; \ xfer(n, hdlr) #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ bl tfer; \ bl hdlr; \ b ret @@ -213,7 +213,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #endif lwz r1, emergency_ctx@l(r1) addir1, r1, THREAD_SIZE - 
INT_FRAME_SIZE - EXCEPTION_PROLOG_2 vmap_stack_overflow + EXCEPTION_PROLOG_2 0 vmap_stack_overflow EXC_XFER_STD(0, stack_overflow_exception) .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a65778380704..7270caff665c 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -104,7 +104,7 @@ _ENTRY(crit_esr) * Instead we use a couple of words of memory at low physical addresses. * This is OK since we don't support SMP on these processors. */ -.macro CRITICAL_EXCEPTION_PROLOG name +.macro CRITICAL_EXCEPTION_PROLOG trapno name stw r10,crit_r10@l(0) /* save two registers to work with */ stw r11,crit_r11@l(0) mfspr r10,SPRN_SRR0 @@ -161,6 +161,8 @@ _ENTRY(crit_esr) lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addir10, r10, STACK_FRAME_REGS_MARKER@l stw r10, 8(r11) + li r10, \trapno + 2 + stw r10,_TRAP(r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) _ASM_NOKPROBE_SYMBOL(\name\()_virt) @@ -184,7 +186,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) */ #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG label;\ + CRITICAL_EXCEPTION_PROLOG n label; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -206,7 +208,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) * if they can't resolve the lightweight TLB fault. */ START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1 + EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1 EXC_XFER_LITE(0x300, do_page_fault) /* @@ -214,7 +216,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) * This is caused by a fetch from non-execute or guarded pages. */ START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG InstructionAccess + EXCEPTION_PROLOG 0x400 InstructionAccess li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ @@ -225,12 +227,12 @@
[PATCH v2 27/43] powerpc/32: Call bad_page_fault() from do_page_fault()
Now that non volatile registers are saved at all times, there is no need to split bad_page_fault() out of do_page_fault(). Remove handle_page_fault() and use do_page_fault() directly. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 16 arch/powerpc/kernel/head_40x.S | 4 ++-- arch/powerpc/kernel/head_8xx.S | 4 ++-- arch/powerpc/kernel/head_book3s_32.S | 4 ++-- arch/powerpc/kernel/head_booke.h | 4 ++-- arch/powerpc/kernel/head_fsl_booke.S | 2 +- arch/powerpc/mm/fault.c | 2 +- 7 files changed, 10 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 4698fd1bd8c8..cb2fa00b8fc1 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -220,22 +220,6 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* - * Top-level page fault handling. - * This is in assembler because if do_page_fault tells us that - * it is a bad kernel page fault, we want to save the non-volatile - * registers before calling bad_page_fault. - */ - .globl handle_page_fault -handle_page_fault: - bl do_page_fault - cmpwi r3,0 - beq+ret_from_except - mr r4,r3 /* err arg for bad_page_fault */ - addir3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault - b ret_from_except_full - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. 
Then the state diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 08563d4170c6..a65778380704 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -207,7 +207,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1 - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) /* * 0x0400 - Instruction Storage Exception @@ -218,7 +218,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* 0x0500 - External Interrupt Exception */ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index eb1d40a8f2c4..4078d0dc2f18 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -301,7 +301,7 @@ instruction_counter: .Litlbie: stw r12, _DAR(r11) stw r5, _DSISR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -322,7 +322,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ tlbie r4 .Ldtlbie: /* 0x300 is DataAccess exception, needed by bad_page_fault() */ - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) #ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 626e9fbac2cc..81a6ec098dd1 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -299,7 +299,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) lwz r5, _DSISR(r11) andis. 
r0, r5, DSISR_DABRMATCH@h bne-1f - EXC_XFER_LITE(0x300, handle_page_fault) + EXC_XFER_LITE(0x300, do_page_fault) 1: EXC_XFER_STD(0x300, do_break) @@ -328,7 +328,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + EXC_XFER_LITE(0x400, do_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 009a56d70d76..036a69d16605 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -462,7 +462,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ stw r4, _DEAR(r11); \ - EXC_XFER_LITE(0x0300, handle_page_fault) + EXC_XFER_LITE(0x0300, do_page_fault) #define INSTRUCTION_STORAGE_EXCEPTION\ START_EXCEPTION(InstructionStorage)
[PATCH v2 28/43] powerpc/64e: Call bad_page_fault() from do_page_fault()
book3e/64 is the last one calling __bad_page_fault() from assembly. Save non volatile registers before calling do_page_fault() and modify do_page_fault() to call __bad_page_fault() for all platforms. Then it can be refactored by the call of bad_page_fault() which avoids the duplication of the exception table search. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/exceptions-64e.S | 8 +--- arch/powerpc/mm/fault.c | 17 - 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e8eb9992a270..b60f89078a3f 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1010,15 +1010,9 @@ storage_fault_common: addir3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) + bl save_nvgprs bl do_page_fault - cmpdi r3,0 - bne-1f b ret_from_except_lite -1: bl save_nvgprs - mr r4,r3 - addir3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault - b ret_from_except /* * Alignment exception doesn't fit entirely in the 0x100 bytes so it diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2e54bac99a22..7bcff3fca110 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -541,24 +541,15 @@ NOKPROBE_SYMBOL(___do_page_fault); static long __do_page_fault(struct pt_regs *regs) { - const struct exception_table_entry *entry; long err; err = ___do_page_fault(regs, regs->dar, regs->dsisr); if (likely(!err)) - return err; - - entry = search_exception_tables(regs->nip); - if (likely(entry)) { - instruction_pointer_set(regs, extable_fixup(entry)); return 0; - } else if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { - __bad_page_fault(regs, err); - return 0; - } else { - /* 32 and 64e handle the bad page fault in asm */ - return err; - } + + bad_page_fault(regs, err); + + return 0; } NOKPROBE_SYMBOL(__do_page_fault); -- 2.25.0
[PATCH v2 26/43] powerpc/32: Set regs parameter in r3 in transfer_to_handler
All exception handlers take regs as first parameter. Instead of setting r3 just before each call to a handler, set it in transfer_to_handler. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 5 ++--- arch/powerpc/kernel/head_32.h| 2 -- arch/powerpc/kernel/head_40x.S | 7 --- arch/powerpc/kernel/head_8xx.S | 3 --- arch/powerpc/kernel/head_book3s_32.S | 9 ++--- arch/powerpc/kernel/head_booke.h | 11 +-- arch/powerpc/kernel/head_fsl_booke.S | 4 +--- 7 files changed, 6 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d8fd2fd2c777..4698fd1bd8c8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -87,6 +87,7 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) .globl transfer_to_handler transfer_to_handler: SAVE_NVGPRS(r11) + addir3,r1,STACK_FRAME_OVERHEAD stw r2,GPR2(r11) stw r12,_NIP(r11) stw r9,_MSR(r11) @@ -99,8 +100,7 @@ transfer_to_handler: tovirt(r12, r12) beq 2f /* if from user, fix up THREAD.regs */ addir2, r12, -THREAD - addir11,r1,STACK_FRAME_OVERHEAD - stw r11,PT_REGS(r12) + stw r3,PT_REGS(r12) #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif @@ -228,7 +228,6 @@ ret_from_kernel_thread: */ .globl handle_page_fault handle_page_fault: - addir3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpwi r3,0 beq+ret_from_except diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 087445e45489..4d638d760a96 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -183,7 +183,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define EXCEPTION(n, label, hdlr, xfer)\ START_EXCEPTION(n, label) \ EXCEPTION_PROLOG label; \ - addir3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ @@ -215,7 +214,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) lwz r1, emergency_ctx@l(r1) addir1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 vmap_stack_overflow - addir3, r1, STACK_FRAME_OVERHEAD 
EXC_XFER_STD(0, stack_overflow_exception) .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 86883ccb3dc5..08563d4170c6 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -185,7 +185,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ CRITICAL_EXCEPTION_PROLOG label;\ - addir3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -227,13 +226,11 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 - addir3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) EXCEPTION_PROLOG ProgramCheck handle_dar_dsisr=1 - addir3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) @@ -494,7 +491,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) /* continue normal handling for a critical exception... */ 2: mfspr r4,SPRN_DBSR stw r4,_ESR(r11)/* DebugException takes DBSR in _ESR */ - addir3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -505,21 +501,18 @@ Decrementer: EXCEPTION_PROLOG Decrementer lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - addir3,r1,STACK_FRAME_OVERHEAD EXC_XFER_LITE(0x1000, timer_interrupt) /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ __HEAD FITException: EXCEPTION_PROLOG FITException - addir3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. 
(from 0x1020) */ __HEAD WDTException: CRITICAL_EXCEPTION_PROLOG WDTException - addir3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
[PATCH v2 25/43] powerpc/32: Replace ASM exception exit by C exception exit from ppc64
This patch replaces the PPC32 ASM exception exit by C exception exit. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 481 +--- arch/powerpc/kernel/interrupt.c | 4 + 2 files changed, 132 insertions(+), 353 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 7084289994b3..d8fd2fd2c777 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -129,9 +129,7 @@ transfer_to_handler_cont: stw r12,TI_LOCAL_FLAGS(r2) lwz r9,_MSR(r11)/* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE - lwz r12,_LINK(r11) /* and return to address in LR */ - kuap_restore r11, r2, r3, r4, r5 - lwz r2, GPR2(r11) + stw r9,_MSR(r11) b fast_exception_return #endif _ASM_NOKPROBE_SYMBOL(transfer_to_handler) @@ -334,69 +332,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) .globl fast_exception_return fast_exception_return: + lwz r6,_MSR(r1) + andi. r0,r6,MSR_PR + bne .Lfast_user_interrupt_return + li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) - andi. r10,r9,MSR_RI /* check for recoverable interrupt */ - beq 1f /* if not, we've got problems */ -#endif - -2: REST_4GPRS(3, r11) - lwz r10,_CCR(r11) - REST_GPR(1, r11) - mtcrr10 - lwz r10,_LINK(r11) - mtlrr10 - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ - li r10, 0 - stw r10, 8(r11) - REST_GPR(10, r11) -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR1,r9 - mtspr SPRN_SRR0,r12 - REST_GPR(9, r11) - REST_GPR(12, r11) - lwz r11,GPR11(r11) - rfi -#ifdef CONFIG_40x - b . 
/* Prevent prefetch past rfi */ -#endif -_ASM_NOKPROBE_SYMBOL(fast_exception_return) - -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) -/* check if the exception happened in a restartable section */ -1: lis r3,exc_exit_restart_end@ha - addir3,r3,exc_exit_restart_end@l - cmplw r12,r3 - bge 3f - lis r4,exc_exit_restart@ha - addir4,r4,exc_exit_restart@l - cmplw r12,r4 - blt 3f - lis r3,fee_restarts@ha - tophys(r3,r3) - lwz r5,fee_restarts@l(r3) - addir5,r5,1 - stw r5,fee_restarts@l(r3) - mr r12,r4 /* restart at exc_exit_restart */ - b 2b - - .section .bss - .align 2 -fee_restarts: - .space 4 - .previous - -/* aargh, a nonrecoverable interrupt, panic */ -/* aargh, we don't know which trap this is */ -3: - li r10,-1 - stw r10,_TRAP(r11) + andi. r0,r6,MSR_RI + bne+.Lfast_kernel_interrupt_return addir3,r1,STACK_FRAME_OVERHEAD - bl transfer_to_handler_full bl unrecoverable_exception - b ret_from_except + trap/* should not get here */ +#else + b .Lfast_kernel_interrupt_return #endif +_ASM_NOKPROBE_SYMBOL(fast_exception_return) .globl ret_from_except_full ret_from_except_full: @@ -405,213 +354,146 @@ ret_from_except_full: .globl ret_from_except ret_from_except: - /* Hard-disable interrupts so that current_thread_info()->flags -* can't change between when we test it and when we return -* from the interrupt. */ - /* Note: We don't bother telling lockdep about it */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - mtmsr r10 /* disable interrupts */ - - lwz r3,_MSR(r1) /* Returning to user mode? */ - andi. r0,r3,MSR_PR - beq resume_kernel - -user_exc_return: /* r10 contains MSR_KERNEL here */ - /* Check current_thread_info()->flags */ - lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_USER_WORK_MASK - bne do_work - -restore_user: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - /* Check whether this process has its own DBCR0 value. The internal - debug mode bit tells us that dbcr0 should be loaded. */ - lwz r0,THREAD+THREAD_DBCR0(r2) - andis. 
r10,r0,DBCR0_IDM@h - bnel- load_dbcr0 -#endif - ACCOUNT_CPU_USER_EXIT(r2, r10, r11) +_ASM_NOKPROBE_SYMBOL(ret_from_except) + + .globl interrupt_return +interrupt_return: + lwz r4,_MSR(r1) + andi. r0,r4,MSR_PR + beq .Lkernel_interrupt_return + addir3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_user_prepare + cmpwi r3,0 + bne-.Lrestore_nvgprs + +.Lfast_user_interrupt_return: #ifdef CONFIG_PPC_BOOK3S_32 kuep_unlock r10, r11 #endif +
[PATCH v2 24/43] powerpc/32: Always save non volatile registers on exception entry
In preparation of handling exception entry and exit in C, in order to simplify the handling, always save non volatile registers when entering an exception. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/ptrace.h | 6 ++ arch/powerpc/kernel/entry_32.S| 13 + arch/powerpc/kernel/head_32.h | 3 +-- arch/powerpc/kernel/head_booke.h | 2 +- 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 975ba260006a..0a5d8c6b13c4 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -209,16 +209,14 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) */ #define TRAP_FLAGS_MASK0x1F #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs)(((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs)((regs)->trap |= 1) +#define FULL_REGS(regs)true +#define SET_FULL_REGS(regs)do { } while (0) #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs)(((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) #define NV_REG_POISON 0xdeadbeef #define CHECK_FULL_REGS(regs)\ do { \ - if ((regs)->trap & 1) \ - printk(KERN_CRIT "%s: partial register set\n", __func__); \ } while (0) #endif /* __powerpc64__ */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index fb849ef922fb..7084289994b3 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -81,12 +81,12 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) */ .globl transfer_to_handler_full transfer_to_handler_full: - SAVE_NVGPRS(r11) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) /* fall through */ .globl transfer_to_handler transfer_to_handler: + SAVE_NVGPRS(r11) stw r2,GPR2(r11) stw r12,_NIP(r11) stw r9,_MSR(r11) @@ -234,10 +234,6 @@ handle_page_fault: bl do_page_fault cmpwi r3,0 beq+ret_from_except - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - clrrwi r0,r0,1 - stw 
r0,_TRAP(r1) mr r4,r3 /* err arg for bad_page_fault */ addir3,r1,STACK_FRAME_OVERHEAD bl __bad_page_fault @@ -810,13 +806,6 @@ recheck: do_user_signal:/* r10 contains MSR_KERNEL here */ ori r10,r10,MSR_EE mtmsr r10 /* hard-enable interrupts */ - /* save r13-r31 in the exception frame, if not already done */ - lwz r3,_TRAP(r1) - andi. r0,r3,1 - beq 2f - SAVE_NVGPRS(r1) - rlwinm r3,r3,0,0,30 - stw r3,_TRAP(r1) 2: addir3,r1,STACK_FRAME_OVERHEAD mr r4,r9 bl do_notify_resume diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index e09585b88ba7..087445e45489 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -198,7 +198,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \ ret_from_except) .macro vmap_stack_overflow_exception @@ -215,7 +215,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) lwz r1, emergency_ctx@l(r1) addir1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 vmap_stack_overflow - SAVE_NVGPRS(r11) addir3, r1, STACK_FRAME_OVERHEAD EXC_XFER_STD(0, stack_overflow_exception) .endm diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 3707f49f0b78..b31bf9e833c0 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -331,7 +331,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \ ret_from_except) /* Check for a single step debug exception while in an exception -- 2.25.0
[PATCH v2 23/43] powerpc/32: Perform normal function call in exception entry
Now that the MMU is re-enabled before calling the transfer function, we no longer need the hack where the addresses of the handler and of the return function sit just after the 'bl' to the transfer function, which that function retrieves via a read relative to 'lr'. Do a regular call to the transfer function, then to the handler, then branch to the return function. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 14 -- arch/powerpc/kernel/head_32.h| 4 ++-- arch/powerpc/kernel/head_booke.h | 6 +++--- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ad1fd33e1126..fb849ef922fb 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -104,7 +104,7 @@ transfer_to_handler: #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif - b 3f + blr /* if from kernel, check interrupted DOZE/NAP mode */ 2: @@ -118,13 +118,7 @@ transfer_to_handler: #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ .globl transfer_to_handler_cont transfer_to_handler_cont: -3: - mflrr9 - lwz r11,0(r9) /* virtual address of handler */ - lwz r9,4(r9)/* where to go when done */ - mtctr r11 - mtlrr9 - bctr/* jump to handler */ + blr #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING @@ -404,8 +398,8 @@ fee_restarts: stw r10,_TRAP(r11) addir3,r1,STACK_FRAME_OVERHEAD bl transfer_to_handler_full - .long unrecoverable_exception - .long ret_from_except + bl unrecoverable_exception + b ret_from_except #endif .globl ret_from_except_full diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 160ebd573c37..e09585b88ba7 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -190,8 +190,8 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) li r10,trap; \ stw r10,_TRAP(r11); \ bl tfer; \ - .long hdlr; \ - .long ret + bl hdlr; \ + b ret #define EXC_XFER_STD(n, hdlr) \ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, 
transfer_to_handler_full, \ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index a127d5e7efb4..3707f49f0b78 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -322,9 +322,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ - bl tfer; \ - .long hdlr; \ - .long ret + bl tfer; \ + bl hdlr; \ + b ret;\ #define EXC_XFER_STD(n, hdlr) \ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ -- 2.25.0
[PATCH v2 22/43] powerpc/32: Refactor booke critical registers saving
Refactor booke critical registers saving into a few macros and move it into the exception prolog directly. Keep the dedicated transfer_to_handler entry points for the moment, although they are now empty. They will be removed in a later patch to reduce churn. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 33 - arch/powerpc/kernel/head_booke.h | 41 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 01a064c8a96a..ad1fd33e1126 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -51,49 +51,16 @@ #ifdef CONFIG_BOOKE .globl mcheck_transfer_to_handler mcheck_transfer_to_handler: - mfspr r0,SPRN_DSRR0 - stw r0,_DSRR0(r11) - mfspr r0,SPRN_DSRR1 - stw r0,_DSRR1(r11) /* fall through */ _ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) .globl debug_transfer_to_handler debug_transfer_to_handler: - mfspr r0,SPRN_CSRR0 - stw r0,_CSRR0(r11) - mfspr r0,SPRN_CSRR1 - stw r0,_CSRR1(r11) /* fall through */ _ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) .globl crit_transfer_to_handler crit_transfer_to_handler: -#ifdef CONFIG_PPC_BOOK3E_MMU - mfspr r0,SPRN_MAS0 - stw r0,MAS0(r11) - mfspr r0,SPRN_MAS1 - stw r0,MAS1(r11) - mfspr r0,SPRN_MAS2 - stw r0,MAS2(r11) - mfspr r0,SPRN_MAS3 - stw r0,MAS3(r11) - mfspr r0,SPRN_MAS6 - stw r0,MAS6(r11) -#ifdef CONFIG_PHYS_64BIT - mfspr r0,SPRN_MAS7 - stw r0,MAS7(r11) -#endif /* CONFIG_PHYS_64BIT */ -#endif /* CONFIG_PPC_BOOK3E_MMU */ -#ifdef CONFIG_44x - mfspr r0,SPRN_MMUCR - stw r0,MMUCR(r11) -#endif - mfspr r0,SPRN_SRR0 - stw r0,_SRR0(r11) - mfspr r0,SPRN_SRR1 - stw r0,_SRR1(r11) - /* fall through */ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index f712b9bc6d62..a127d5e7efb4 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -229,6 +229,36 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) 
SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +#define SAVE_xSRR(xSRR)\ + mfspr r0,SPRN_##xSRR##0; \ + stw r0,_##xSRR##0(r1); \ + mfspr r0,SPRN_##xSRR##1; \ + stw r0,_##xSRR##1(r1) + + +.macro SAVE_MMU_REGS +#ifdef CONFIG_PPC_BOOK3E_MMU + mfspr r0,SPRN_MAS0 + stw r0,MAS0(r1) + mfspr r0,SPRN_MAS1 + stw r0,MAS1(r1) + mfspr r0,SPRN_MAS2 + stw r0,MAS2(r1) + mfspr r0,SPRN_MAS3 + stw r0,MAS3(r1) + mfspr r0,SPRN_MAS6 + stw r0,MAS6(r1) +#ifdef CONFIG_PHYS_64BIT + mfspr r0,SPRN_MAS7 + stw r0,MAS7(r1) +#endif /* CONFIG_PHYS_64BIT */ +#endif /* CONFIG_PPC_BOOK3E_MMU */ +#ifdef CONFIG_44x + mfspr r0,SPRN_MMUCR + stw r0,MMUCR(r1) +#endif +.endm + #define CRITICAL_EXCEPTION_PROLOG(intno) \ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1) #define DEBUG_EXCEPTION_PROLOG \ @@ -271,6 +301,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) START_EXCEPTION(label); \ CRITICAL_EXCEPTION_PROLOG(intno); \ addir3,r1,STACK_FRAME_OVERHEAD; \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -280,6 +312,10 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mfspr r5,SPRN_ESR;\ stw r5,_ESR(r11); \ addir3,r1,STACK_FRAME_OVERHEAD; \ + SAVE_xSRR(DSRR);\ + SAVE_xSRR(CSRR);\ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ mcheck_transfer_to_handler, ret_from_mcheck_exc) @@ -363,6 +399,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ addir3,r1,STACK_FRAME_OVERHEAD; \ +
[PATCH v2 21/43] powerpc/32: Provide a name to exception prolog continuation in virtual mode
Now that the prolog continuation is separated in .text, give it a name and mark it _ASM_NOKPROBE_SYMBOL. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h| 12 +++- arch/powerpc/kernel/head_40x.S | 22 -- arch/powerpc/kernel/head_8xx.S | 10 +- arch/powerpc/kernel/head_book3s_32.S | 14 +++--- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 3c0aa4538514..160ebd573c37 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -10,10 +10,10 @@ * We assume sprg3 has the physical address of the current * task's thread_struct. */ -.macro EXCEPTION_PROLOG handle_dar_dsisr=0 +.macro EXCEPTION_PROLOGname handle_dar_dsisr=0 EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=\handle_dar_dsisr + EXCEPTION_PROLOG_2 \name handle_dar_dsisr=\handle_dar_dsisr .endm .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 @@ -56,7 +56,7 @@ #endif .endm -.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 +.macro EXCEPTION_PROLOG_2 name handle_dar_dsisr=0 #ifdef CONFIG_PPC_8xx .if \handle_dar_dsisr li r11, RPN_PATTERN @@ -72,6 +72,7 @@ rfi .text +\name\()_virt: 1: stw r11,GPR1(r1) stw r11,0(r1) @@ -109,6 +110,7 @@ stw r10,8(r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) +_ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm .macro SYSCALL_ENTRY trapno @@ -180,7 +182,7 @@ #define EXCEPTION(n, label, hdlr, xfer)\ START_EXCEPTION(n, label) \ - EXCEPTION_PROLOG; \ + EXCEPTION_PROLOG label; \ addir3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) @@ -212,7 +214,7 @@ #endif lwz r1, emergency_ctx@l(r1) addir1, r1, THREAD_SIZE - INT_FRAME_SIZE - EXCEPTION_PROLOG_2 + EXCEPTION_PROLOG_2 vmap_stack_overflow SAVE_NVGPRS(r11) addir3, r1, STACK_FRAME_OVERHEAD EXC_XFER_STD(0, stack_overflow_exception) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index e7d8856714d3..86883ccb3dc5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ 
b/arch/powerpc/kernel/head_40x.S @@ -104,7 +104,7 @@ _ENTRY(crit_esr) * Instead we use a couple of words of memory at low physical addresses. * This is OK since we don't support SMP on these processors. */ -.macro CRITICAL_EXCEPTION_PROLOG +.macro CRITICAL_EXCEPTION_PROLOG name stw r10,crit_r10@l(0) /* save two registers to work with */ stw r11,crit_r11@l(0) mfspr r10,SPRN_SRR0 @@ -135,6 +135,7 @@ _ENTRY(crit_esr) .text 1: +\name\()_virt: lwz r11,crit_r1@l(0) stw r11,GPR1(r1) stw r11,0(r1) @@ -162,6 +163,7 @@ _ENTRY(crit_esr) stw r10, 8(r11) SAVE_4GPRS(3, r11) SAVE_2GPRS(7, r11) +_ASM_NOKPROBE_SYMBOL(\name\()_virt) .endm /* @@ -182,7 +184,7 @@ _ENTRY(crit_esr) */ #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG; \ + CRITICAL_EXCEPTION_PROLOG label;\ addir3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ crit_transfer_to_handler, ret_from_crit_exc) @@ -205,7 +207,7 @@ _ENTRY(crit_esr) * if they can't resolve the lightweight TLB fault. */ START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1 EXC_XFER_LITE(0x300, handle_page_fault) /* @@ -213,7 +215,7 @@ _ENTRY(crit_esr) * This is caused by a fetch from non-execute or guarded pages. 
*/ START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG + EXCEPTION_PROLOG InstructionAccess li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ @@ -224,13 +226,13 @@ _ENTRY(crit_esr) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG Alignment handle_dar_dsisr=1 addir3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG handle_dar_dsisr=1 + EXCEPTION_PROLOG ProgramCheck handle_dar_dsisr=1 addir3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) @@ -450,7 +452,7 @@ _ENTRY(crit_esr)
[PATCH v2 20/43] powerpc/32: Move exception prolog code into .text once MMU is back on
The space in the head section is rather constrained by the fact that exception vectors are spread every 0x100 bytes and sometimes we need to have "out of line" code because it doesn't fit. Now that we are enabling MMU early in the prolog, take that opportunity to jump somewhere else in the .text section where we don't have any space constraint. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h| 5 arch/powerpc/kernel/head_40x.S | 6 + arch/powerpc/kernel/head_8xx.S | 25 arch/powerpc/kernel/head_book3s_32.S | 34 4 files changed, 36 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index d97ec94b34da..3c0aa4538514 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -70,6 +70,8 @@ mtspr SPRN_SRR0, r11 mfspr r11, SPRN_SPRG_SCRATCH2 rfi + + .text 1: stw r11,GPR1(r1) stw r11,0(r1) @@ -163,12 +165,14 @@ */ #ifdef CONFIG_PPC_BOOK3S #defineSTART_EXCEPTION(n, label) \ + __HEAD; \ . = n; \ DO_KVM n; \ label: #else #defineSTART_EXCEPTION(n, label) \ + __HEAD; \ . = n; \ label: @@ -196,6 +200,7 @@ ret_from_except) .macro vmap_stack_overflow_exception + __HEAD vmap_stack_overflow: #ifdef CONFIG_SMP mfspr r1, SPRN_SPRG_THREAD diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index c14a71e0d6d3..e7d8856714d3 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -132,6 +132,8 @@ _ENTRY(crit_esr) ori r11, r11, 1f@l mtspr SPRN_SRR0, r11 rfi + + .text 1: lwz r11,crit_r1@l(0) stw r11,GPR1(r1) @@ -496,6 +498,7 @@ _ENTRY(crit_esr) crit_transfer_to_handler, ret_from_crit_exc) /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ + __HEAD Decrementer: EXCEPTION_PROLOG lis r0,TSR_PIS@h @@ -504,12 +507,14 @@ Decrementer: EXC_XFER_LITE(0x1000, timer_interrupt) /* Fixed Interval Timer (FIT) Exception. 
(from 0x1010) */ + __HEAD FITException: EXCEPTION_PROLOG addir3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. (from 0x1020) */ + __HEAD WDTException: CRITICAL_EXCEPTION_PROLOG; addir3,r1,STACK_FRAME_OVERHEAD; @@ -523,6 +528,7 @@ WDTException: * reserved. */ + __HEAD /* Damn, I came up one instruction too many to fit into the * exception space :-). Both the instruction and data TLB * miss get to this point to load the TLB. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 11789a077d76..d16d0ec71bb2 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -133,7 +133,7 @@ instruction_counter: START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG handle_dar_dsisr=1 addir3,r1,STACK_FRAME_OVERHEAD - b .Lalignment_exception_ool + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -141,11 +141,6 @@ instruction_counter: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - /* With VMAP_STACK there's not enough room for this at 0x600 */ - . = 0xa00 -.Lalignment_exception_ool: - EXC_XFER_STD(0x600, alignment_exception) - /* System call */ START_EXCEPTION(0xc00, SystemCall) SYSCALL_ENTRY 0xc00 @@ -339,26 +334,25 @@ DARFixed:/* Return from dcbx instruction bug workaround */ * support of breakpoints and such. Someday I will get around to * using them. 
*/ -do_databreakpoint: - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - addir3,r1,STACK_FRAME_OVERHEAD - mfspr r4,SPRN_BAR - stw r4,_DAR(r11) - EXC_XFER_STD(0x1c00, do_break) - START_EXCEPTION(0x1c00, DataBreakpoint) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_SRR0 cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l cror4*cr1+eq, 4*cr1+eq, 4*cr7+eq - bne cr1, do_databreakpoint + bne cr1, 1f mtcrr10 mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 rfi +1: EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 + addir3,r1,STACK_FRAME_OVERHEAD + mfspr r4,SPRN_BAR
[PATCH v2 19/43] powerpc/32: Use START_EXCEPTION() as much as possible
Everywhere where it is possible, use START_EXCEPTION(). This will help for proper exception init in future patches. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_40x.S | 12 +-- arch/powerpc/kernel/head_8xx.S | 27 + arch/powerpc/kernel/head_book3s_32.S | 30 3 files changed, 22 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 55fa99c5085c..c14a71e0d6d3 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -247,17 +247,15 @@ _ENTRY(crit_esr) EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ - . = 0x1000 + START_EXCEPTION(0x1000, DecrementerTrap) b Decrementer -/* 0x1010 - Fixed Interval Timer (FIT) Exception -*/ - . = 0x1010 +/* 0x1010 - Fixed Interval Timer (FIT) Exception */ + START_EXCEPTION(0x1010, FITExceptionTrap) b FITException -/* 0x1020 - Watchdog Timer (WDT) Exception -*/ - . = 0x1020 +/* 0x1020 - Watchdog Timer (WDT) Exception */ + START_EXCEPTION(0x1020, WDTExceptionTrap) b WDTException /* 0x1100 - Data TLB Miss Exception diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b63445c55f4d..11789a077d76 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -121,8 +121,7 @@ instruction_counter: EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) /* Machine check */ - . = 0x200 -MachineCheck: + START_EXCEPTION(0x200, MachineCheck) EXCEPTION_PROLOG handle_dar_dsisr=1 addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x200, machine_check_exception) @@ -131,8 +130,7 @@ MachineCheck: EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) /* Alignment exception */ - . = 0x600 -Alignment: + START_EXCEPTION(0x600, Alignment) EXCEPTION_PROLOG handle_dar_dsisr=1 addir3,r1,STACK_FRAME_OVERHEAD b .Lalignment_exception_ool @@ -149,8 +147,7 @@ Alignment: EXC_XFER_STD(0x600, alignment_exception) /* System call */ - . 
= 0xc00 -SystemCall: + START_EXCEPTION(0xc00, SystemCall) SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ @@ -161,7 +158,6 @@ SystemCall: */ EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) - . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction * TLB. The task switch loads the M_TWB register with the pointer to the first @@ -183,7 +179,7 @@ SystemCall: #define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif -InstructionTLBMiss: + START_EXCEPTION(0x1100, InstructionTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 @@ -239,8 +235,7 @@ InstructionTLBMiss: rfi #endif - . = 0x1200 -DataStoreTLBMiss: + START_EXCEPTION(0x1200, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcrr11 @@ -303,8 +298,7 @@ DataStoreTLBMiss: * to many reasons, such as executing guarded memory or illegal instruction * addresses. There is nothing to do but handle a big time error fault. */ - . = 0x1300 -InstructionTLBError: + START_EXCEPTION(0x1300, InstructionTLBError) EXCEPTION_PROLOG andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h @@ -320,8 +314,7 @@ InstructionTLBError: * many reasons, including a dirty update to a pte. We bail out to * a higher level function that can handle it. */ - . = 0x1400 -DataTLBError: + START_EXCEPTION(0x1400, DataTLBError) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_DAR cmpwi cr1, r11, RPN_PATTERN @@ -354,8 +347,7 @@ do_databreakpoint: stw r4,_DAR(r11) EXC_XFER_STD(0x1c00, do_break) - . = 0x1c00 -DataBreakpoint: + START_EXCEPTION(0x1c00, DataBreakpoint) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_SRR0 cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l @@ -368,8 +360,7 @@ DataBreakpoint: rfi #ifdef CONFIG_PERF_EVENTS - . 
= 0x1d00 -InstructionBreakpoint: + START_EXCEPTION(0x1d00, InstructionBreakpoint) mtspr SPRN_SPRG_SCRATCH0, r10 lwz r10, (instruction_counter - PAGE_OFFSET)@l(0) addir10, r10, -1 diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 9dc05890477d..8f5c8c8da63d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -255,9 +255,7 @@ __secondary_hold_acknowledge: * pointer when we take an exception from supervisor mode.) * -- paulus. */ - . = 0x200 - DO_KVM 0x200 -MachineCheck: +
[PATCH v2 18/43] powerpc/32: Add vmap_stack_overflow label inside the macro
For consistency, add in the macro the label used by exception prolog to branch to stack overflow processing. While at it, enclose the macro in #ifdef CONFIG_VMAP_STACK on the 8xx as already done on book3s/32. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h| 3 ++- arch/powerpc/kernel/head_8xx.S | 3 ++- arch/powerpc/kernel/head_book3s_32.S | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 15c6fc7cbbf5..d97ec94b34da 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -52,7 +52,7 @@ 1: #ifdef CONFIG_VMAP_STACK mtcrf 0x3f, r1 - bt 32 - THREAD_ALIGN_SHIFT, stack_overflow + bt 32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow #endif .endm @@ -196,6 +196,7 @@ ret_from_except) .macro vmap_stack_overflow_exception +vmap_stack_overflow: #ifdef CONFIG_SMP mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index cdbfa9d41353..b63445c55f4d 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -338,8 +338,9 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) -stack_overflow: +#ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception +#endif /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 59efbee7c080..9dc05890477d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -729,7 +729,6 @@ fast_hash_page_return: #endif /* CONFIG_PPC_BOOK3S_604 */ #ifdef CONFIG_VMAP_STACK -stack_overflow: vmap_stack_overflow_exception #endif -- 2.25.0
[PATCH v2 17/43] powerpc/32: Statically initialise first emergency context
The check of the emergency context initialisation in vmap_stack_overflow is buggy for the SMP case, as it compares r1 with 0 while in the SMP case r1 is offset by the CPU id. Instead of fixing it, just perform static initialisation of the first emergency context. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h | 6 +- arch/powerpc/kernel/setup_32.c | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 88b02bd91e8e..15c6fc7cbbf5 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -205,11 +205,7 @@ lis r1, emergency_ctx@ha #endif lwz r1, emergency_ctx@l(r1) - cmpwi cr1, r1, 0 - bne cr1, 1f - lis r1, init_thread_union@ha - addir1, r1, init_thread_union@l -1: addir1, r1, THREAD_SIZE - INT_FRAME_SIZE + addir1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 SAVE_NVGPRS(r11) addir3, r1, STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 8ba49a6bf515..d7c1f92152af 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -164,7 +164,7 @@ void __init irqstack_early_init(void) } #ifdef CONFIG_VMAP_STACK -void *emergency_ctx[NR_CPUS] __ro_after_init; +void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = _stack}; void __init emergency_stack_init(void) { -- 2.25.0
[PATCH v2 16/43] powerpc/32: Enable instruction translation at the same time as data translation
On 40x and 8xx, kernel text is pinned. On book3s/32, kernel text is mapped by BATs. Enable instruction translation at the same time as data translation, it makes things simpler. In syscall handler, MSR_RI can also be set at the same time because srr0/srr1 are already saved and r1 is set properly. On booke, translation is always on, so at the end all PPC32 have translation on early. Just update msr. Also update comment in power_save_ppc32_restore(). Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/entry_32.S | 30 -- arch/powerpc/kernel/head_32.h| 13 - arch/powerpc/kernel/head_40x.S | 10 +++--- arch/powerpc/kernel/head_booke.h | 6 -- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 33e97032ca25..01a064c8a96a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -153,19 +153,11 @@ transfer_to_handler: transfer_to_handler_cont: 3: mflrr9 - tovirt(r9, r9) lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9)/* where to go when done */ -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r10 + mtctr r11 mtlrr9 - rfi /* jump to handler, enable MMU */ -#ifdef CONFIG_40x - b . 
/* Prevent prefetch past rfi */ -#endif + bctr/* jump to handler */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING @@ -444,8 +436,6 @@ fee_restarts: li r10,-1 stw r10,_TRAP(r11) addir3,r1,STACK_FRAME_OVERHEAD - lis r10,MSR_KERNEL@h - ori r10,r10,MSR_KERNEL@l bl transfer_to_handler_full .long unrecoverable_exception .long ret_from_except @@ -945,16 +935,20 @@ _GLOBAL(enter_rtas) mtspr SPRN_SRR1,r9 rfi 1: - li r0, MSR_KERNEL & ~MSR_IR/* can take DTLB miss */ - mtmsr r0 - isync + lis r8, 1f@h + ori r8, r8, 1f@l + LOAD_REG_IMMEDIATE(r9,MSR_KERNEL) + mtspr SPRN_SRR0,r8 + mtspr SPRN_SRR1,r9 + rfi /* Reactivate MMU translation */ +1: lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */ lwz r9,8(r1)/* original msr value */ addir1,r1,INT_FRAME_SIZE li r0,0 stw r0, THREAD + RTAS_SP(r2) - mtspr SPRN_SRR0,r8 - mtspr SPRN_SRR1,r9 - rfi /* return to caller */ + mtlrr8 + mtmsr r9 + blr /* return to caller */ _ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 910f86642eec..88b02bd91e8e 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -63,10 +63,14 @@ mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ .endif #endif - LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ - mtmsr r11 - isync + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */ + mtspr SPRN_SRR1, r11 + lis r11, 1f@h + ori r11, r11, 1f@l + mtspr SPRN_SRR0, r11 mfspr r11, SPRN_SPRG_SCRATCH2 + rfi +1: stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 @@ -94,7 +98,7 @@ #elif defined(CONFIG_PPC_8xx) mtspr SPRN_EID, r2/* Set MSR_RI */ #else - li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ + li r10, MSR_KERNEL /* can take exceptions */ mtmsr r10 /* (except for mach check in rtas) */ #endif stw r0,GPR0(r11) @@ -179,7 +183,6 @@ #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw 
r10,_TRAP(r11); \ - LOAD_REG_IMMEDIATE(r10, msr); \ bl tfer; \ .long hdlr; \ .long ret diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7da673ec63ef..55fa99c5085c 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -126,9 +126,13 @@ _ENTRY(crit_esr) lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ 1: stw r1,crit_r1@l(0) addir1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ - LOAD_REG_IMMEDIATE(r11,MSR_KERNEL & ~(MSR_IR | MSR_RI)) - mtmsr r11 -
[PATCH v2 15/43] powerpc/32: Tag DAR in EXCEPTION_PROLOG_2 for the 8xx
The 8xx requires the DAR to be tagged with a magic value in order to fix up the DAR on faults generated by 'dcbX', as the 8xx forgets to update the DAR for those faults. Do the tagging as early as possible, that is, before enabling the MMU. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h | 6 ++ arch/powerpc/kernel/head_8xx.S | 18 ++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 1b707755c68e..910f86642eec 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -57,6 +57,12 @@ .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 +#ifdef CONFIG_PPC_8xx + .if \handle_dar_dsisr + li r11, RPN_PATTERN + mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ + .endif +#endif LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */ mtmsr r11 isync diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 792e2fd86479..cdbfa9d41353 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -30,6 +30,12 @@ #include #include +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#define RPN_PATTERN0x00f0 + #include "head_32.h" .macro compare_to_kernel_boundary scratch, addr @@ -42,12 +48,6 @@ #endif .endm -/* - * Value for the bits that have fixed value in RPN entries. - * Also used for tagging DAR for DTLBerror. - */ -#define RPN_PATTERN0x00f0 - #define PAGE_SHIFT_512K19 #define PAGE_SHIFT_8M 23 @@ -124,8 +124,6 @@ instruction_counter: . = 0x200 MachineCheck: EXCEPTION_PROLOG handle_dar_dsisr=1 - li r6, RPN_PATTERN - mtspr SPRN_DAR, r6/* Tag DAR, to be used in DTLB Error */ addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x200, machine_check_exception) @@ -136,8 +134,6 @@ MachineCheck: . 
= 0x600 Alignment: EXCEPTION_PROLOG handle_dar_dsisr=1 - li r6, RPN_PATTERN - mtspr SPRN_DAR, r6/* Tag DAR, to be used in DTLB Error */ addir3,r1,STACK_FRAME_OVERHEAD b .Lalignment_exception_ool @@ -331,8 +327,6 @@ DataTLBError: cmpwi cr1, r11, RPN_PATTERN beq-cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ - li r11, RPN_PATTERN - mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 handle_dar_dsisr=1 lwz r4, _DAR(r11) -- 2.25.0
[PATCH v2 14/43] powerpc/32: Always enable data translation in exception prolog
If the code can use a stack in vm area, it can also use a stack in linear space. Simplify code by removing old non VMAP stack code on PPC32. That means the data translation is now re-enabled early in exception prolog in all cases, not only when using VMAP stacks. While we are touching EXCEPTION_PROLOG macros, remove the unused for_rtas parameter in EXCEPTION_PROLOG_1. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/processor.h | 4 +- arch/powerpc/kernel/asm-offsets.c| 2 - arch/powerpc/kernel/entry_32.S | 19 +++ arch/powerpc/kernel/fpu.S| 2 - arch/powerpc/kernel/head_32.h| 85 +--- arch/powerpc/kernel/head_40x.S | 23 arch/powerpc/kernel/head_8xx.S | 19 +-- arch/powerpc/kernel/head_book3s_32.S | 47 +-- arch/powerpc/kernel/idle_6xx.S | 12 +--- arch/powerpc/kernel/idle_e500.S | 4 +- arch/powerpc/kernel/vector.S | 2 - arch/powerpc/mm/book3s32/hash_low.S | 14 - 12 files changed, 17 insertions(+), 216 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 43cbd9281055..eae16facc390 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -147,11 +147,9 @@ struct thread_struct { #ifdef CONFIG_PPC_RTAS unsigned long rtas_sp;/* stack pointer for when in RTAS */ #endif -#endif #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) unsigned long kuap; /* opened segments for user access */ #endif -#ifdef CONFIG_VMAP_STACK unsigned long srr0; unsigned long srr1; unsigned long dar; @@ -160,7 +158,7 @@ struct thread_struct { unsigned long r0, r3, r4, r5, r6, r8, r9, r11; unsigned long lr, ctr; #endif -#endif +#endif /* CONFIG_PPC32 */ /* Debug Registers */ struct debug_reg debug; #ifdef CONFIG_PPC_FPU_REGS diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 73620536c801..85ba2b0bc8d8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -131,7 +131,6 @@ int main(void) OFFSET(KSP_VSID, thread_struct, 
ksp_vsid); #else /* CONFIG_PPC64 */ OFFSET(PGDIR, thread_struct, pgdir); -#ifdef CONFIG_VMAP_STACK OFFSET(SRR0, thread_struct, srr0); OFFSET(SRR1, thread_struct, srr1); OFFSET(DAR, thread_struct, dar); @@ -148,7 +147,6 @@ int main(void) OFFSET(THLR, thread_struct, lr); OFFSET(THCTR, thread_struct, ctr); #endif -#endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 66198e6e25e7..33e97032ca25 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -129,7 +129,7 @@ transfer_to_handler: stw r12,_CTR(r11) stw r2,_XER(r11) mfspr r12,SPRN_SPRG_THREAD - tovirt_vmstack r12, r12 + tovirt(r12, r12) beq 2f /* if from user, fix up THREAD.regs */ addir2, r12, -THREAD addir11,r1,STACK_FRAME_OVERHEAD @@ -153,8 +153,7 @@ transfer_to_handler: transfer_to_handler_cont: 3: mflrr9 - tovirt_novmstack r2, r2 /* set r2 to current */ - tovirt_vmstack r9, r9 + tovirt(r9, r9) lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9)/* where to go when done */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) @@ -933,7 +932,6 @@ _GLOBAL(enter_rtas) lis r6,1f@ha/* physical return address for rtas */ addir6,r6,1f@l tophys(r6,r6) - tophys_novmstack r7, r1 lwz r8,RTASENTRY(r4) lwz r4,RTASBASE(r4) mfmsr r9 @@ -942,22 +940,19 @@ _GLOBAL(enter_rtas) mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlrr6 - stw r7, THREAD + RTAS_SP(r2) + stw r1, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 rfi -1: tophys_novmstack r9, r1 -#ifdef CONFIG_VMAP_STACK +1: li r0, MSR_KERNEL & ~MSR_IR/* can take DTLB miss */ mtmsr r0 isync -#endif - lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ - lwz r9,8(r9)/* original msr value */ + lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */ + lwz r9,8(r1)/* original msr value */ addir1,r1,INT_FRAME_SIZE li r0,0 - tophys_novmstack r7, 
r2 - stw r0, THREAD + RTAS_SP(r7) + stw r0, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 rfi
[PATCH v2 13/43] powerpc/32: Remove ksp_limit
ksp_limit is there to help detect stack overflows. That is specific to ppc32 as it was removed from ppc64 in commit cbc9565ee826 ("powerpc: Remove ksp_limit on ppc64"). There are other means for detecting stack overflows. As ppc64 has proven to not need it, ppc32 should be able to do without it too. Let's remove it and simplify exception handling. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/processor.h | 2 - arch/powerpc/kernel/asm-offsets.c| 2 - arch/powerpc/kernel/entry_32.S | 68 +--- arch/powerpc/kernel/head_40x.S | 2 - arch/powerpc/kernel/head_booke.h | 1 - arch/powerpc/kernel/misc_32.S| 14 -- arch/powerpc/kernel/process.c| 3 -- arch/powerpc/kernel/traps.c | 9 arch/powerpc/lib/sstep.c | 9 9 files changed, 2 insertions(+), 108 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8acc3590c971..43cbd9281055 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -144,7 +144,6 @@ struct thread_struct { #endif #ifdef CONFIG_PPC32 void*pgdir; /* root of page-table tree */ - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ #ifdef CONFIG_PPC_RTAS unsigned long rtas_sp;/* stack pointer for when in RTAS */ #endif @@ -282,7 +281,6 @@ struct thread_struct { #ifdef CONFIG_PPC32 #define INIT_THREAD { \ .ksp = INIT_SP, \ - .ksp_limit = INIT_SP_LIMIT, \ .pgdir = swapper_pg_dir, \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ SPEFSCR_INIT \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f3a662201a9f..73620536c801 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -91,7 +91,6 @@ int main(void) DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); #else - OFFSET(KSP_LIMIT, thread_struct, ksp_limit); #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); #endif @@ -381,7 +380,6 @@ int main(void) DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); 
DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); - DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); #endif #endif diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 4ffbcf3df72e..66198e6e25e7 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -94,12 +94,6 @@ crit_transfer_to_handler: mfspr r0,SPRN_SRR1 stw r0,_SRR1(r11) - /* set the stack limit to the current stack */ - mfspr r8,SPRN_SPRG_THREAD - lwz r0,KSP_LIMIT(r8) - stw r0,SAVED_KSP_LIMIT(r11) - rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) - stw r0,KSP_LIMIT(r8) /* fall through */ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif @@ -107,12 +101,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #ifdef CONFIG_40x .globl crit_transfer_to_handler crit_transfer_to_handler: - /* set the stack limit to the current stack */ - mfspr r8,SPRN_SPRG_THREAD - lwz r0,KSP_LIMIT(r8) - stw r0,saved_ksp_limit@l(0) - rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) - stw r0,KSP_LIMIT(r8) /* fall through */ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) #endif @@ -151,17 +139,10 @@ transfer_to_handler: #endif b 3f -2: /* if from kernel, check interrupted DOZE/NAP mode and - * check for stack overflow - */ + /* if from kernel, check interrupted DOZE/NAP mode */ +2: kuap_save_and_lock r11, r12, r9, r2, r6 addir2, r12, -THREAD -#ifndef CONFIG_VMAP_STACK - lwz r9,KSP_LIMIT(r12) - cmplw r1,r9 /* if r1 <= ksp_limit */ - ble-stack_ovf /* then the kernel stack overflowed */ -#endif -5: #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 @@ -204,37 +185,6 @@ transfer_to_handler_cont: _ASM_NOKPROBE_SYMBOL(transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) -#ifndef CONFIG_VMAP_STACK -/* - * On kernel stack overflow, load up an initial stack pointer - * and call 
StackOverflow(regs), which should not return. - */ -stack_ovf: - /* sometimes we use a statically-allocated stack, which is OK. */ - lis r12,_end@h - ori r12,r12,_end@l - cmplw r1,r12 - ble 5b /* r1 <= &_end is OK */ - SAVE_NVGPRS(r11) - addir3,r1,STACK_FRAME_OVERHEAD - lis r1,init_thread_union@ha - addi
[PATCH v2 12/43] powerpc/32: Use fast instruction to set MSR RI in exception prolog on 8xx
8xx has registers SPRN_NRI, SPRN_EID and SPRN_EIE for changing MSR EE and RI. Use SPRN_EID in exception prolog to set RI. On an 8xx, it reduces the null_syscall test by 3 cycles. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_32.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index ac6b391f1493..25ee6b26ef5a 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -107,6 +107,8 @@ #endif #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#elif defined(CONFIG_PPC_8xx) + mtspr SPRN_EID, r2/* Set MSR_RI */ #else #ifdef CONFIG_VMAP_STACK li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ -- 2.25.0
[PATCH v2 11/43] powerpc/32: Handle bookE debugging in C in exception entry
The handling of SPRN_DBCR0 and other registers can easily be done in C instead of ASM. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/interrupt.h | 2 ++ arch/powerpc/kernel/entry_32.S | 23 --- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index c35368adbe71..861e6eadc98c 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -65,6 +65,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (user_mode(regs)) account_cpu_user_entry(); #endif + + booke_restore_dbcr0(); } /* diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0f3f1bdd909e..4ffbcf3df72e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -146,32 +146,9 @@ transfer_to_handler: addir2, r12, -THREAD addir11,r1,STACK_FRAME_OVERHEAD stw r11,PT_REGS(r12) -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h -#endif #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - beq+3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - tophys_novmstack r11,r11 - addir11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r9,TASK_CPU(r2) - slwir9,r9,2 - add r11,r11,r9 -#endif - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 -#endif - b 3f 2: /* if from kernel, check interrupted DOZE/NAP mode and -- 2.25.0
[PATCH v2 09/43] powerpc/32: Reconcile interrupts in C
There is no need for this to be in asm anymore, use the new interrupt entry wrapper. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/interrupt.h | 4 ++ arch/powerpc/kernel/entry_32.S | 58 2 files changed, 4 insertions(+), 58 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 232a4847f596..b2f69e5dcb50 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -29,6 +29,10 @@ static inline void booke_restore_dbcr0(void) static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { +#ifdef CONFIG_PPC32 + if (!arch_irq_disabled_regs(regs)) + trace_hardirqs_off(); +#endif /* * Book3E reconciles irq soft mask in asm */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 535c55f4393a..0f18fe14649c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -202,22 +202,6 @@ transfer_to_handler_cont: lwz r9,4(r9)/* where to go when done */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 -#endif -#ifdef CONFIG_TRACE_IRQFLAGS - /* -* When tracing IRQ state (lockdep) we enable the MMU before we call -* the IRQ tracing functions as they might access vmalloc space or -* perform IOs for console output. -* -* To speed up the syscall path where interrupts stay on, let's check -* first if we are changing the MSR value at all. -*/ - tophys_novmstack r12, r1 - lwz r12,_MSR(r12) - andi. r12,r12,MSR_EE - bne 1f - - /* MSR isn't changing, just transition directly */ #endif mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r10 @@ -244,48 +228,6 @@ transfer_to_handler_cont: _ASM_NOKPROBE_SYMBOL(transfer_to_handler) _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) -#ifdef CONFIG_TRACE_IRQFLAGS -1: /* MSR is changing, re-enable MMU so we can notify lockdep. 
We need to -* keep interrupts disabled at this point otherwise we might risk -* taking an interrupt before we tell lockdep they are enabled. -*/ - lis r12,reenable_mmu@h - ori r12,r12,reenable_mmu@l - LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) - mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r0 - rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif - -reenable_mmu: - /* -* We save a bunch of GPRs, -* r3 can be different from GPR3(r1) at this point, r9 and r11 -* contains the old MSR and handler address respectively, -* r0, r4-r8, r12, CCR, CTR, XER etc... are left -* clobbered as they aren't useful past this point. -*/ - - stwu r1,-32(r1) - stw r9,8(r1) - stw r11,12(r1) - stw r3,16(r1) - - /* If we are disabling interrupts (normal case), simply log it with -* lockdep -*/ -1: bl trace_hardirqs_off - lwz r3,16(r1) - lwz r11,12(r1) - lwz r9,8(r1) - addi r1,r1,32 - mtctr r11 - mtlr r9 - bctr /* jump to handler */ -#endif /* CONFIG_TRACE_IRQFLAGS */ - #ifndef CONFIG_VMAP_STACK /* * On kernel stack overflow, load up an initial stack pointer -- 2.25.0
[PATCH v2 10/43] powerpc/32: Entry cpu time accounting in C
There is no need for this to be in asm, use the new interrupt entry wrapper. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/interrupt.h | 3 +++ arch/powerpc/include/asm/ppc_asm.h | 10 -- arch/powerpc/kernel/entry_32.S | 1 - 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index b2f69e5dcb50..c35368adbe71 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -32,6 +32,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup #ifdef CONFIG_PPC32 if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); + + if (user_mode(regs)) + account_cpu_user_entry(); #endif /* * Book3E reconciles irq soft mask in asm diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 3dceb64fc9af..8998122fc7e2 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -23,18 +23,8 @@ */ #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) #else -#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)\ - MFTB(ra); /* get timebase */ \ - PPC_LL rb, ACCOUNT_STARTTIME_USER(ptr);\ - PPC_STL ra, ACCOUNT_STARTTIME(ptr); \ - subf rb,rb,ra; /* subtract start value */ \ - PPC_LL ra, ACCOUNT_USER_TIME(ptr); \ - add ra,ra,rb; /* add on to user time */ \ - PPC_STL ra, ACCOUNT_USER_TIME(ptr); \ - #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) \ MFTB(ra); /* get timebase */ \ PPC_LL rb, ACCOUNT_STARTTIME(ptr); \ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0f18fe14649c..0f3f1bdd909e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -152,7 +152,6 @@ transfer_to_handler: lwz r12,THREAD_DBCR0(r12) andis. r12,r12,DBCR0_IDM@h #endif - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) #ifdef CONFIG_PPC_BOOK3S_32 kuep_lock r11, r12 #endif -- 2.25.0