Re: [PATCH 9/9] zsmalloc: remove the zsmalloc file system

2021-03-09 Thread Minchan Kim
On Tue, Mar 09, 2021 at 04:53:48PM +0100, Christoph Hellwig wrote:
> Just use the generic anon_inode file system.
> 
> Signed-off-by: Christoph Hellwig 
Acked-by: Minchan Kim 


[PATCH] powerpc: fix warning comparing pointer to 0

2021-03-09 Thread Jiapeng Chong
Fix the following coccicheck warning:

./arch/powerpc/platforms/powermac/pfunc_core.c:688:40-41: WARNING
comparing pointer to 0.

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 
---
 arch/powerpc/platforms/powermac/pfunc_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c 
b/arch/powerpc/platforms/powermac/pfunc_core.c
index 94df0a9..a5aa40f 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -685,7 +685,7 @@ static int pmf_add_functions(struct pmf_device *dev, void 
*driverdata)
const int plen = strlen(PP_PREFIX);
int count = 0;
 
-   for (pp = dev->node->properties; pp != 0; pp = pp->next) {
+   for (pp = dev->node->properties; pp; pp = pp->next) {
const char *name;
if (strncmp(pp->name, PP_PREFIX, plen) != 0)
continue;
-- 
1.8.3.1



Re: [PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb

2021-03-09 Thread Minchan Kim
On Tue, Mar 09, 2021 at 04:53:40PM +0100, Christoph Hellwig wrote:
> Rename alloc_anon_inode to alloc_anon_inode_sb to free the name for a new
> variant that does not need boilerplate to create a super_block first.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/powerpc/platforms/pseries/cmm.c | 2 +-
>  drivers/dma-buf/dma-buf.c| 2 +-
>  drivers/gpu/drm/drm_drv.c| 2 +-
>  drivers/misc/cxl/api.c   | 2 +-
>  drivers/misc/vmw_balloon.c   | 2 +-
>  drivers/scsi/cxlflash/ocxl_hw.c  | 2 +-
>  drivers/virtio/virtio_balloon.c  | 2 +-
>  fs/aio.c | 2 +-
>  fs/anon_inodes.c | 4 ++--
>  fs/libfs.c   | 2 +-
>  include/linux/fs.h   | 2 +-
>  kernel/resource.c| 2 +-
>  mm/z3fold.c  | 2 +-
>  mm/zsmalloc.c| 2 +-
>  14 files changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/cmm.c 
> b/arch/powerpc/platforms/pseries/cmm.c
> index 45a3a3022a85c9..6d36b858b14df1 100644
> --- a/arch/powerpc/platforms/pseries/cmm.c
> +++ b/arch/powerpc/platforms/pseries/cmm.c
> @@ -580,7 +580,7 @@ static int cmm_balloon_compaction_init(void)
>   return rc;
>   }
>  
> - b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
> + b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
>   if (IS_ERR(b_dev_info.inode)) {
>   rc = PTR_ERR(b_dev_info.inode);
>   b_dev_info.inode = NULL;
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index f264b70c383eb4..dedcc9483352dc 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -445,7 +445,7 @@ static inline int is_dma_buf_file(struct file *file)
>  static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
>  {
>   struct file *file;
> - struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
> + struct inode *inode = alloc_anon_inode_sb(dma_buf_mnt->mnt_sb);
>  
>   if (IS_ERR(inode))
>   return ERR_CAST(inode);
> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> index 20d22e41d7ce74..87e7214a8e3565 100644
> --- a/drivers/gpu/drm/drm_drv.c
> +++ b/drivers/gpu/drm/drm_drv.c
> @@ -519,7 +519,7 @@ static struct inode *drm_fs_inode_new(void)
>   return ERR_PTR(r);
>   }
>  
> - inode = alloc_anon_inode(drm_fs_mnt->mnt_sb);
> + inode = alloc_anon_inode_sb(drm_fs_mnt->mnt_sb);
>   if (IS_ERR(inode))
>   simple_release_fs(&drm_fs_mnt, &drm_fs_cnt);
>  
> diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
> index b493de962153ba..2efbf6c98028ef 100644
> --- a/drivers/misc/cxl/api.c
> +++ b/drivers/misc/cxl/api.c
> @@ -73,7 +73,7 @@ static struct file *cxl_getfile(const char *name,
>   goto err_module;
>   }
>  
> - inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
> + inode = alloc_anon_inode_sb(cxl_vfs_mount->mnt_sb);
>   if (IS_ERR(inode)) {
>   file = ERR_CAST(inode);
>   goto err_fs;
> diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
> index b837e7eba5f7dc..5d057a05ddbee8 100644
> --- a/drivers/misc/vmw_balloon.c
> +++ b/drivers/misc/vmw_balloon.c
> @@ -1900,7 +1900,7 @@ static __init int vmballoon_compaction_init(struct 
> vmballoon *b)
>   return PTR_ERR(vmballoon_mnt);
>  
>   b->b_dev_info.migratepage = vmballoon_migratepage;
> - b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
> + b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb);
>  
>   if (IS_ERR(b->b_dev_info.inode))
>   return PTR_ERR(b->b_dev_info.inode);
> diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
> index 244fc27215dc79..40184ed926b557 100644
> --- a/drivers/scsi/cxlflash/ocxl_hw.c
> +++ b/drivers/scsi/cxlflash/ocxl_hw.c
> @@ -88,7 +88,7 @@ static struct file *ocxlflash_getfile(struct device *dev, 
> const char *name,
>   goto err2;
>   }
>  
> - inode = alloc_anon_inode(ocxlflash_vfs_mount->mnt_sb);
> + inode = alloc_anon_inode_sb(ocxlflash_vfs_mount->mnt_sb);
>   if (IS_ERR(inode)) {
>   rc = PTR_ERR(inode);
>   dev_err(dev, "%s: alloc_anon_inode failed rc=%d\n",
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 8985fc2cea8615..cae76ee5bdd688 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -916,7 +916,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
>   }
>  
>   vb->vb_dev_info.migratepage = virtballoon_migratepage;
> - vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
> + vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
>   if (IS_ERR(vb->vb_dev_info.inode)) {
>   err = PTR_ERR(vb->vb_dev_info.inode);
>   goto 

Re: [PATCH V2] mm/memtest: Add ARCH_USE_MEMTEST

2021-03-09 Thread Anshuman Khandual



On 3/1/21 10:02 AM, Anshuman Khandual wrote:
> early_memtest() does not get called from all architectures. Hence enabling
> CONFIG_MEMTEST and providing a valid memtest=[1..N] kernel command line
> option might not trigger the memory pattern tests as would be expected in
> normal circumstances. This situation is misleading.
> 
> The change here prevents the above mentioned problem after introducing a
> new config option ARCH_USE_MEMTEST that should be subscribed on platforms
> that call early_memtest(), in order to enable the config CONFIG_MEMTEST.
> Conversely CONFIG_MEMTEST cannot be enabled on platforms where it would
> not be tested anyway.
> 
> Cc: Russell King 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Thomas Bogendoerfer 
> Cc: Michael Ellerman 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: Chris Zankel 
> Cc: Max Filippov 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linux-m...@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-xte...@linux-xtensa.org
> Cc: linux...@kvack.org
> Cc: linux-ker...@vger.kernel.org
> Reviewed-by: Max Filippov 
> Signed-off-by: Anshuman Khandual 
> ---
> This patch applies on v5.12-rc1 and has been tested on arm64 platform.
> But it has been just build tested on all other platforms.
> 
> Changes in V2:
> 
> - Added ARCH_USE_MEMTEST in the sorted alphabetical order on platforms

Gentle ping, any updates or objections ?


Re: [PATCH 2/6] mm: Generalize SYS_SUPPORTS_HUGETLBFS (rename as ARCH_SUPPORTS_HUGETLBFS)

2021-03-09 Thread Michael Ellerman
Anshuman Khandual  writes:
> SYS_SUPPORTS_HUGETLBFS config has duplicate definitions on platforms that
> subscribe it. Instead, just make it a generic option which can be selected
> on applicable platforms. Also rename it as ARCH_SUPPORTS_HUGETLBFS instead.
> This reduces code duplication and makes it cleaner.
>
> Cc: Russell King 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Thomas Bogendoerfer 
> Cc: "James E.J. Bottomley" 
> Cc: Helge Deller 
> Cc: Michael Ellerman 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Paul Walmsley 
> Cc: Palmer Dabbelt 
> Cc: Albert Ou 
> Cc: Yoshinori Sato 
> Cc: Rich Felker 
> Cc: Alexander Viro 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linux-m...@vger.kernel.org
> Cc: linux-par...@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-ri...@lists.infradead.org
> Cc: linux...@vger.kernel.org
> Cc: linux-fsde...@vger.kernel.org
> Cc: linux-ker...@vger.kernel.org
> Signed-off-by: Anshuman Khandual 
> ---
>  arch/arm/Kconfig   | 5 +
>  arch/arm64/Kconfig | 4 +---
>  arch/mips/Kconfig  | 6 +-
>  arch/parisc/Kconfig| 5 +
>  arch/powerpc/Kconfig   | 3 ---
>  arch/powerpc/platforms/Kconfig.cputype | 6 +++---

Acked-by: Michael Ellerman  (powerpc)

cheers


Re: [PATCH v4] powerpc/uprobes: Validation for prefixed instruction

2021-03-09 Thread Michael Ellerman
Ravi Bangoria  writes:
> On 3/9/21 4:51 PM, Naveen N. Rao wrote:
>> On 2021/03/09 08:54PM, Michael Ellerman wrote:
>>> Ravi Bangoria  writes:
>>>> As per ISA 3.1, prefixed instruction should not cross 64-byte
>>>> boundary. So don't allow Uprobe on such prefixed instruction.
>>>>
>>>> There are two ways probed instruction is changed in mapped pages.
>>>> First, when Uprobe is activated, it searches for all the relevant
>>>> pages and replace instruction in them. In this case, if that probe
>>>> is on the 64-byte unaligned prefixed instruction, error out
>>>> directly. Second, when Uprobe is already active and user maps a
>>>> relevant page via mmap(), instruction is replaced via mmap() code
>>>> path. But because Uprobe is invalid, entire mmap() operation can
>>>> not be stopped. In this case just print an error and continue.
>>>>
>>>> Signed-off-by: Ravi Bangoria 
>>>> Acked-by: Naveen N. Rao 
>>>
>>> Do we have a Fixes: tag for this?
>> 
>> Since this is an additional check we are adding, I don't think we should
>> add a Fixes: tag. Nothing is broken per-se -- we're just adding more
>> checks to catch simple mistakes. Also, like Oleg pointed out, there are
>> still many other ways for users to shoot themselves in the foot with
>> uprobes and prefixed instructions, if they so desire.
>> 
>> However, if you still think we should add a Fixes: tag, we can perhaps
>> use the below commit since I didn't see any specific commit adding
>> support for prefixed instructions for uprobes:
>> 
>> Fixes: 650b55b707fdfa ("powerpc: Add prefixed instructions to
>> instruction data type")
>
> True. IMO, It doesn't really need any Fixes tag.

Yep OK, I'm happy without a Fixes tag based on that explanation.

>>>> diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
>>>> index e8a63713e655..4cbfff6e94a3 100644
>>>> --- a/arch/powerpc/kernel/uprobes.c
>>>> +++ b/arch/powerpc/kernel/uprobes.c
>>>> @@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe 
>>>> *auprobe,
>>>>    if (addr & 0x03)
>>>>    return -EINVAL;
>>>>
>>>> +  if (cpu_has_feature(CPU_FTR_ARCH_31) &&
>>>> +  ppc_inst_prefixed(auprobe->insn) &&
>>>> +  (addr & (SZ_64 - 4)) == SZ_64 - 4) {
>>>> +  pr_info_ratelimited("Cannot register a uprobe on 64 byte 
>>>> unaligned prefixed instruction\n");
>>>> +  return -EINVAL;
>>>
>>> I realise we already did the 0x03 check above, but I still think this
>>> would be clearer simply as:
>>>
>>> ((addr & 0x3f) == 60)
>> 
>> Indeed, I like the use of `60' there -- hex is overrated ;)
>
> Sure. Will resend.

Thanks.

cheers


Re: PowerPC64 future proof kernel toc, revised for lld

2021-03-09 Thread Alan Modra
On Wed, Mar 10, 2021 at 03:44:44PM +1100, Alexey Kardashevskiy wrote:
> For my own education, is .got for prom_init.o still generated by ld or gcc?

.got is generated by ld.

> In other words, should "objdump -D -s -j .got" ever dump .got for any .o
> file, like below?

No.  "objdump -r prom_init.o | grep GOT" will tell you whether
prom_init.o *may* cause ld to generate .got entries.  (Linker
optimisations or --gc-sections might remove the need for those .got
entries.)

> objdump: section '.got' mentioned in a -j option, but not found in any input
> file

Right, expected.

-- 
Alan Modra
Australia Development Lab, IBM


Re: Errant readings on LM81 with T2080 SoC

2021-03-09 Thread Guenter Roeck
On 3/9/21 6:19 PM, Chris Packham wrote:
> On 9/03/21 9:27 am, Chris Packham wrote:
>> On 8/03/21 5:59 pm, Guenter Roeck wrote:
>>> Other than that, the only other real idea I have would be to monitor
>>> the i2c bus.
>> I am in the fortunate position of being able to go into the office and 
>> even happen to have the expensive scope at the moment. Now I just need 
>> to find a tame HW engineer so I don't burn myself trying to attach the 
>> probes.
> One thing I see on the scope is that when there is a CPU load there 
> appears to be some clock stretching going on (SCL is held low 
> sometimes). I don't see it without the CPU load. It's hard to correlate a 
> clock stretching event with a bad read or error but it is one area where 
> the SMBUS spec has a maximum that might cause the device to give up waiting.
> 
Do you have CONFIG_PREEMPT enabled in your kernel ? But even without
that it is possible that the hot loops at the beginning and end of
each operation mess up the driver and cause it to sleep longer
than intended. Did you try usleep_range() ?

On a side note, can you send me a register dump for the lm81 ?
It would be useful for my module test code.

Thanks,
Guenter


Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel

2021-03-09 Thread Michael Ellerman
Rob Herring  writes:
> On Fri, Feb 19, 2021 at 6:52 PM Thiago Jung Bauermann
>  wrote:
>>
>> Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump
>> kernel") fixed how elf64_load() estimates the FDT size needed by the
>> crashdump kernel.
>>
>> At the same time, commit 130b2d59cec0 ("powerpc: Use common
>> of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic
>> function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That
>> change made the code overestimate it a bit by counting twice the space
>> required for the kernel command line and /chosen properties.
>>
>> Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra
>> space needed by the kdump kernel, and change the function name so that it
>> better reflects what the function is now doing.
>>
>> Signed-off-by: Thiago Jung Bauermann 
>> Reviewed-by: Lakshmi Ramasubramanian 
>> ---
>>  arch/powerpc/include/asm/kexec.h  |  2 +-
>>  arch/powerpc/kexec/elf_64.c   |  2 +-
>>  arch/powerpc/kexec/file_load_64.c | 26 --
>>  3 files changed, 10 insertions(+), 20 deletions(-)
>
> I ended up delaying the referenced series til 5.13, but have applied
> it now. Can I get an ack from the powerpc maintainers on this one?
> I'll fixup the commit log to make sense given the commit id's aren't
> valid.

Thanks for handling it.

Acked-by: Michael Ellerman 


cheers


Re: [PATCH 2/6] mm: Generalize SYS_SUPPORTS_HUGETLBFS (rename as ARCH_SUPPORTS_HUGETLBFS)

2021-03-09 Thread Palmer Dabbelt

On Tue, 09 Mar 2021 00:33:06 PST (-0800), anshuman.khand...@arm.com wrote:

SYS_SUPPORTS_HUGETLBFS config has duplicate definitions on platforms that
subscribe it. Instead, just make it a generic option which can be selected
on applicable platforms. Also rename it as ARCH_SUPPORTS_HUGETLBFS instead.
This reduces code duplication and makes it cleaner.

Cc: Russell King 
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Thomas Bogendoerfer 
Cc: "James E.J. Bottomley" 
Cc: Helge Deller 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Paul Walmsley 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: Yoshinori Sato 
Cc: Rich Felker 
Cc: Alexander Viro 
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-m...@vger.kernel.org
Cc: linux-par...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ri...@lists.infradead.org
Cc: linux...@vger.kernel.org
Cc: linux-fsde...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
 arch/arm/Kconfig   | 5 +
 arch/arm64/Kconfig | 4 +---
 arch/mips/Kconfig  | 6 +-
 arch/parisc/Kconfig| 5 +
 arch/powerpc/Kconfig   | 3 ---
 arch/powerpc/platforms/Kconfig.cputype | 6 +++---
 arch/riscv/Kconfig | 5 +
 arch/sh/Kconfig| 5 +
 fs/Kconfig | 5 -
 9 files changed, 13 insertions(+), 31 deletions(-)


[...]


diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 85d626b8ce5e..69954db3aca9 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -30,6 +30,7 @@ config RISCV
select ARCH_HAS_STRICT_KERNEL_RWX if MMU
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+   select ARCH_SUPPORTS_HUGETLBFS if MMU
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
@@ -165,10 +166,6 @@ config ARCH_WANT_GENERAL_HUGETLB
 config ARCH_SUPPORTS_UPROBES
def_bool y

-config SYS_SUPPORTS_HUGETLBFS
-   depends on MMU
-   def_bool y
-
 config STACKTRACE_SUPPORT
def_bool y


Acked-by: Palmer Dabbelt 


Re: make alloc_anon_inode more useful

2021-03-09 Thread Matthew Wilcox
On Tue, Mar 09, 2021 at 04:53:39PM +0100, Christoph Hellwig wrote:
> this series first renames the existing alloc_anon_inode to
> alloc_anon_inode_sb to clearly mark it as requiring a superblock.
> 
> It then adds a new alloc_anon_inode that works on the anon_inode
> file system super block, thus removing tons of boilerplate code.
> 
> The few remaining callers of alloc_anon_inode_sb all use alloc_file_pseudo
> later, but might also be ripe for some cleanup.

On a somewhat related note, could I get you to look at
drivers/video/fbdev/core/fb_defio.c?

As far as I can tell, there's no need for fb_deferred_io_aops to exist.
We could just set file->f_mapping->a_ops to NULL, and set_page_dirty()
would do the exact same thing this code does (except it would get the
return value correct).

But maybe that would make something else go wrong that distinguishes
between page->mapping being NULL and page->mapping->a_ops->foo being NULL?
Completely untested patch ...

diff --git a/drivers/video/fbdev/core/fb_defio.c 
b/drivers/video/fbdev/core/fb_defio.c
index a591d291b231..441ec31d3e4d 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -151,17 +151,6 @@ static const struct vm_operations_struct 
fb_deferred_io_vm_ops = {
.page_mkwrite   = fb_deferred_io_mkwrite,
 };
 
-static int fb_deferred_io_set_page_dirty(struct page *page)
-{
-   if (!PageDirty(page))
-   SetPageDirty(page);
-   return 0;
-}
-
-static const struct address_space_operations fb_deferred_io_aops = {
-   .set_page_dirty = fb_deferred_io_set_page_dirty,
-};
-
 int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
vma->vm_ops = &fb_deferred_io_vm_ops;
@@ -212,14 +201,6 @@ void fb_deferred_io_init(struct fb_info *info)
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_init);
 
-void fb_deferred_io_open(struct fb_info *info,
-struct inode *inode,
-struct file *file)
-{
-   file->f_mapping->a_ops = &fb_deferred_io_aops;
-}
-EXPORT_SYMBOL_GPL(fb_deferred_io_open);
-
 void fb_deferred_io_cleanup(struct fb_info *info)
 {
struct fb_deferred_io *fbdefio = info->fbdefio;
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 06f5805de2de..c4ba76359f22 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1415,10 +1415,7 @@ __releases(&info->lock)
if (res)
module_put(info->fbops->owner);
}
-#ifdef CONFIG_FB_DEFERRED_IO
-   if (info->fbdefio)
-   fb_deferred_io_open(info, inode, file);
-#endif
+   file->f_mapping->a_ops = NULL;
 out:
unlock_fb_info(info);
if (res)
diff --git a/include/linux/fb.h b/include/linux/fb.h
index ecfbcc0553a5..a8dccd23c249 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -659,9 +659,6 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 
d_pitch,
 /* drivers/video/fb_defio.c */
 int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma);
 extern void fb_deferred_io_init(struct fb_info *info);
-extern void fb_deferred_io_open(struct fb_info *info,
-   struct inode *inode,
-   struct file *file);
 extern void fb_deferred_io_cleanup(struct fb_info *info);
 extern int fb_deferred_io_fsync(struct file *file, loff_t start,
loff_t end, int datasync);


Re: PowerPC64 future proof kernel toc, revised for lld

2021-03-09 Thread Alan Modra
This patch future-proofs the kernel against linker changes that might
put the toc pointer at some location other than .got+0x8000, by
replacing __toc_start+0x8000 with .TOC. throughout.  If the kernel's
idea of the toc pointer doesn't agree with the linker, bad things
happen.

prom_init.c code relocating its toc is also changed so that a symbolic
__prom_init_toc_start toc-pointer relative address is calculated
rather than assuming that it is always at toc-pointer - 0x8000.  The
length calculations loading values from the toc are also avoided.
It's a little incestuous to do that with unreloc_toc picking up
adjusted values (which is fine in practice, they both adjust by the
same amount if all goes well).

I've also changed the way .got is aligned in vmlinux.lds and
zImage.lds, mostly so that dumping out section info by objdump or
readelf plainly shows the alignment is 256.  This linker script
feature was added 2005-09-27, available in FSF binutils releases from
2.17 onwards.  Should be safe to use in the kernel, I think.

Finally, put *(.got) before the prom_init.o entry which only needs
*(.toc), so that the GOT header goes in the correct place.  I don't
believe this makes any difference for the kernel as it would for
dynamic objects being loaded by ld.so.  That change is just to stop
lusers who blindly copy kernel scripts being led astray.  Of course,
this change needs the prom_init.c changes.

Some notes on .toc and .got.

.toc is a compiler generated section of addresses.  .got is a linker
generated section of addresses, generally built when the linker sees
R_*_*GOT* relocations.  In the case of powerpc64 ld.bfd, there are
multiple generated .got sections, one per input object file.  So you
can somewhat reasonably write in a linker script an input section
statement like *prom_init.o(.got .toc) to mean "the .got and .toc
section for files matching *prom_init.o".  On other architectures that
doesn't make sense, because the linker generally has just one .got
section.  Even on powerpc64, note well that the GOT entries for
prom_init.o may be merged with GOT entries from other objects.  That
means that if prom_init.o references, say, _end via some GOT
relocation, and some other object also references _end via a GOT
relocation, the GOT entry for _end may be in the range
__prom_init_toc_start to __prom_init_toc_end and if the kernel does
something special to GOT/TOC entries in that range then the value of
_end as seen by objects other than prom_init.o will be affected.  On
the other hand the GOT entry for _end may not be in the range
__prom_init_toc_start to __prom_init_toc_end.  Which way it turns out
is deterministic but a detail of linker operation that should not be
relied on.

A feature of ld.bfd is that input .toc (and .got) sections matching
one linker input section statement may be sorted, to put entries used
by small-model code first, near the toc base.  This is why scripts for
powerpc64 normally use *(.got .toc) rather than *(.got) *(.toc), since
the first form allows more freedom to sort.

Another feature of ld.bfd is that indirect addressing sequences using
the GOT/TOC may be edited by the linker to relative addressing.  In
many cases relative addressing would be emitted by gcc for
-mcmodel=medium if you appropriately decorate variable declarations
with non-default visibility.

Signed-off-by: Alan Modra 

diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 1d83966f5ef6..e45907fe468f 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -28,7 +28,7 @@ p_etext:  .8byte  _etext
 p_bss_start:   .8byte  __bss_start
 p_end: .8byte  _end
 
-p_toc: .8byte  __toc_start + 0x8000 - p_base
+p_toc: .8byte  .TOC. - p_base
 p_dyn: .8byte  __dynamic_start - p_base
 p_rela:.8byte  __rela_dyn_start - p_base
 p_prom:.8byte  0
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index d6f072865627..d65cd55a6f38 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -36,12 +36,9 @@ SECTIONS
   }
 
 #ifdef CONFIG_PPC64_BOOT_WRAPPER
-  . = ALIGN(256);
-  .got :
+  .got : ALIGN(256)
   {
-__toc_start = .;
-*(.got)
-*(.toc)
+*(.got .toc)
   }
 #endif
 
diff --git a/arch/powerpc/include/asm/sections.h 
b/arch/powerpc/include/asm/sections.h
index 324d7b298ec3..e5a1eae11ed5 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -48,14 +48,18 @@ static inline int in_kernel_text(unsigned long addr)
 
 static inline unsigned long kernel_toc_addr(void)
 {
-   /* Defined by the linker, see vmlinux.lds.S */
-   extern unsigned long __toc_start;
-
-   /*
-* The TOC register (r2) points 32kB into the TOC, so that 64kB of
-* the TOC can be addressed using a single machine instruction.
-*/
-   return (unsigned long)(&__toc_start) + 0x8000UL;
+#if 0
+   /* This version is appropriate if 

Re: Errant readings on LM81 with T2080 SoC

2021-03-09 Thread Guenter Roeck
On 3/9/21 3:35 PM, Chris Packham wrote:
> 
> On 8/03/21 1:31 pm, Guenter Roeck wrote:
>> On 3/7/21 2:52 PM, Chris Packham wrote:
>>> Fundamentally I think this is a problem with the fact that the LM81 is
>>> an SMBus device but the T2080 (and other Freescale SoCs) uses i2c and we
>>> emulate SMBus. I suspect the errant readings are when we don't get round
>>> to completing the read within the timeout specified by the SMBus
>>> specification. Depending on when that happens we either fail the
>>> transfer or interpret the result as all-1s.
>> That is quite unlikely. Many sensor chips are SMBus chips connected to
>> i2c busses. It is much more likely that there is a bug in the T2080 i2c 
>> driver,
>> that the chip doesn't like the bulk read command issued through regmap, that
>> the chip has problems with the i2c bus speed, or that the i2c bus is noisy.
> I have noticed that with the switch to regmap we end up using plain i2c 
> instead of SMBUS. There appears to be no way of saying use SMBUS 
> semantics if the i2c adapter reports I2C_FUNC_I2C.
> 

The driver only really supports I2C; SMBUS functions are emulated.
I don't think that makes a real difference.

Guenter


Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel

2021-03-09 Thread Rob Herring
On Tue, Mar 9, 2021 at 7:31 PM Lakshmi Ramasubramanian
 wrote:
>
> On 3/9/21 6:08 PM, Rob Herring wrote:
>
> Hi Rob,
>
> > On Fri, Feb 19, 2021 at 6:52 PM Thiago Jung Bauermann
> >  wrote:
> >>
> >> Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump
> >> kernel") fixed how elf64_load() estimates the FDT size needed by the
> >> crashdump kernel.
> >>
> >> At the same time, commit 130b2d59cec0 ("powerpc: Use common
> >> of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic
> >> function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That
> >> change made the code overestimate it a bit by counting twice the space
> >> required for the kernel command line and /chosen properties.
> >>
> >> Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra
> >> space needed by the kdump kernel, and change the function name so that it
> >> better reflects what the function is now doing.
> >>
> >> Signed-off-by: Thiago Jung Bauermann 
> >> Reviewed-by: Lakshmi Ramasubramanian 
> >> ---
> >>   arch/powerpc/include/asm/kexec.h  |  2 +-
> >>   arch/powerpc/kexec/elf_64.c   |  2 +-
> >>   arch/powerpc/kexec/file_load_64.c | 26 --
> >>   3 files changed, 10 insertions(+), 20 deletions(-)
> >
> > I ended up delaying the referenced series til 5.13, but have applied
> > it now. Can I get an ack from the powerpc maintainers on this one?
> > I'll fixup the commit log to make sense given the commit id's aren't
> > valid.
>
> I checked the change applied in linux-next branch and also Device Tree's
> for-next branch - it looks like v1 of Thiago's patch has been applied.
> Could you please pick up the v2 patch?

Huh? This patch (v2) hasn't been applied to any tree AFAICT.

Rob


Re: [PATCH v4 3/6] ASoC: dt-bindings: fsl_rpmsg: Add binding doc for rpmsg cpu dai driver

2021-03-09 Thread Rob Herring
On Mon, Mar 08, 2021 at 09:22:27PM +0800, Shengjiu Wang wrote:
> fsl_rpmsg cpu dai driver is driver for rpmsg audio, which is mainly used

Bindings describe h/w blocks, not drivers.

> for getting the user's configuration from device tree and configure the
> clocks which is used by Cortex-M core. So in this document define the
> needed property.
> 
> Signed-off-by: Shengjiu Wang 
> ---
>  .../devicetree/bindings/sound/fsl,rpmsg.yaml  | 118 ++
>  1 file changed, 118 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
> 
> diff --git a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml 
> b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
> new file mode 100644
> index 000000000000..5731c1fbc0a6
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
> @@ -0,0 +1,118 @@
> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/sound/fsl,rpmsg.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: NXP Audio RPMSG CPU DAI Controller
> +
> +maintainers:
> +  - Shengjiu Wang 
> +
> +description: |
> +  fsl_rpmsg cpu dai driver is virtual driver for rpmsg audio, which doesn't
> +  touch hardware. It is mainly used for getting the user's configuration
> +  from device tree and configure the clocks which is used by Cortex-M core.
> +  So in this document define the needed property.
> +
> +properties:
> +  compatible:
> +enum:
> +  - fsl,imx7ulp-rpmsg
> +  - fsl,imx8mn-rpmsg
> +  - fsl,imx8mm-rpmsg
> +  - fsl,imx8mp-rpmsg
> +
> +  model:
> +$ref: /schemas/types.yaml#/definitions/string
> +description: User specified audio sound card name
> +
> +  clocks:
> +items:
> +  - description: Peripheral clock for register access
> +  - description: Master clock
> +  - description: DMA clock for DMA register access
> +  - description: Parent clock for multiple of 8kHz sample rates
> +  - description: Parent clock for multiple of 11kHz sample rates
> +minItems: 5

If this doesn't touch hardware, what are these clocks for?

You don't need 'minItems' unless it's less than the number of 'items'.

> +
> +  clock-names:
> +items:
> +  - const: ipg
> +  - const: mclk
> +  - const: dma
> +  - const: pll8k
> +  - const: pll11k
> +minItems: 5
> +
> +  power-domains:
> +maxItems: 1
> +
> +  fsl,audioindex:
> +$ref: /schemas/types.yaml#/definitions/uint32
> +enum: [0, 1]
> +default: 0
> +description: Instance index for sound card in
> + M core side, which share one rpmsg
> + channel.

We don't do indexes in DT. What's this numbering tied to?

> +
> +  fsl,version:

version of what?

This seems odd at best.

> +$ref: /schemas/types.yaml#/definitions/uint32
> +enum: [1, 2]

You're going to update this with every new firmware version?

> +default: 2
> +description: The version of M core image, which is
> + to make driver compatible with different image.
> +
> +  fsl,buffer-size:
> +$ref: /schemas/types.yaml#/definitions/uint32
> +description: pre allocate dma buffer size

How can you have DMA, this doesn't touch h/w?

> +
> +  fsl,enable-lpa:
> +$ref: /schemas/types.yaml#/definitions/flag
> +description: enable low power audio path.
> +
> +  fsl,rpmsg-out:
> +$ref: /schemas/types.yaml#/definitions/flag
> +description: |
> +  This is a boolean property. If present, the transmitting function
> +  will be enabled.
> +
> +  fsl,rpmsg-in:
> +$ref: /schemas/types.yaml#/definitions/flag
> +description: |
> +  This is a boolean property. If present, the receiving function
> +  will be enabled.
> +
> +  fsl,codec-type:
> +$ref: /schemas/types.yaml#/definitions/uint32
> +enum: [0, 1, 2]
> +default: 0
> +description: Sometimes the codec is registered by
> + driver not by the device tree, this items
> + can be used to distinguish codecs.

How does one decide what value to use?

> +
> +  audio-codec:
> +$ref: /schemas/types.yaml#/definitions/phandle
> +description: The phandle of the audio codec

The codec is controlled from the Linux side?

> +
> +  memory-region:
> +$ref: /schemas/types.yaml#/definitions/phandle
> +description: phandle to the reserved memory nodes
> +
> +required:
> +  - compatible
> +  - fsl,audioindex
> +  - fsl,version
> +  - fsl,buffer-size
> +
> +additionalProperties: false
> +
> +examples:
> +  - |
> +rpmsg_audio: rpmsg_audio {
> +compatible = "fsl,imx8mn-rpmsg";
> +fsl,audioindex = <0> ;
> +fsl,version = <2>;
> +fsl,buffer-size = <0x600>;
> +fsl,enable-lpa;

How does this work? Don't you need somewhere to put the 'rpmsg' data?

> +};
> -- 
> 2.27.0
> 


Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel

2021-03-09 Thread Lakshmi Ramasubramanian

On 3/9/21 6:08 PM, Rob Herring wrote:

Hi Rob,


On Fri, Feb 19, 2021 at 6:52 PM Thiago Jung Bauermann
 wrote:


Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump
kernel") fixed how elf64_load() estimates the FDT size needed by the
crashdump kernel.

At the same time, commit 130b2d59cec0 ("powerpc: Use common
of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic
function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That
change made the code overestimate it a bit by counting twice the space
required for the kernel command line and /chosen properties.

Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra
space needed by the kdump kernel, and change the function name so that it
better reflects what the function is now doing.

Signed-off-by: Thiago Jung Bauermann 
Reviewed-by: Lakshmi Ramasubramanian 
---
  arch/powerpc/include/asm/kexec.h  |  2 +-
  arch/powerpc/kexec/elf_64.c   |  2 +-
  arch/powerpc/kexec/file_load_64.c | 26 --
  3 files changed, 10 insertions(+), 20 deletions(-)


I ended up delaying the referenced series til 5.13, but have applied
it now. Can I get an ack from the powerpc maintainers on this one?
I'll fixup the commit log to make sense given the commit id's aren't
valid.


I checked the change applied in linux-next branch and also Device Tree's 
for-next branch - it looks like v1 of Thiago's patch has been applied. 
Could you please pick up the v2 patch?


thanks,
 -lakshmi




Re: Errant readings on LM81 with T2080 SoC

2021-03-09 Thread Chris Packham
On 9/03/21 9:27 am, Chris Packham wrote:
> On 8/03/21 5:59 pm, Guenter Roeck wrote:
>> Other than that, the only other real idea I have would be to monitor
>> the i2c bus.
> I am in the fortunate position of being able to go into the office and 
> even happen to have the expensive scope at the moment. Now I just need 
> to find a tame HW engineer so I don't burn myself trying to attach the 
> probes.
One thing I see on the scope is that when there is a CPU load there 
appears to be some clock stretching going on (SCL is held low 
sometimes). I don't see it without the CPU load. It's hard to correlate a 
clock stretching event with a bad read or error but it is one area where 
the SMBUS spec has a maximum that might cause the device to give up waiting.


Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel

2021-03-09 Thread Rob Herring
On Fri, Feb 19, 2021 at 6:52 PM Thiago Jung Bauermann
 wrote:
>
> Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump
> kernel") fixed how elf64_load() estimates the FDT size needed by the
> crashdump kernel.
>
> At the same time, commit 130b2d59cec0 ("powerpc: Use common
> of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic
> function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That
> change made the code overestimate it a bit by counting twice the space
> required for the kernel command line and /chosen properties.
>
> Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra
> space needed by the kdump kernel, and change the function name so that it
> better reflects what the function is now doing.
>
> Signed-off-by: Thiago Jung Bauermann 
> Reviewed-by: Lakshmi Ramasubramanian 
> ---
>  arch/powerpc/include/asm/kexec.h  |  2 +-
>  arch/powerpc/kexec/elf_64.c   |  2 +-
>  arch/powerpc/kexec/file_load_64.c | 26 --
>  3 files changed, 10 insertions(+), 20 deletions(-)

I ended up delaying the referenced series til 5.13, but have applied
it now. Can I get an ack from the powerpc maintainers on this one?
I'll fixup the commit log to make sense given the commit id's aren't
valid.

Rob


Re: [PATCH v2 40/43] powerpc/64s: Make kuap_check_amr() and kuap_get_and_check_amr() generic

2021-03-09 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of March 9, 2021 10:10 pm:
> In preparation of porting powerpc32 to C syscall entry/exit,
> rename kuap_check_amr() and kuap_get_and_check_amr() as kuap_check()
> and kuap_get_and_check(), and move in the generic asm/kup.h the stub
> for when CONFIG_PPC_KUAP is not selected.

Looks pretty straightforward to me.

While you're renaming things, could kuap_check_amr() be changed to
kuap_assert_locked() or similar? Otherwise,

Reviewed-by: Nicholas Piggin 

> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/include/asm/book3s/64/kup.h | 24 ++--
>  arch/powerpc/include/asm/kup.h   | 10 +-
>  arch/powerpc/kernel/interrupt.c  | 12 ++--
>  arch/powerpc/kernel/irq.c|  2 +-
>  4 files changed, 18 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
> b/arch/powerpc/include/asm/book3s/64/kup.h
> index 8bd905050896..d9b07e9998be 100644
> --- a/arch/powerpc/include/asm/book3s/64/kup.h
> +++ b/arch/powerpc/include/asm/book3s/64/kup.h
> @@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs 
> *regs,
>*/
>  }
>  
> -static inline unsigned long kuap_get_and_check_amr(void)
> +static inline unsigned long kuap_get_and_check(void)
>  {
>   if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
>   unsigned long amr = mfspr(SPRN_AMR);
> @@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void)
>   return 0;
>  }
>  
> -#else /* CONFIG_PPC_PKEY */
> -
> -static inline void kuap_user_restore(struct pt_regs *regs)
> -{
> -}
> -
> -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long 
> amr)
> -{
> -}
> -
> -static inline unsigned long kuap_get_and_check_amr(void)
> -{
> - return 0;
> -}
> -
> -#endif /* CONFIG_PPC_PKEY */
> -
> -
> -#ifdef CONFIG_PPC_KUAP
> -
> -static inline void kuap_check_amr(void)
> +static inline void kuap_check(void)
>  {
>   if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && 
> mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
>   WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
> diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
> index 25671f711ec2..b7efa46b3109 100644
> --- a/arch/powerpc/include/asm/kup.h
> +++ b/arch/powerpc/include/asm/kup.h
> @@ -74,7 +74,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long 
> address, bool is_write)
>   return false;
>  }
>  
> -static inline void kuap_check_amr(void) { }
> +static inline void kuap_check(void) { }
> +static inline void kuap_save_and_lock(struct pt_regs *regs) { }
> +static inline void kuap_user_restore(struct pt_regs *regs) { }
> +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long 
> amr) { }
> +
> +static inline unsigned long kuap_get_and_check(void)
> +{
> + return 0;
> +}
>  
>  /*
>   * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 727b7848c9cc..40ed55064e54 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -76,7 +76,7 @@ notrace long system_call_exception(long r3, long r4, long 
> r5,
>   } else
>  #endif
>  #ifdef CONFIG_PPC64
> - kuap_check_amr();
> + kuap_check();
>  #endif
>  
>   booke_restore_dbcr0();
> @@ -254,7 +254,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long 
> r3,
>   CT_WARN_ON(ct_state() == CONTEXT_USER);
>  
>  #ifdef CONFIG_PPC64
> - kuap_check_amr();
> + kuap_check();
>  #endif
>  
>   regs->result = r3;
> @@ -380,7 +380,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct 
> pt_regs *regs, unsigned
>* AMR can only have been unlocked if we interrupted the kernel.
>*/
>  #ifdef CONFIG_PPC64
> - kuap_check_amr();
> + kuap_check();
>  #endif
>  
>   local_irq_save(flags);
> @@ -451,7 +451,7 @@ notrace unsigned long 
> interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
>   unsigned long flags;
>   unsigned long ret = 0;
>  #ifdef CONFIG_PPC64
> - unsigned long amr;
> + unsigned long kuap;
>  #endif
>  
>   if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
> @@ -467,7 +467,7 @@ notrace unsigned long 
> interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
>   CT_WARN_ON(ct_state() == CONTEXT_USER);
>  
>  #ifdef CONFIG_PPC64
> - amr = kuap_get_and_check_amr();
> + kuap = kuap_get_and_check();
>  #endif
>  
>   if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
> @@ -511,7 +511,7 @@ notrace unsigned long 
> interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
>* value from the check above.
>*/
>  #ifdef CONFIG_PPC64
> - kuap_kernel_restore(regs, amr);
> + kuap_kernel_restore(regs, kuap);
>  #endif
>  
>   return ret;
> diff --git a/arch/powerpc/kernel/irq.c 

Re: [PATCH v2 36/43] powerpc/32: Set current->thread.regs in C interrupt entry

2021-03-09 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of March 9, 2021 10:10 pm:
> No need to do that in assembly, do it in C.

Hmm. No issues with the patch as such, but why does ppc32 need this but 
not 64? AFAIKS 64 sets this when a thread is created.

Thanks,
Nick

> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/include/asm/interrupt.h | 4 +++-
>  arch/powerpc/kernel/entry_32.S   | 3 +--
>  2 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/interrupt.h 
> b/arch/powerpc/include/asm/interrupt.h
> index 861e6eadc98c..e6d71c2e3aa2 100644
> --- a/arch/powerpc/include/asm/interrupt.h
> +++ b/arch/powerpc/include/asm/interrupt.h
> @@ -33,8 +33,10 @@ static inline void interrupt_enter_prepare(struct pt_regs 
> *regs, struct interrup
>   if (!arch_irq_disabled_regs(regs))
>   trace_hardirqs_off();
>  
> - if (user_mode(regs))
> + if (user_mode(regs)) {
> + current->thread.regs = regs;
>   account_cpu_user_entry();
> + }
>  #endif
>   /*
>* Book3E reconciles irq soft mask in asm
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index 8fe1c3fdfa6e..815a4ff1ba76 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -52,8 +52,7 @@
>  prepare_transfer_to_handler:
>   andi.   r0,r9,MSR_PR
>   addir12, r2, THREAD
> - beq 2f  /* if from user, fix up THREAD.regs */
> - stw r3,PT_REGS(r12)
> + beq 2f
>  #ifdef CONFIG_PPC_BOOK3S_32
>   kuep_lock r11, r12
>  #endif
> -- 
> 2.25.0
> 
> 


Re: [PATCH v2 28/43] powerpc/64e: Call bad_page_fault() from do_page_fault()

2021-03-09 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of March 9, 2021 10:09 pm:
> book3e/64 is the last one calling __bad_page_fault()
> from assembly.
> 
> Save non volatile registers before calling do_page_fault()
> and modify do_page_fault() to call __bad_page_fault()
> for all platforms.
> 
> Then it can be refactored by the call of bad_page_fault()
> which avoids the duplication of the exception table search.

This can go in with the 64e change after your series. I think it should
be ready for the next merge window as well.

Thanks,
Nick

> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/kernel/exceptions-64e.S |  8 +---
>  arch/powerpc/mm/fault.c  | 17 -
>  2 files changed, 5 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/exceptions-64e.S 
> b/arch/powerpc/kernel/exceptions-64e.S
> index e8eb9992a270..b60f89078a3f 100644
> --- a/arch/powerpc/kernel/exceptions-64e.S
> +++ b/arch/powerpc/kernel/exceptions-64e.S
> @@ -1010,15 +1010,9 @@ storage_fault_common:
>   addir3,r1,STACK_FRAME_OVERHEAD
>   ld  r14,PACA_EXGEN+EX_R14(r13)
>   ld  r15,PACA_EXGEN+EX_R15(r13)
> + bl  save_nvgprs
>   bl  do_page_fault
> - cmpdi   r3,0
> - bne-1f
>   b   ret_from_except_lite
> -1:   bl  save_nvgprs
> - mr  r4,r3
> - addir3,r1,STACK_FRAME_OVERHEAD
> - bl  __bad_page_fault
> - b   ret_from_except
>  
>  /*
>   * Alignment exception doesn't fit entirely in the 0x100 bytes so it
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 2e54bac99a22..7bcff3fca110 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -541,24 +541,15 @@ NOKPROBE_SYMBOL(___do_page_fault);
>  
>  static long __do_page_fault(struct pt_regs *regs)
>  {
> - const struct exception_table_entry *entry;
>   long err;
>  
>   err = ___do_page_fault(regs, regs->dar, regs->dsisr);
>   if (likely(!err))
> - return err;
> -
> - entry = search_exception_tables(regs->nip);
> - if (likely(entry)) {
> - instruction_pointer_set(regs, extable_fixup(entry));
>   return 0;
> - } else if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
> - __bad_page_fault(regs, err);
> - return 0;
> - } else {
> - /* 32 and 64e handle the bad page fault in asm */
> - return err;
> - }
> +
> + bad_page_fault(regs, err);
> +
> + return 0;
>  }
>  NOKPROBE_SYMBOL(__do_page_fault);
>  
> -- 
> 2.25.0
> 
> 


Re: [PATCH v2 02/43] powerpc/traps: Declare unrecoverable_exception() as __noreturn

2021-03-09 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of March 9, 2021 10:09 pm:
> unrecoverable_exception() is never expected to return, most callers
> have an infinite loop in case it returns.
> 
> Ensure it really never returns by terminating it with a BUG(), and
> declare it __noreturn.
> 
> It allows GCC to really simplify functions calling it. In the example
> below, it avoids the stack frame in the likely fast path and avoids
> code duplication for the exit.
> 
> With this patch:

[snip]

Nice.

> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index a44a30b0688c..d5c9d9ddd186 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -2170,11 +2170,15 @@ 
> DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
>   * in the MSR is 0.  This indicates that SRR0/1 are live, and that
>   * we therefore lost state by taking this exception.
>   */
> -void unrecoverable_exception(struct pt_regs *regs)
> +void __noreturn unrecoverable_exception(struct pt_regs *regs)
>  {
>   pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
>regs->trap, regs->nip, regs->msr);
>   die("Unrecoverable exception", regs, SIGABRT);
> + /* die() should not return */
> + WARN(true, "die() unexpectedly returned");
> + for (;;)
> + ;
>  }

I don't think the WARN should be added because that will cause another
interrupt after something is already badly wrong, so this might just
make it harder to debug.

For example if die() is falling through for some reason, we warn and
cause a program check here, and that might also be unrecoverable so it
might come through here and fall through again and warn again, etc.

Putting the infinite loop is good enough I think (and better than there 
was previously).

Otherwise

Reviewed-by: Nicholas Piggin 

Thanks,
Nick


Re: [PATCH v2 01/43] powerpc/traps: unrecoverable_exception() is not an interrupt handler

2021-03-09 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of March 9, 2021 10:09 pm:
> unrecoverable_exception() is called from interrupt handlers or
> after an interrupt handler has failed.
> 
> Make it a standard function to avoid doubling the actions
> performed on interrupt entry (e.g.: user time accounting).
> 
> Fixes: 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers")
> Signed-off-by: Christophe Leroy 

Reviewed-by: Nicholas Piggin 

This should go in as a fix for this release I think.

> ---
>  arch/powerpc/include/asm/interrupt.h | 3 ++-
>  arch/powerpc/kernel/interrupt.c  | 1 -
>  arch/powerpc/kernel/traps.c  | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/interrupt.h 
> b/arch/powerpc/include/asm/interrupt.h
> index aedfba29e43a..e8d09a841373 100644
> --- a/arch/powerpc/include/asm/interrupt.h
> +++ b/arch/powerpc/include/asm/interrupt.h
> @@ -410,7 +410,6 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
>  DECLARE_INTERRUPT_HANDLER(CacheLockingException);
>  DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
>  DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
> -DECLARE_INTERRUPT_HANDLER(unrecoverable_exception);
>  DECLARE_INTERRUPT_HANDLER(WatchdogException);
>  DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
>  
> @@ -437,6 +436,8 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
>  
>  DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
>  
> +void unrecoverable_exception(struct pt_regs *regs);
> +
>  void replay_system_reset(void);
>  void replay_soft_interrupts(void);
>  
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 398cd86b6ada..b8e7d25be31b 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -436,7 +436,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct 
> pt_regs *regs, unsigned
>   return ret;
>  }
>  
> -void unrecoverable_exception(struct pt_regs *regs);
>  void preempt_schedule_irq(void);
>  
>  notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, 
> unsigned long msr)
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 1583fd1c6010..a44a30b0688c 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -2170,7 +2170,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
>   * in the MSR is 0.  This indicates that SRR0/1 are live, and that
>   * we therefore lost state by taking this exception.
>   */
> -DEFINE_INTERRUPT_HANDLER(unrecoverable_exception)
> +void unrecoverable_exception(struct pt_regs *regs)
>  {
>   pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
>regs->trap, regs->nip, regs->msr);
> -- 
> 2.25.0
> 
> 


Re: Errant readings on LM81 with T2080 SoC

2021-03-09 Thread Chris Packham

On 8/03/21 1:31 pm, Guenter Roeck wrote:
> On 3/7/21 2:52 PM, Chris Packham wrote:
>> Fundamentally I think this is a problem with the fact that the LM81 is
>> an SMBus device but the T2080 (and other Freescale SoCs) uses i2c and we
>> emulate SMBus. I suspect the errant readings are when we don't get round
>> to completing the read within the timeout specified by the SMBus
>> specification. Depending on when that happens we either fail the
>> transfer or interpret the result as all-1s.
> That is quite unlikely. Many sensor chips are SMBus chips connected to
> i2c busses. It is much more likely that there is a bug in the T2080 i2c 
> driver,
> that the chip doesn't like the bulk read command issued through regmap, that
> the chip has problems with the i2c bus speed, or that the i2c bus is noisy.
I have noticed that with the switch to regmap we end up using plain i2c 
instead of SMBUS. There appears to be no way of saying use SMBUS 
semantics if the i2c adapter reports I2C_FUNC_I2C.

Re: [PATCH v1] powerpc: Include running function as first entry in save_stack_trace() and friends

2021-03-09 Thread Segher Boessenkool
Hi!

On Tue, Mar 09, 2021 at 04:05:23PM +, Mark Rutland wrote:
> On Thu, Mar 04, 2021 at 03:54:48PM -0600, Segher Boessenkool wrote:
> > On Thu, Mar 04, 2021 at 02:57:30PM +, Mark Rutland wrote:
> > > It looks like GCC is happy to give us the function-entry-time FP if we use
> > > __builtin_frame_address(1),
> > 
> > From the GCC manual:
> >  Calling this function with a nonzero argument can have
> >  unpredictable effects, including crashing the calling program.  As
> >  a result, calls that are considered unsafe are diagnosed when the
> >  '-Wframe-address' option is in effect.  Such calls should only be
> >  made in debugging situations.
> > 
> > It *does* warn (the warning is in -Wall btw), on both powerpc and
> > aarch64.  Furthermore, using this builtin causes lousy code (it forces
> > the use of a frame pointer, which we normally try very hard to optimise
> > away, for good reason).
> > 
> > And, that warning is not an idle warning.  Non-zero arguments to
> > __builtin_frame_address can crash the program.  It won't on simpler
> > functions, but there is no real definition of what a simpler function
> > *is*.  It is meant for debugging, not for production use (this is also
> > why no one has bothered to make it faster).
> >
> > On Power it should work, but on pretty much any other arch it won't.
> 
> I understand this is true generally, and cannot be relied upon in
> portable code. However as you hint here for Power, I believe that on
> arm64 __builtin_frame_address(1) shouldn't crash the program due to the
> way frame records work on arm64, but I'll go check with some local
> compiler folk. I agree that __builtin_frame_address(2) and beyond
> certainly can, e.g.  by NULL dereference and similar.

I still do not know the aarch64 ABI well enough.  If only I had time!

> For context, why do you think this would work on power specifically? I
> wonder if our rationale is similar.

On most 64-bit Power ABIs all stack frames are connected together as a
linked list (which is updated atomically, importantly).  This makes it
possible to always find all previous stack frames.

> Are you aware of anything in particular that breaks using
> __builtin_frame_address(1) in non-portable code, or is this just a
> general sentiment of this not being a supported use-case?

It is not supported, and trying to do it anyway can crash: it can use
random stack contents as pointer!  Not really "random" of course, but
where it thinks to find a pointer into the previous frame, which is not
something it can rely on (unless the ABI guarantees it somehow).

See gcc.gnu.org/PR60109 for example.

> > > Unless we can get some strong guarantees from compiler folk such that we
> > > can guarantee a specific function acts as a boundary for unwinding (and
> > > doesn't itself get split, etc), the only reliable way I can think to
> > > solve this requires an assembly trampoline. Whatever we do is liable to
> > > need some invasive rework.
> > 
> > You cannot get such a guarantee, other than not letting the compiler
> > see into the routine at all, like with assembler code (not inline asm,
> > real assembler code).
> 
> If we cannot reliably ensure this then I'm happy to go write an assembly
> trampoline to snapshot the state at a function call boundary (where our
> procedure call standard mandates the state of the LR, FP, and frame
> records pointed to by the FP).

Is the frame pointer required?!

> This'll require reworking a reasonable
> amount of code cross-architecture, so I'll need to get some more
> concrete justification (e.g. examples of things that can go wrong in
> practice).

Say you have a function that does dynamic stack allocation, then there
is usually no way to find the previous stack frame (without function-
specific knowledge).  So __builtin_frame_address cannot work (it knows
nothing about frames further up).

Dynamic stack allocation (alloca, or variable length automatic arrays)
is just the most common and most convenient example; it is not the only
case you have problems here.

> > The real way forward is to bite the bullet and to no longer pretend you
> > can do a full backtrace from just the stack contents.  You cannot.
> 
> I think what you mean here is that there's no reliable way to handle the
> current/leaf function, right? If so I do agree.

No, I meant what I said.

There is the separate issue that you do not know where the return
address (etc.) is stored in a function that has not yet done a call
itself, sure.  You cannot assume anything the ABI does not tell you you
can depend on.

> Beyond that I believe that arm64's frame records should be sufficient.

Do you have a simple linked list connecting all frames?  The aarch64 GCC
port does not define anything special here (DYNAMIC_CHAIN_ADDRESS), so
the default will be used: every frame pointer has to point to the
previous one, no exceptions whatsoever.


Segher


Re: [PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb

2021-03-09 Thread Gao Xiang
On Tue, Mar 09, 2021 at 04:53:40PM +0100, Christoph Hellwig wrote:
> Rename alloc_inode to free the name for a new variant that does not
> need boilerplate to create a super_block first.
> 
> Signed-off-by: Christoph Hellwig 
> ---

That is a nice idea as well to avoid sb by introducing a unique
fs...

Reviewed-by: Gao Xiang 

Thanks,
Gao Xiang



Re: [PATCH 2/9] fs: add an argument-less alloc_anon_inode

2021-03-09 Thread Gao Xiang
On Tue, Mar 09, 2021 at 04:53:41PM +0100, Christoph Hellwig wrote:
> Add a new alloc_anon_inode helper that allocates an inode on
> the anon_inode file system.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: Gao Xiang 

Thanks,
Gao Xiang



Re: [PATCH v2 1/8] powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property

2021-03-09 Thread Daniel Henrique Barboza




On 3/9/21 12:33 PM, Cédric Le Goater wrote:

On 3/8/21 6:13 PM, Greg Kurz wrote:

On Wed, 3 Mar 2021 18:48:50 +0100
Cédric Le Goater  wrote:


The 'chip_id' field of the XIVE CPU structure is used to choose a
target for a source located on the same chip when possible. This field
is assigned on the PowerNV platform using the "ibm,chip-id" property
on pSeries under KVM when NUMA nodes are defined but it is undefined


This sentence seems to have a syntax problem... like it is missing an
'and' before 'on pSeries'.


ah yes, or simply a comma.


under PowerVM. The XIVE source structure has a similar field
'src_chip' which is only assigned on the PowerNV platform.

cpu_to_node() returns a compatible value on all platforms, 0 being the
default node. It will also give us the opportunity to set the affinity
of a source on pSeries when we can localize them.



IIUC this relies on the fact that the NUMA node id is == to chip id
on PowerNV, i.e. xc->chip_id which is passed to OPAL remains stable
with this change.


Linux sets the NUMA node in numa_setup_cpu(). On pseries, the hcall
H_HOME_NODE_ASSOCIATIVITY returns the node id if I am correct (Daniel
in Cc:)


That's correct. H_HOME_NODE_ASSOCIATIVITY returns not only the node_id, but
a list with the ibm,associativity domains of the CPU that "proc-no" (processor
identifier) is mapped to inside QEMU.

node_id in this case, considering that we're working with a reference-points
of size 4, is the 4th element of the returned list. The last element is
"procno" itself.




On PowerNV, Linux uses "ibm,associativity" property of the CPU to find
the node id. This value is built from the chip id in OPAL, so the
value returned by cpu_to_node(cpu) and the value of the "ibm,chip-id"
property are unlikely to be different.

cpu_to_node(cpu) is used in many places to allocate the structures
locally to the owning node. XIVE is not an exception (see below in the
same patch), it is better to be consistent and get the same information
(node id) using the same routine.


In Linux, "ibm,chip-id" is only used in low level PowerNV drivers :
LPC, XSCOM, RNG, VAS, NX. XIVE should be in that list also but skiboot
unifies the controllers of the system to only expose one to the OS. This
is problematic and should be changed but it's another topic.



On the other hand, you have the pSeries case under PowerVM that
doesn't set xc->chip_id, which isn't passed to any hcall AFAICT.


yes "ibm,chip-id" is an OPAL concept unfortunately and it has no meaning
under PAPR. xc->chip_id on pseries (PowerVM) will contain an invalid
chip id.

QEMU/KVM exposes "ibm,chip-id" but it's not used. (its value is not
always correct btw)



If you have a way to reliably reproduce this, let me know and I'll fix it
up in QEMU.



Thanks,


DHB





It looks like the chip id is only used for localization purpose in
this case, right ?


Yes and PAPR sources are not localized. So it's not used. MSI sources
could be if we rewrote the MSI driver.


In this case, what about doing this change for pSeries only,
somewhere in spapr.c ?


The IPI code is common to all platforms and all have the same issue.
I'd rather not.

Thanks,

C.
  

Signed-off-by: Cédric Le Goater 
---
  arch/powerpc/sysdev/xive/common.c | 7 +--
  1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/common.c 
b/arch/powerpc/sysdev/xive/common.c
index 595310e056f4..b8e456da28aa 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1335,16 +1335,11 @@ static int xive_prepare_cpu(unsigned int cpu)
  
  	xc = per_cpu(xive_cpu, cpu);

if (!xc) {
-   struct device_node *np;
-
xc = kzalloc_node(sizeof(struct xive_cpu),
  GFP_KERNEL, cpu_to_node(cpu));
if (!xc)
return -ENOMEM;
-   np = of_get_cpu_node(cpu, NULL);
-   if (np)
-   xc->chip_id = of_get_ibm_chip_id(np);
-   of_node_put(np);
+   xc->chip_id = cpu_to_node(cpu);
xc->hw_ipi = XIVE_BAD_IRQ;
  
  		per_cpu(xive_cpu, cpu) = xc;






Re: [PATCH] ethernet: ucc_geth: Use kmemdup instead of kmalloc and memcpy

2021-03-09 Thread Rasmus Villemoes
On 05/03/2021 15.27, angkery wrote:
> From: Junlin Yang 
> 
> Fixes coccicheck warnings:
> ./drivers/net/ethernet/freescale/ucc_geth.c:3594:11-18:
> WARNING opportunity for kmemdup
> 
> Signed-off-by: Junlin Yang 
> ---
>  drivers/net/ethernet/freescale/ucc_geth.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.c 
> b/drivers/net/ethernet/freescale/ucc_geth.c
> index ef4e2fe..2c079ad 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.c
> +++ b/drivers/net/ethernet/freescale/ucc_geth.c
> @@ -3591,10 +3591,9 @@ static int ucc_geth_probe(struct platform_device* 
> ofdev)
>   if ((ucc_num < 0) || (ucc_num > 7))
>   return -ENODEV;
>  
> - ug_info = kmalloc(sizeof(*ug_info), GFP_KERNEL);
> + ug_info = kmemdup(_primary_info, sizeof(*ug_info), GFP_KERNEL);
>   if (ug_info == NULL)
>   return -ENOMEM;
> - memcpy(ug_info, _primary_info, sizeof(*ug_info));
>  
>   ug_info->uf_info.ucc_num = ucc_num;
>  
> 

Ah, yes, of course, I should have used that.

Acked-by: Rasmus Villemoes 


Re: [PATCH v2 4/7] CMDLINE: powerpc: convert to generic builtin command line

2021-03-09 Thread Daniel Walker
On Tue, Mar 09, 2021 at 08:56:47AM +0100, Christophe Leroy wrote:
> 
> 
> Le 09/03/2021 à 01:02, Daniel Walker a écrit :
> > This updates the powerpc code to use the CONFIG_GENERIC_CMDLINE
> > option.
> > 
> > Cc: xe-linux-exter...@cisco.com
> > Signed-off-by: Ruslan Ruslichenko 
> > Signed-off-by: Ruslan Bilovol 
> > Signed-off-by: Daniel Walker 
> > ---
> >   arch/powerpc/Kconfig| 37 +
> >   arch/powerpc/kernel/prom.c  |  1 +
> >   arch/powerpc/kernel/prom_init.c | 35 ++-
> >   3 files changed, 23 insertions(+), 50 deletions(-)
> > 
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 107bb4319e0e..276b06d5c961 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -167,6 +167,7 @@ config PPC
> > select EDAC_SUPPORT
> > select GENERIC_ATOMIC64 if PPC32
> > select GENERIC_CLOCKEVENTS_BROADCASTif SMP
> > +   select GENERIC_CMDLINE
> > select GENERIC_CMOS_UPDATE
> > select GENERIC_CPU_AUTOPROBE
> > select GENERIC_CPU_VULNERABILITIES  if PPC_BARRIER_NOSPEC
> > @@ -906,42 +907,6 @@ config PPC_DENORMALISATION
> >   Add support for handling denormalisation of single precision
> >   values.  Useful for bare metal only.  If unsure say Y here.
> > -config CMDLINE
> > -   string "Initial kernel command string"
> > -   default ""
> > -   help
> > - On some platforms, there is currently no way for the boot loader to
> > - pass arguments to the kernel. For these platforms, you can supply
> > - some command-line options at build time by entering them here.  In
> > - most cases you will need to specify the root device here.
> > -
> > -choice
> > -   prompt "Kernel command line type" if CMDLINE != ""
> > -   default CMDLINE_FROM_BOOTLOADER
> > -
> > -config CMDLINE_FROM_BOOTLOADER
> > -   bool "Use bootloader kernel arguments if available"
> > -   help
> > - Uses the command-line options passed by the boot loader. If
> > - the boot loader doesn't provide any, the default kernel command
> > - string provided in CMDLINE will be used.
> > -
> > -config CMDLINE_EXTEND
> > -   bool "Extend bootloader kernel arguments"
> > -   help
> > - The command-line arguments provided by the boot loader will be
> > - appended to the default kernel command string.
> > -
> > -config CMDLINE_FORCE
> > -   bool "Always use the default kernel command string"
> > -   help
> > - Always use the default kernel command string, even if the boot
> > - loader passes other arguments to the kernel.
> > - This is useful if you cannot or don't want to change the
> > - command-line options your boot loader passes to the kernel.
> > -
> > -endchoice
> > -
> >   config EXTRA_TARGETS
> > string "Additional default image types"
> > help
> > diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> > index ae3c41730367..96d0a01be1b4 100644
> > --- a/arch/powerpc/kernel/prom.c
> > +++ b/arch/powerpc/kernel/prom.c
> > @@ -27,6 +27,7 @@
> >   #include 
> >   #include 
> >   #include 
> > +#include 
> 
> Why is this needed in prom.c ?
 
Must have been a mistake, I don't think it's needed.


> >   #include 
> >   #include 
> >   #include 
> > diff --git a/arch/powerpc/kernel/prom_init.c 
> > b/arch/powerpc/kernel/prom_init.c
> > index e9d4eb6144e1..657241534d69 100644
> > --- a/arch/powerpc/kernel/prom_init.c
> > +++ b/arch/powerpc/kernel/prom_init.c
> > @@ -27,6 +27,7 @@
> >   #include 
> >   #include 
> >   #include 
> > +#include 
> >   #include 
> >   #include 
> >   #include 
> > @@ -242,15 +243,6 @@ static int __init prom_strcmp(const char *cs, const 
> > char *ct)
> > return 0;
> >   }
> > -static char __init *prom_strcpy(char *dest, const char *src)
> > -{
> > -   char *tmp = dest;
> > -
> > -   while ((*dest++ = *src++) != '\0')
> > -   /* nothing */;
> > -   return tmp;
> > -}
> > -
> 
> This game with prom_strcpy() should go in a separate preceding patch.
> 
> Also, it looks like checkpatch.pl recommends to use strscpy() instead of 
> strlcpy().

strscpy() is very large. I'm not sure it's compatible with this prom_init.c
environment.

> >   static int __init prom_strncmp(const char *cs, const char *ct, size_t 
> > count)
> >   {
> > unsigned char c1, c2;
> > @@ -276,6 +268,20 @@ static size_t __init prom_strlen(const char *s)
> > return sc - s;
> >   }
> > +static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
> > +{
> > +   size_t ret = prom_strlen(src);
> > +
> > +   if (size) {
> > +   size_t len = (ret >= size) ? size - 1 : ret;
> > +
> > +   memcpy(dest, src, len);
> > +   dest[len] = '\0';
> > +   }
> > +   return ret;
> > +}
> > +
> > +
> >   static int __init prom_memcmp(const void *cs, const void *ct, size_t 
> > count)
> >   {
> > const unsigned char *su1, *su2;
> > @@ -304,6 +310,7 @@ static char __init *prom_strstr(const char *s1, const 

Re: [PATCH v2 3/7] powerpc: convert config files to generic cmdline

2021-03-09 Thread Daniel Walker
On Tue, Mar 09, 2021 at 08:47:09AM +0100, Christophe Leroy wrote:
> 
> 
> Le 09/03/2021 à 01:02, Daniel Walker a écrit :
> > This is a scripted mass convert of the config files to use
> > the new generic cmdline. There is a bit of a trim effect here.
> > It would seem that some of the configs haven't been trimmed in
> > a while.
> 
> If you do that in a separate patch, you lose bisectability.
> 
> I think it would have been better to do things in a different way, more or 
> less like I did in my series:
> 1/ Provide GENERIC cmdline at the same functionality level as what is
> spread in the different architectures
> 2/ Convert architectures to the generic with least churn.
> 3/ Add new features to the generic

You have to have the churn eventually, no matter how you do it. The only way you
don't have churn is if you never upgrade the feature set.


> > 
> > The bash script used to convert is as follows,
> > 
> > if [[ -z "$1" || -z "$2" ]]; then
> >  echo "Two arguments are needed."
> >  exit 1
> > fi
> > mkdir $1
> > cp $2 $1/.config
> > sed -i 's/CONFIG_CMDLINE=/CONFIG_CMDLINE_BOOL=y\nCONFIG_CMDLINE_PREPEND=/g' 
> > $1/.config
> 
> This is not correct.
> 
> By default, on powerpc the provided command line is used only if the 
> bootloader doesn't provide one.
> 
> Otherwise:
> - the builtin command line is appended to the one provided by the bootloader
> if CONFIG_CMDLINE_EXTEND is selected
> - the builtin command line replaces the one provided by the bootloader if
> CONFIG_CMDLINE_FORCE is selected

I think my changes maintain most of this due to the override of
CONFIG_CMDLINE_PREPEND. This is an upgrade and the inflexibility in powerpc is
an example of why these changes were created in the first place.

For example , say the default command line is "root=/dev/issblk0" from iss476
platform. And the bootloader adds "root=/dev/sda1"

The result is .

Then you have,

root=/dev/issblk0 root=/dev/sda1

and the bootloader takes precedence over the default command line. So root= in the
above cases is defined by the bootloader.

The only issue would be if a person wants to override the default command line
with an unrelated bootloader command line. I don't know how many people do this,
but I doubt it's many. Can you think of any use cases like this?

I would imagine there are many more people who have to entirely duplicate the
default command line in the boot loader when they really just want to change a
single part of it like the root= device or console device or speed.

Daniel


Re: [PATCH v2 1/8] powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property

2021-03-09 Thread Cédric Le Goater
On 3/9/21 6:08 PM, Daniel Henrique Barboza wrote:
> 
> 
> On 3/9/21 12:33 PM, Cédric Le Goater wrote:
>> On 3/8/21 6:13 PM, Greg Kurz wrote:
>>> On Wed, 3 Mar 2021 18:48:50 +0100
>>> Cédric Le Goater  wrote:
>>>
 The 'chip_id' field of the XIVE CPU structure is used to choose a
 target for a source located on the same chip when possible. This field
 is assigned on the PowerNV platform using the "ibm,chip-id" property
 on pSeries under KVM when NUMA nodes are defined but it is undefined
>>>
>>> This sentence seems to have a syntax problem... like it is missing an
>>> 'and' before 'on pSeries'.
>>
>> ah yes, or simply a comma.
>>
 under PowerVM. The XIVE source structure has a similar field
 'src_chip' which is only assigned on the PowerNV platform.

 cpu_to_node() returns a compatible value on all platforms, 0 being the
 default node. It will also give us the opportunity to set the affinity
 of a source on pSeries when we can localize them.

>>>
>>> IIUC this relies on the fact that the NUMA node id is == to chip id
>>> on PowerNV, i.e. xc->chip_id which is passed to OPAL remain stable
>>> with this change.
>>
>> Linux sets the NUMA node in numa_setup_cpu(). On pseries, the hcall
>> H_HOME_NODE_ASSOCIATIVITY returns the node id if I am correct (Daniel
>> in Cc:)
> 
> That's correct. H_HOME_NODE_ASSOCIATIVITY returns not only the node_id, but
> a list with the ibm,associativity domains of the CPU that "proc-no" (processor
> identifier) is mapped to inside QEMU.
> 
> node_id in this case, considering that we're working with a reference-points
> of size 4, is the 4th element of the returned list. The last element is
> "procno" itself.
> 
> 
>>
>> On PowerNV, Linux uses "ibm,associativity" property of the CPU to find
>> the node id. This value is built from the chip id in OPAL, so the
>> value returned by cpu_to_node(cpu) and the value of the "ibm,chip-id"
>> property are unlikely to be different.
>>
>> cpu_to_node(cpu) is used in many places to allocate the structures
>> locally to the owning node. XIVE is not an exception (see below in the
>> same patch), it is better to be consistent and get the same information
>> (node id) using the same routine.
>>
>>
>> In Linux, "ibm,chip-id" is only used in low level PowerNV drivers :
>> LPC, XSCOM, RNG, VAS, NX. XIVE should be in that list also but skiboot
>> unifies the controllers of the system to only expose one to the OS. This
>> is problematic and should be changed but it's another topic.
>>
>>
>>> On the other hand, you have the pSeries case under PowerVM that
>>> doesn't xc->chip_id, which isn't passed to any hcall AFAICT.
>>
>> yes "ibm,chip-id" is an OPAL concept unfortunately and it has no meaning
>> under PAPR. xc->chip_id on pseries (PowerVM) will contain an invalid
>> chip id.
>>
>> QEMU/KVM exposes "ibm,chip-id" but it's not used. (its value is not
>> always correct btw)
> 
> 
> If you have a way to reliably reproduce this, let me know and I'll fix it
> up in QEMU.

with :

   -smp 4,cores=1,maxcpus=8 -object memory-backend-ram,id=ram-node0,size=2G 
-numa node,nodeid=0,cpus=0-1,cpus=4-5,memdev=ram-node0 -object 
memory-backend-ram,id=ram-node1,size=2G -numa 
node,nodeid=1,cpus=2-3,cpus=6-7,memdev=ram-node1

# dmesg | grep numa
[0.013106] numa: Node 0 CPUs: 0-1
[0.013136] numa: Node 1 CPUs: 2-3

# dtc -I fs /proc/device-tree/cpus/ -f | grep ibm,chip-id
ibm,chip-id = <0x01>;
ibm,chip-id = <0x02>;
ibm,chip-id = <0x00>;
ibm,chip-id = <0x03>;

with :

  -smp 4,cores=4,maxcpus=8,threads=1 -object 
memory-backend-ram,id=ram-node0,size=2G -numa 
node,nodeid=0,cpus=0-1,cpus=4-5,memdev=ram-node0 -object 
memory-backend-ram,id=ram-node1,size=2G -numa 
node,nodeid=1,cpus=2-3,cpus=6-7,memdev=ram-node1

# dmesg | grep numa
[0.013106] numa: Node 0 CPUs: 0-1
[0.013136] numa: Node 1 CPUs: 2-3

# dtc -I fs /proc/device-tree/cpus/ -f | grep ibm,chip-id
ibm,chip-id = <0x00>;
ibm,chip-id = <0x00>;
ibm,chip-id = <0x00>;
ibm,chip-id = <0x00>;

I think we should simply remove "ibm,chip-id" since it's not used and
not in the PAPR spec.

Thanks,

C.

 

> 
> Thanks,
> 
> 
> DHB
> 
> 
>>
>>> It looks like the chip id is only used for localization purpose in
>>> this case, right ?
>>
>> Yes and PAPR sources are not localized. So it's not used. MSI sources
>> could be if we rewrote the MSI driver.
>>
>>> In this case, what about doing this change for pSeries only,
>>> somewhere in spapr.c ?
>>
>> The IPI code is common to all platforms and all have the same issue.
>> I'd rather not.
>>
>> Thanks,
>>
>> C.
>>  
 Signed-off-by: Cédric Le Goater 
 ---
   arch/powerpc/sysdev/xive/common.c | 7 +--
   1 file changed, 1 insertion(+), 6 deletions(-)

 diff --git a/arch/powerpc/sysdev/xive/common.c 
 b/arch/powerpc/sysdev/xive/common.c
 index 595310e056f4..b8e456da28aa 

[PATCH] powerpc/xmon: Check cpu id in commands "c#", "dp#" and "dx#"

2021-03-09 Thread Greg Kurz
All these commands end up peeking into the PACA using the user originated
cpu id as an index. Check that the cpu id is valid in order to prevent xmon from
crashing. Instead of printing an error, this follows the same behavior as the
"lp s #" command : ignore the buggy cpu id parameter and fall back to the
#-less version of the command.

Signed-off-by: Greg Kurz 
---
 arch/powerpc/xmon/xmon.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 80fbf8968f77..d3d6e044228e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1248,7 +1248,7 @@ static int cpu_cmd(void)
unsigned long cpu, first_cpu, last_cpu;
int timeout;
 
-   if (!scanhex()) {
+   if (!scanhex() || cpu >= num_possible_cpus()) {
/* print cpus waiting or in xmon */
printf("cpus stopped:");
last_cpu = first_cpu = NR_CPUS;
@@ -2678,7 +2678,7 @@ static void dump_pacas(void)
 
termch = c; /* Put c back, it wasn't 'a' */
 
-   if (scanhex())
+   if (scanhex() && num < num_possible_cpus())
dump_one_paca(num);
else
dump_one_paca(xmon_owner);
@@ -2751,7 +2751,7 @@ static void dump_xives(void)
 
termch = c; /* Put c back, it wasn't 'a' */
 
-   if (scanhex())
+   if (scanhex() && num < num_possible_cpus())
dump_one_xive(num);
else
dump_one_xive(xmon_owner);




Re: [PATCH 3/3] powerpc/qspinlock: Use generic smp_cond_load_relaxed

2021-03-09 Thread Michal Suchánek
On Tue, Mar 09, 2021 at 07:46:11AM -0800, Davidlohr Bueso wrote:
> On Tue, 09 Mar 2021, Michal Suchánek wrote:
> 
> > On Mon, Mar 08, 2021 at 05:59:50PM -0800, Davidlohr Bueso wrote:
> > > 49a7d46a06c3 (powerpc: Implement smp_cond_load_relaxed()) added
> > > busy-waiting pausing with a preferred SMT priority pattern, lowering
> > > the priority (reducing decode cycles) during the whole loop slowpath.
> > > 
> > > However, data shows that while this pattern works well with simple
> >  ^^
> > > spinlocks, queued spinlocks benefit more being kept in medium priority,
> > > with a cpu_relax() instead, being a low+medium combo on powerpc.
> > ...
> > > 
> > > diff --git a/arch/powerpc/include/asm/barrier.h 
> > > b/arch/powerpc/include/asm/barrier.h
> > > index aecfde829d5d..7ae29cfb06c0 100644
> > > --- a/arch/powerpc/include/asm/barrier.h
> > > +++ b/arch/powerpc/include/asm/barrier.h
> > > @@ -80,22 +80,6 @@ do {   
> > > \
> > >   ___p1;  \
> > >  })
> > > 
> > > -#ifdef CONFIG_PPC64
> > Maybe it should be kept for the simple spinlock case then?
> 
> It is kept, note that simple spinlocks don't use smp_cond_load_relaxed,
> but instead deal with the priorities in arch_spin_lock(), so it will
> spin in low priority until it sees a chance to take the lock, where
> it switches back to medium.

Indeed, thanks for the clarification.

Michal


Re: [PATCH v2 8/8] powerpc/xive: Map one IPI interrupt per node

2021-03-09 Thread Cédric Le Goater
On 3/9/21 2:23 PM, Greg Kurz wrote:
> On Wed, 3 Mar 2021 18:48:57 +0100
> Cédric Le Goater  wrote:
> 
>> ipistorm [*] can be used to benchmark the raw interrupt rate of an
>> interrupt controller by measuring the number of IPIs a system can
>> sustain. When applied to the XIVE interrupt controller of POWER9 and
>> POWER10 systems, a significant drop of the interrupt rate can be
>> observed when crossing the second node boundary.
>>
>> This is due to the fact that a single IPI interrupt is used for all
>> CPUs of the system. The structure is shared and the cache line updates
>> impact greatly the traffic between nodes and the overall IPI
>> performance.
>>
>> As a workaround, the impact can be reduced by deactivating the IRQ
>> lockup detector ("noirqdebug") which does a lot of accounting in the
>> Linux IRQ descriptor structure and is responsible for most of the
>> performance penalty.
>>
>> As a fix, this proposal allocates an IPI interrupt per node, to be
>> shared by all CPUs of that node. It solves the scaling issue, the IRQ
>> lockup detector still has an impact but the XIVE interrupt rate scales
>> linearly. It also improves the "noirqdebug" case as shown in the
>> tables below.
>>
>>  * P9 DD2.2 - 2s * 64 threads
>>
>>"noirqdebug"
>>                         Mint/s                Mint/s
>>  chips  cpus       IPI/sys   IPI/chip   IPI/chip    IPI/sys
>>  ----------------------------------------------------------
>>  1      0-15      4.984023   4.875405   4.996536   5.048892
>>         0-31     10.879164  10.544040  10.757632  11.037859
>>         0-47     15.345301  14.688764  14.926520  15.310053
>>         0-63     17.064907  17.066812  17.613416  17.874511
>>  2      0-79     11.768764  21.650749  22.689120  22.566508
>>         0-95     10.616812  26.878789  28.434703  28.320324
>>         0-111    10.151693  31.397803  31.771773  32.388122
>>         0-127     9.948502  33.139336  34.875716  35.224548
>>
>>  * P10 DD1 - 4s (not homogeneous) 352 threads
>>
>>"noirqdebug"
>>                         Mint/s                Mint/s
>>  chips  cpus       IPI/sys   IPI/chip   IPI/chip    IPI/sys
>>  ----------------------------------------------------------
>>  1      0-15      2.409402   2.364108   2.383303   2.395091
>>         0-31      6.028325   6.046075   6.08       6.073750
>>         0-47      8.655178   8.644531   8.712830   8.724702
>>         0-63     11.629652  11.735953  12.088203  12.055979
>>         0-79     14.392321  14.729959  14.986701  14.973073
>>         0-95     12.604158  13.004034  17.528748  17.568095
>>  2      0-111     9.767753  13.719831  19.968606  20.024218
>>         0-127     6.744566  16.418854  22.898066  22.995110
>>         0-143     6.005699  19.174421  25.425622  25.417541
>>         0-159     5.649719  21.938836  27.952662  28.059603
>>         0-175     5.441410  24.109484  31.133915  31.127996
>>  3      0-191     5.318341  24.405322  33.999221  33.775354
>>         0-207     5.191382  26.449769  36.050161  35.867307
>>         0-223     5.102790  29.356943  39.544135  39.508169
>>         0-239     5.035295  31.933051  42.135075  42.071975
>>         0-255     4.969209  34.477367  44.655395  44.757074
>>  4      0-271     4.907652  35.887016  47.080545  47.318537
>>         0-287     4.839581  38.076137  50.464307  50.636219
>>         0-303     4.786031  40.881319  53.478684  53.310759
>>         0-319     4.743750  43.448424  56.388102  55.973969
>>         0-335     4.709936  45.623532  59.400930  58.926857
>>         0-351     4.681413  45.646151  62.035804  61.830057
>>
>> [*] https://github.com/antonblanchard/ipistorm
>>
>> Signed-off-by: Cédric Le Goater 
>> ---
>>  arch/powerpc/sysdev/xive/xive-internal.h |  2 --
>>  arch/powerpc/sysdev/xive/common.c| 39 ++--
>>  2 files changed, 30 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/powerpc/sysdev/xive/xive-internal.h 
>> b/arch/powerpc/sysdev/xive/xive-internal.h
>> index 9cf57c722faa..b3a456fdd3a5 100644
>> --- a/arch/powerpc/sysdev/xive/xive-internal.h
>> +++ b/arch/powerpc/sysdev/xive/xive-internal.h
>> @@ -5,8 +5,6 @@
>>  #ifndef __XIVE_INTERNAL_H
>>  #define __XIVE_INTERNAL_H
>>  
>> -#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */
>> -
>>  /*
>>   * A "disabled" interrupt should never fire, to catch problems
>>   * we set its logical number to this
>> diff --git a/arch/powerpc/sysdev/xive/common.c 
>> b/arch/powerpc/sysdev/xive/common.c
>> index 8eefd152b947..c27f7bb0494b 100644
>> --- a/arch/powerpc/sysdev/xive/common.c
>> +++ b/arch/powerpc/sysdev/xive/common.c
>> @@ -65,8 +65,16 @@ static struct irq_domain *xive_irq_domain;
>>  #ifdef CONFIG_SMP
>>  static struct irq_domain *xive_ipi_irq_domain;
>>  
>> -/* The IPIs all use the same 

Re: make alloc_anon_inode more useful

2021-03-09 Thread Jason Gunthorpe
On Tue, Mar 09, 2021 at 04:53:39PM +0100, Christoph Hellwig wrote:
> Hi all,
> 
> this series first renames the existing alloc_anon_inode to
> alloc_anon_inode_sb to clearly mark it as requiring a superblock.
> 
> It then adds a new alloc_anon_inode that works on the anon_inode
> file system super block, thus removing tons of boilerplate code.
> 
> The few remaining callers of alloc_anon_inode_sb all use alloc_file_pseudo
> later, but might also be ripe for some cleanup.

I like it

For a submission plan can we have this on a git branch please? I will
need a copy for RDMA and Alex will need one for vfio..

Thanks,
Jason


Re: [PATCH 3/9] powerpc/pseries: remove the ppc-cmm file system

2021-03-09 Thread Jason Gunthorpe
On Tue, Mar 09, 2021 at 04:53:42PM +0100, Christoph Hellwig wrote:
> Just use the generic anon_inode file system.
> 
> Signed-off-by: Christoph Hellwig 
>  arch/powerpc/platforms/pseries/cmm.c | 27 ++-
>  1 file changed, 2 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/cmm.c 
> b/arch/powerpc/platforms/pseries/cmm.c
> index 6d36b858b14df1..9d07e6bea7126c 100644
> +++ b/arch/powerpc/platforms/pseries/cmm.c
> @@ -6,6 +6,7 @@
>   * Author(s): Brian King (brk...@linux.vnet.ibm.com),
>   */
>  
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -502,19 +503,6 @@ static struct notifier_block cmm_mem_nb = {
>  };
>  
>  #ifdef CONFIG_BALLOON_COMPACTION
> -static struct vfsmount *balloon_mnt;
> -
> -static int cmm_init_fs_context(struct fs_context *fc)
> -{
> - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;

Should we clean up these unused magic constants too?

include/uapi/linux/magic.h:#define PPC_CMM_MAGIC0xc7571590

Jason


Re: [PATCH 5/9] vmw_balloon: remove the balloon-vmware file system

2021-03-09 Thread David Hildenbrand

On 09.03.21 16:53, Christoph Hellwig wrote:

Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
  drivers/misc/vmw_balloon.c | 24 ++--
  1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 5d057a05ddbee8..be4be32f858253 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -16,6 +16,7 @@
  //#define DEBUG
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
+#include 

  #include 
  #include 
  #include 
@@ -1735,20 +1736,6 @@ static inline void vmballoon_debugfs_exit(struct 
vmballoon *b)
  
  
  #ifdef CONFIG_BALLOON_COMPACTION

-
-static int vmballoon_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type vmballoon_fs = {
-   .name   = "balloon-vmware",
-   .init_fs_context= vmballoon_init_fs_context,
-   .kill_sb= kill_anon_super,
-};
-
-static struct vfsmount *vmballoon_mnt;
-
  /**
   * vmballoon_migratepage() - migrates a balloon page.
   * @b_dev_info: balloon device information descriptor.
@@ -1878,8 +1865,6 @@ static void vmballoon_compaction_deinit(struct vmballoon 
*b)
iput(b->b_dev_info.inode);
  
  	b->b_dev_info.inode = NULL;

-   kern_unmount(vmballoon_mnt);
-   vmballoon_mnt = NULL;
  }
  
  /**

@@ -1895,13 +1880,8 @@ static void vmballoon_compaction_deinit(struct vmballoon 
*b)
   */
  static __init int vmballoon_compaction_init(struct vmballoon *b)
  {
-   vmballoon_mnt = kern_mount(_fs);
-   if (IS_ERR(vmballoon_mnt))
-   return PTR_ERR(vmballoon_mnt);
-
b->b_dev_info.migratepage = vmballoon_migratepage;
-   b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb);
-
+   b->b_dev_info.inode = alloc_anon_inode();
if (IS_ERR(b->b_dev_info.inode))
return PTR_ERR(b->b_dev_info.inode);
  



Same comment regarding BALLOON_VMW_MAGIC and includes (mount.h, 
pseudo_fs.h).


Apart from that looks good.

--
Thanks,

David / dhildenb



Re: [PATCH 6/9] virtio_balloon: remove the balloon-kvm file system

2021-03-09 Thread David Hildenbrand

On 09.03.21 16:53, Christoph Hellwig wrote:

Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
  drivers/virtio/virtio_balloon.c | 30 +++---
  1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index cae76ee5bdd688..1efb890cd3ff09 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -6,6 +6,7 @@
   *  Copyright 2008 Rusty Russell IBM Corporation
   */
  
+#include 

  #include 
  #include 
  #include 
@@ -42,10 +43,6 @@
(1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
  #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
  
-#ifdef CONFIG_BALLOON_COMPACTION

-static struct vfsmount *balloon_mnt;
-#endif
-
  enum virtio_balloon_vq {
VIRTIO_BALLOON_VQ_INFLATE,
VIRTIO_BALLOON_VQ_DEFLATE,
@@ -805,18 +802,6 @@ static int virtballoon_migratepage(struct balloon_dev_info 
*vb_dev_info,
  
  	return MIGRATEPAGE_SUCCESS;

  }
-
-static int balloon_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
-   .name   = "balloon-kvm",
-   .init_fs_context = balloon_init_fs_context,
-   .kill_sb= kill_anon_super,
-};
-
  #endif /* CONFIG_BALLOON_COMPACTION */
  
  static unsigned long shrink_free_pages(struct virtio_balloon *vb,

@@ -909,17 +894,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_free_vb;
  
  #ifdef CONFIG_BALLOON_COMPACTION

-   balloon_mnt = kern_mount(_fs);
-   if (IS_ERR(balloon_mnt)) {
-   err = PTR_ERR(balloon_mnt);
-   goto out_del_vqs;
-   }
-
vb->vb_dev_info.migratepage = virtballoon_migratepage;
-   vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
+   vb->vb_dev_info.inode = alloc_anon_inode();
if (IS_ERR(vb->vb_dev_info.inode)) {
err = PTR_ERR(vb->vb_dev_info.inode);
-   goto out_kern_unmount;
+   goto out_del_vqs;
}
vb->vb_dev_info.inode->i_mapping->a_ops = _aops;
  #endif
@@ -1016,8 +995,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
  out_iput:
  #ifdef CONFIG_BALLOON_COMPACTION
iput(vb->vb_dev_info.inode);
-out_kern_unmount:
-   kern_unmount(balloon_mnt);
  out_del_vqs:
  #endif
vdev->config->del_vqs(vdev);
@@ -1070,7 +1047,6 @@ static void virtballoon_remove(struct virtio_device *vdev)
if (vb->vb_dev_info.inode)
iput(vb->vb_dev_info.inode);
  
-	kern_unmount(balloon_mnt);

  #endif
kfree(vb);
  }



... you might know what I am going to say :)

Apart from that LGTM.

--
Thanks,

David / dhildenb



Re: [PATCH 2/9] fs: add an argument-less alloc_anon_inode

2021-03-09 Thread David Hildenbrand

On 09.03.21 16:53, Christoph Hellwig wrote:

Add a new alloc_anon_inode helper that allocates an inode on
the anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
  fs/anon_inodes.c| 15 +--
  include/linux/anon_inodes.h |  1 +
  2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 4745fc37014332..b6a8ea71920bc3 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -63,7 +63,7 @@ static struct inode *anon_inode_make_secure_inode(
const struct qstr qname = QSTR_INIT(name, strlen(name));
int error;
  
-	inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);

+   inode = alloc_anon_inode();
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
@@ -225,13 +225,24 @@ int anon_inode_getfd_secure(const char *name, const 
struct file_operations *fops
  }
  EXPORT_SYMBOL_GPL(anon_inode_getfd_secure);
  
+/**

+ * alloc_anon_inode - create a new anonymous inode
+ *
+ * Create an inode on the anon_inode file system and return it.
+ */
+struct inode *alloc_anon_inode(void)
+{
+   return alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
+}
+EXPORT_SYMBOL_GPL(alloc_anon_inode);
+
  static int __init anon_inode_init(void)
  {
anon_inode_mnt = kern_mount(_inode_fs_type);
if (IS_ERR(anon_inode_mnt))
panic("anon_inode_init() kernel mount failed (%ld)\n", 
PTR_ERR(anon_inode_mnt));
  
-	anon_inode_inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);

+   anon_inode_inode = alloc_anon_inode();
if (IS_ERR(anon_inode_inode))
panic("anon_inode_init() inode allocation failed (%ld)\n", 
PTR_ERR(anon_inode_inode));
  
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h

index 71881a2b6f7860..b5ae9a6eda9923 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -21,6 +21,7 @@ int anon_inode_getfd_secure(const char *name,
const struct file_operations *fops,
void *priv, int flags,
const struct inode *context_inode);
+struct inode *alloc_anon_inode(void);
  
  #endif /* _LINUX_ANON_INODES_H */
  



Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb



Re: [PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb

2021-03-09 Thread David Hildenbrand

On 09.03.21 16:53, Christoph Hellwig wrote:

Rename alloc_anon_inode to free the name for a new variant that does not
need boilerplate to create a super_block first.

Signed-off-by: Christoph Hellwig 
---
  arch/powerpc/platforms/pseries/cmm.c | 2 +-
  drivers/dma-buf/dma-buf.c| 2 +-
  drivers/gpu/drm/drm_drv.c| 2 +-
  drivers/misc/cxl/api.c   | 2 +-
  drivers/misc/vmw_balloon.c   | 2 +-
  drivers/scsi/cxlflash/ocxl_hw.c  | 2 +-
  drivers/virtio/virtio_balloon.c  | 2 +-
  fs/aio.c | 2 +-
  fs/anon_inodes.c | 4 ++--
  fs/libfs.c   | 2 +-
  include/linux/fs.h   | 2 +-
  kernel/resource.c| 2 +-
  mm/z3fold.c  | 2 +-
  mm/zsmalloc.c| 2 +-
  14 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/cmm.c 
b/arch/powerpc/platforms/pseries/cmm.c
index 45a3a3022a85c9..6d36b858b14df1 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -580,7 +580,7 @@ static int cmm_balloon_compaction_init(void)
return rc;
}
  
-	b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);

+   b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
if (IS_ERR(b_dev_info.inode)) {
rc = PTR_ERR(b_dev_info.inode);
b_dev_info.inode = NULL;
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f264b70c383eb4..dedcc9483352dc 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -445,7 +445,7 @@ static inline int is_dma_buf_file(struct file *file)
  static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
  {
struct file *file;
-   struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
+   struct inode *inode = alloc_anon_inode_sb(dma_buf_mnt->mnt_sb);
  
  	if (IS_ERR(inode))

return ERR_CAST(inode);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 20d22e41d7ce74..87e7214a8e3565 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -519,7 +519,7 @@ static struct inode *drm_fs_inode_new(void)
return ERR_PTR(r);
}
  
-	inode = alloc_anon_inode(drm_fs_mnt->mnt_sb);

+   inode = alloc_anon_inode_sb(drm_fs_mnt->mnt_sb);
if (IS_ERR(inode))
simple_release_fs(_fs_mnt, _fs_cnt);
  
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c

index b493de962153ba..2efbf6c98028ef 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -73,7 +73,7 @@ static struct file *cxl_getfile(const char *name,
goto err_module;
}
  
-	inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);

+   inode = alloc_anon_inode_sb(cxl_vfs_mount->mnt_sb);
if (IS_ERR(inode)) {
file = ERR_CAST(inode);
goto err_fs;
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index b837e7eba5f7dc..5d057a05ddbee8 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -1900,7 +1900,7 @@ static __init int vmballoon_compaction_init(struct 
vmballoon *b)
return PTR_ERR(vmballoon_mnt);
  
  	b->b_dev_info.migratepage = vmballoon_migratepage;

-   b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
+   b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb);
  
  	if (IS_ERR(b->b_dev_info.inode))

return PTR_ERR(b->b_dev_info.inode);
diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
index 244fc27215dc79..40184ed926b557 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.c
+++ b/drivers/scsi/cxlflash/ocxl_hw.c
@@ -88,7 +88,7 @@ static struct file *ocxlflash_getfile(struct device *dev, 
const char *name,
goto err2;
}
  
-	inode = alloc_anon_inode(ocxlflash_vfs_mount->mnt_sb);

+   inode = alloc_anon_inode_sb(ocxlflash_vfs_mount->mnt_sb);
if (IS_ERR(inode)) {
rc = PTR_ERR(inode);
dev_err(dev, "%s: alloc_anon_inode failed rc=%d\n",
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 8985fc2cea8615..cae76ee5bdd688 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -916,7 +916,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
}
  
  	vb->vb_dev_info.migratepage = virtballoon_migratepage;

-   vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+   vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
if (IS_ERR(vb->vb_dev_info.inode)) {
err = PTR_ERR(vb->vb_dev_info.inode);
goto out_kern_unmount;
diff --git a/fs/aio.c b/fs/aio.c
index 1f32da13d39ee6..d1c2aa7fd6de7c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -234,7 +234,7 @@ static const struct 

Re: [PATCH 3/9] powerpc/pseries: remove the ppc-cmm file system

2021-03-09 Thread David Hildenbrand

On 09.03.21 16:53, Christoph Hellwig wrote:

Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
  arch/powerpc/platforms/pseries/cmm.c | 27 ++-
  1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/cmm.c 
b/arch/powerpc/platforms/pseries/cmm.c
index 6d36b858b14df1..9d07e6bea7126c 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -6,6 +6,7 @@
   * Author(s): Brian King (brk...@linux.vnet.ibm.com),
   */
  
+#include 

  #include 
  #include 
  #include 
@@ -502,19 +503,6 @@ static struct notifier_block cmm_mem_nb = {
  };
  
  #ifdef CONFIG_BALLOON_COMPACTION

-static struct vfsmount *balloon_mnt;
-
-static int cmm_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
-   .name = "ppc-cmm",
-   .init_fs_context = cmm_init_fs_context,
-   .kill_sb = kill_anon_super,
-};
-
  static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
   struct page *newpage, struct page *page,
   enum migrate_mode mode)
@@ -573,19 +561,10 @@ static int cmm_balloon_compaction_init(void)
balloon_devinfo_init(_dev_info);
b_dev_info.migratepage = cmm_migratepage;
  
-	balloon_mnt = kern_mount(_fs);

-   if (IS_ERR(balloon_mnt)) {
-   rc = PTR_ERR(balloon_mnt);
-   balloon_mnt = NULL;
-   return rc;
-   }
-
-   b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
+   b_dev_info.inode = alloc_anon_inode();
if (IS_ERR(b_dev_info.inode)) {
rc = PTR_ERR(b_dev_info.inode);
b_dev_info.inode = NULL;
-   kern_unmount(balloon_mnt);
-   balloon_mnt = NULL;
return rc;
}
  
@@ -597,8 +576,6 @@ static void cmm_balloon_compaction_deinit(void)

if (b_dev_info.inode)
iput(b_dev_info.inode);
b_dev_info.inode = NULL;
-   kern_unmount(balloon_mnt);
-   balloon_mnt = NULL;
  }
  #else /* CONFIG_BALLOON_COMPACTION */
  static int cmm_balloon_compaction_init(void)



I always wondered why that was necessary after all (with my limited fs 
knowledge :) ).


a) I assume you want to remove PPC_CMM_MAGIC from 
include/uapi/linux/magic.h as well?


b) Do we still need #include <linux/magic.h>, #include <linux/mount.h>
and #include <linux/pseudo_fs.h>?


Apart from that looks much cleaner.

--
Thanks,

David / dhildenb



Re: [PATCH v1] powerpc: Include running function as first entry in save_stack_trace() and friends

2021-03-09 Thread Mark Rutland
On Thu, Mar 04, 2021 at 03:54:48PM -0600, Segher Boessenkool wrote:
> Hi!

Hi Segher,

> On Thu, Mar 04, 2021 at 02:57:30PM +0000, Mark Rutland wrote:
> > It looks like GCC is happy to give us the function-entry-time FP if we use
> > __builtin_frame_address(1),
> 
> From the GCC manual:
>  Calling this function with a nonzero argument can have
>  unpredictable effects, including crashing the calling program.  As
>  a result, calls that are considered unsafe are diagnosed when the
>  '-Wframe-address' option is in effect.  Such calls should only be
>  made in debugging situations.
> 
> It *does* warn (the warning is in -Wall btw), on both powerpc and
> aarch64.  Furthermore, using this builtin causes lousy code (it forces
> the use of a frame pointer, which we normally try very hard to optimise
> away, for good reason).
> 
> And, that warning is not an idle warning.  Non-zero arguments to
> __builtin_frame_address can crash the program.  It won't on simpler
> functions, but there is no real definition of what a simpler function
> *is*.  It is meant for debugging, not for production use (this is also
> why no one has bothered to make it faster).
>
> On Power it should work, but on pretty much any other arch it won't.

I understand this is true generally, and cannot be relied upon in
portable code. However as you hint here for Power, I believe that on
arm64 __builtin_frame_address(1) shouldn't crash the program due to the
way frame records work on arm64, but I'll go check with some local
compiler folk. I agree that __builtin_frame_address(2) and beyond
certainly can, e.g.  by NULL dereference and similar.

For context, why do you think this would work on power specifically? I
wonder if our rationale is similar.

Are you aware of anything in particular that breaks using
__builtin_frame_address(1) in non-portable code, or is this just a
general sentiment of this not being a supported use-case?

> > Unless we can get some strong guarantees from compiler folk such that we
> > can guarantee a specific function acts as a boundary for unwinding (and
> > doesn't itself get split, etc), the only reliable way I can think to
> > solve this requires an assembly trampoline. Whatever we do is liable to
> > need some invasive rework.
> 
> You cannot get such a guarantee, other than not letting the compiler
> see into the routine at all, like with assembler code (not inline asm,
> real assembler code).

If we cannot reliably ensure this then I'm happy to go write an assembly
trampoline to snapshot the state at a function call boundary (where our
procedure call standard mandates the state of the LR, FP, and frame
records pointed to by the FP). This'll require reworking a reasonable
amount of code cross-architecture, so I'll need to get some more
concrete justification (e.g. examples of things that can go wrong in
practice).

> The real way forward is to bite the bullet and to no longer pretend you
> can do a full backtrace from just the stack contents.  You cannot.

I think what you mean here is that there's no reliable way to handle the
current/leaf function, right? If so I do agree.

Beyond that I believe that arm64's frame records should be sufficient.

Thanks,
Mark.


Re: [PATCH v3] powerpc/32: remove bogus ppc_select syscall

2021-03-09 Thread Christophe Leroy




Le 05/03/2021 à 13:03, Arnd Bergmann a écrit :

On Fri, Mar 5, 2021 at 11:15 AM Christophe Leroy
 wrote:

Le 05/03/2021 à 11:06, Arnd Bergmann a écrit :

On Fri, Mar 5, 2021 at 9:40 AM Christophe Leroy  
wrote:
- glibc support for ppc32 gets merged during the linux-2.5 days, supporting
only #142 with the new behavior.


It turns out to be older than I said. This was actually in glibc-1.94
from 1997, so during
the linux-2.1 days, not 2.5!


Whaou, nice archeology, thanks. Do you mind if I copy the history you 
established ?


That's fine, please copy it.


In your commit, you said 2.3.48. Here in the history you say 2.1.48. Which one 
is correct ?


2.1.48 is correct.


Regardless of whether binaries are broken or not for other reasons, is it
worth expecting an almost 25 yr old binary to run on future kernels? If one is
able to put in the necessary effort to port your hardware to the latest kernel,
can't one really port the binary as well?


I think the questions of supporting old hardware with new software and
supporting old
binaries on modern kernels are largely orthogonal. The policy we have
is that we don't
break existing user setups, and it really seems unlikely that anyone
still uses pre-1997
executables for anything that requires a modern kernel!

I now checked the oldest mklinux I could find (DR2.1 from 1997), and
it even has the
modern glibc and linux-2.0.28 kernel patched to provide the modern semantics at
syscall #142 for glibc, with the same (already unused) compatibility hack at #82
that we still have for ppc32 today. This made mklinux DR2.1 binaries
incompatible
with mainline linux-2.0 kernels, but they might still work with modern kernels,
regardless of whether we remove support for binaries that worked with mainline
linux-2.0.



I had another look. In fact x86, arm and m68k still have the #82 syscall, but they don't have the 
hack we have on powerpc to "guess" that something is calling the old select with the arguments of 
the new select.


As part of my series of user accesses cleanup, I'll replace the open coded stuff by a call to 
sys_old_select(), see below.


Maybe in the end we should keep the #82 syscall, but do we really need to keep the powerpc hack?
Maybe the best option is to drop the ppc_select() function but reference sys_old_select() instead of
ni_syscall for entry #82 in the syscall table?


Christophe
---
diff --git a/arch/powerpc/include/asm/unistd.h 
b/arch/powerpc/include/asm/unistd.h
index 700fcdac2e3c..b541c690a31c 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -40,6 +40,7 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #ifdef CONFIG_PPC32
 #define __ARCH_WANT_OLD_STAT
+#define __ARCH_WANT_SYS_OLD_SELECT
 #endif
 #ifdef CONFIG_PPC64
 #define __ARCH_WANT_SYS_TIME
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 078608ec2e92..a552c9e68d7e 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -82,16 +82,8 @@ int
 ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct 
__kernel_old_timeval __user *tvp)

 {
if ( (unsigned long)n >= 4096 )
-   {
-   unsigned long __user *buffer = (unsigned long __user *)n;
-   if (!access_ok(buffer, 5*sizeof(unsigned long))
-   || __get_user(n, buffer)
-   || __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
-   || __get_user(outp, ((fd_set  __user * __user *)(buffer+2)))
-   || __get_user(exp, ((fd_set  __user * __user *)(buffer+3)))
-   || __get_user(tvp, ((struct __kernel_old_timeval  __user * 
__user *)(buffer+4
-   return -EFAULT;
-   }
+   return sys_old_select((void __user *)n);
+
return sys_select(n, inp, outp, exp, tvp);
 }
 #endif


[PATCH 9/9] zsmalloc: remove the zsmalloc file system

2021-03-09 Thread Christoph Hellwig
Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
 mm/zsmalloc.c | 48 +++-
 1 file changed, 3 insertions(+), 45 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a6449a2ad861de..a7d2f471935447 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -176,10 +177,6 @@ struct zs_size_stat {
 static struct dentry *zs_stat_root;
 #endif
 
-#ifdef CONFIG_COMPACTION
-static struct vfsmount *zsmalloc_mnt;
-#endif
-
 /*
  * We assign a page to ZS_ALMOST_EMPTY fullness group when:
  * n <= N / f, where
@@ -308,8 +305,6 @@ static void kick_deferred_free(struct zs_pool *pool);
 static void init_deferred_free(struct zs_pool *pool);
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
 #else
-static int zsmalloc_mount(void) { return 0; }
-static void zsmalloc_unmount(void) {}
 static int zs_register_migration(struct zs_pool *pool) { return 0; }
 static void zs_unregister_migration(struct zs_pool *pool) {}
 static void migrate_lock_init(struct zspage *zspage) {}
@@ -1751,33 +1746,6 @@ static void lock_zspage(struct zspage *zspage)
} while ((page = get_next_page(page)) != NULL);
 }
 
-static int zs_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type zsmalloc_fs = {
-   .name   = "zsmalloc",
-   .init_fs_context = zs_init_fs_context,
-   .kill_sb= kill_anon_super,
-};
-
-static int zsmalloc_mount(void)
-{
-   int ret = 0;
-
-   zsmalloc_mnt = kern_mount(_fs);
-   if (IS_ERR(zsmalloc_mnt))
-   ret = PTR_ERR(zsmalloc_mnt);
-
-   return ret;
-}
-
-static void zsmalloc_unmount(void)
-{
-   kern_unmount(zsmalloc_mnt);
-}
-
 static void migrate_lock_init(struct zspage *zspage)
 {
rwlock_init(>lock);
@@ -2086,7 +2054,7 @@ static const struct address_space_operations 
zsmalloc_aops = {
 
 static int zs_register_migration(struct zs_pool *pool)
 {
-   pool->inode = alloc_anon_inode_sb(zsmalloc_mnt->mnt_sb);
+   pool->inode = alloc_anon_inode();
if (IS_ERR(pool->inode)) {
pool->inode = NULL;
return 1;
@@ -2506,14 +2474,10 @@ static int __init zs_init(void)
 {
int ret;
 
-   ret = zsmalloc_mount();
-   if (ret)
-   goto out;
-
ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare",
zs_cpu_prepare, zs_cpu_dead);
if (ret)
-   goto hp_setup_fail;
+   return ret;
 
 #ifdef CONFIG_ZPOOL
zpool_register_driver(_zpool_driver);
@@ -2522,11 +2486,6 @@ static int __init zs_init(void)
zs_stat_init();
 
return 0;
-
-hp_setup_fail:
-   zsmalloc_unmount();
-out:
-   return ret;
 }
 
 static void __exit zs_exit(void)
@@ -2534,7 +2493,6 @@ static void __exit zs_exit(void)
 #ifdef CONFIG_ZPOOL
zpool_unregister_driver(_zpool_driver);
 #endif
-   zsmalloc_unmount();
cpuhp_remove_state(CPUHP_MM_ZS_PREPARE);
 
zs_stat_exit();
-- 
2.30.1



[PATCH 8/9] z3fold: remove the z3fold file system

2021-03-09 Thread Christoph Hellwig
Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
 mm/z3fold.c | 38 ++
 1 file changed, 2 insertions(+), 36 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index e7cd9298b221f5..e0749a3d8987de 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -23,6 +23,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include 
 #include 
 #include 
 #include 
@@ -345,38 +346,10 @@ static inline void free_handle(unsigned long handle, 
struct z3fold_header *zhdr)
}
 }
 
-static int z3fold_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type z3fold_fs = {
-   .name   = "z3fold",
-   .init_fs_context = z3fold_init_fs_context,
-   .kill_sb= kill_anon_super,
-};
-
-static struct vfsmount *z3fold_mnt;
-static int z3fold_mount(void)
-{
-   int ret = 0;
-
-   z3fold_mnt = kern_mount(_fs);
-   if (IS_ERR(z3fold_mnt))
-   ret = PTR_ERR(z3fold_mnt);
-
-   return ret;
-}
-
-static void z3fold_unmount(void)
-{
-   kern_unmount(z3fold_mnt);
-}
-
 static const struct address_space_operations z3fold_aops;
 static int z3fold_register_migration(struct z3fold_pool *pool)
 {
-   pool->inode = alloc_anon_inode_sb(z3fold_mnt->mnt_sb);
+   pool->inode = alloc_anon_inode();
if (IS_ERR(pool->inode)) {
pool->inode = NULL;
return 1;
@@ -1787,22 +1760,15 @@ MODULE_ALIAS("zpool-z3fold");
 
 static int __init init_z3fold(void)
 {
-   int ret;
-
/* Make sure the z3fold header is not larger than the page size */
BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
-   ret = z3fold_mount();
-   if (ret)
-   return ret;
 
zpool_register_driver(_zpool_driver);
-
return 0;
 }
 
 static void __exit exit_z3fold(void)
 {
-   z3fold_unmount();
zpool_unregister_driver(_zpool_driver);
 }
 
-- 
2.30.1



[PATCH 7/9] iomem: remove the iomem file system

2021-03-09 Thread Christoph Hellwig
Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
 kernel/resource.c | 30 --
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/kernel/resource.c b/kernel/resource.c
index 0fd091a3f2fc66..12560553c26796 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -1838,37 +1839,14 @@ static int __init strict_iomem(char *str)
return 1;
 }
 
-static int iomem_fs_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type iomem_fs_type = {
-   .name   = "iomem",
-   .owner  = THIS_MODULE,
-   .init_fs_context = iomem_fs_init_fs_context,
-   .kill_sb= kill_anon_super,
-};
-
 static int __init iomem_init_inode(void)
 {
-   static struct vfsmount *iomem_vfs_mount;
-   static int iomem_fs_cnt;
struct inode *inode;
-   int rc;
-
-   rc = simple_pin_fs(_fs_type, _vfs_mount, _fs_cnt);
-   if (rc < 0) {
-   pr_err("Cannot mount iomem pseudo filesystem: %d\n", rc);
-   return rc;
-   }
 
-   inode = alloc_anon_inode_sb(iomem_vfs_mount->mnt_sb);
+   inode = alloc_anon_inode();
if (IS_ERR(inode)) {
-   rc = PTR_ERR(inode);
-   pr_err("Cannot allocate inode for iomem: %d\n", rc);
-   simple_release_fs(_vfs_mount, _fs_cnt);
-   return rc;
+   pr_err("Cannot allocate inode for iomem: %zd\n", 
PTR_ERR(inode));
+   return PTR_ERR(inode);
}
 
/*
-- 
2.30.1



[PATCH 6/9] virtio_balloon: remove the balloon-kvm file system

2021-03-09 Thread Christoph Hellwig
Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
 drivers/virtio/virtio_balloon.c | 30 +++---
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index cae76ee5bdd688..1efb890cd3ff09 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -6,6 +6,7 @@
  *  Copyright 2008 Rusty Russell IBM Corporation
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -42,10 +43,6 @@
(1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
 #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
 
-#ifdef CONFIG_BALLOON_COMPACTION
-static struct vfsmount *balloon_mnt;
-#endif
-
 enum virtio_balloon_vq {
VIRTIO_BALLOON_VQ_INFLATE,
VIRTIO_BALLOON_VQ_DEFLATE,
@@ -805,18 +802,6 @@ static int virtballoon_migratepage(struct balloon_dev_info 
*vb_dev_info,
 
return MIGRATEPAGE_SUCCESS;
 }
-
-static int balloon_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
-   .name   = "balloon-kvm",
-   .init_fs_context = balloon_init_fs_context,
-   .kill_sb= kill_anon_super,
-};
-
 #endif /* CONFIG_BALLOON_COMPACTION */
 
 static unsigned long shrink_free_pages(struct virtio_balloon *vb,
@@ -909,17 +894,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_free_vb;
 
 #ifdef CONFIG_BALLOON_COMPACTION
-   balloon_mnt = kern_mount(_fs);
-   if (IS_ERR(balloon_mnt)) {
-   err = PTR_ERR(balloon_mnt);
-   goto out_del_vqs;
-   }
-
vb->vb_dev_info.migratepage = virtballoon_migratepage;
-   vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
+   vb->vb_dev_info.inode = alloc_anon_inode();
if (IS_ERR(vb->vb_dev_info.inode)) {
err = PTR_ERR(vb->vb_dev_info.inode);
-   goto out_kern_unmount;
+   goto out_del_vqs;
}
vb->vb_dev_info.inode->i_mapping->a_ops = _aops;
 #endif
@@ -1016,8 +995,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
 out_iput:
 #ifdef CONFIG_BALLOON_COMPACTION
iput(vb->vb_dev_info.inode);
-out_kern_unmount:
-   kern_unmount(balloon_mnt);
 out_del_vqs:
 #endif
vdev->config->del_vqs(vdev);
@@ -1070,7 +1047,6 @@ static void virtballoon_remove(struct virtio_device *vdev)
if (vb->vb_dev_info.inode)
iput(vb->vb_dev_info.inode);
 
-   kern_unmount(balloon_mnt);
 #endif
kfree(vb);
 }
-- 
2.30.1



[PATCH 5/9] vmw_balloon: remove the balloon-vmware file system

2021-03-09 Thread Christoph Hellwig
Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
 drivers/misc/vmw_balloon.c | 24 ++--
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 5d057a05ddbee8..be4be32f858253 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -16,6 +16,7 @@
 //#define DEBUG
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include 
 #include 
 #include 
 #include 
@@ -1735,20 +1736,6 @@ static inline void vmballoon_debugfs_exit(struct 
vmballoon *b)
 
 
 #ifdef CONFIG_BALLOON_COMPACTION
-
-static int vmballoon_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type vmballoon_fs = {
-   .name   = "balloon-vmware",
-   .init_fs_context= vmballoon_init_fs_context,
-   .kill_sb= kill_anon_super,
-};
-
-static struct vfsmount *vmballoon_mnt;
-
 /**
  * vmballoon_migratepage() - migrates a balloon page.
  * @b_dev_info: balloon device information descriptor.
@@ -1878,8 +1865,6 @@ static void vmballoon_compaction_deinit(struct vmballoon 
*b)
iput(b->b_dev_info.inode);
 
b->b_dev_info.inode = NULL;
-   kern_unmount(vmballoon_mnt);
-   vmballoon_mnt = NULL;
 }
 
 /**
@@ -1895,13 +1880,8 @@ static void vmballoon_compaction_deinit(struct vmballoon 
*b)
  */
 static __init int vmballoon_compaction_init(struct vmballoon *b)
 {
-   vmballoon_mnt = kern_mount(_fs);
-   if (IS_ERR(vmballoon_mnt))
-   return PTR_ERR(vmballoon_mnt);
-
b->b_dev_info.migratepage = vmballoon_migratepage;
-   b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb);
-
+   b->b_dev_info.inode = alloc_anon_inode();
if (IS_ERR(b->b_dev_info.inode))
return PTR_ERR(b->b_dev_info.inode);
 
-- 
2.30.1



[PATCH 4/9] drm: remove the drm file system

2021-03-09 Thread Christoph Hellwig
Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
 drivers/gpu/drm/drm_drv.c | 64 ++-
 1 file changed, 3 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 87e7214a8e3565..af293d76f979e5 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -26,6 +26,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -475,65 +476,6 @@ void drm_dev_unplug(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_dev_unplug);
 
-/*
- * DRM internal mount
- * We want to be able to allocate our own "struct address_space" to control
- * memory-mappings in VRAM (or stolen RAM, ...). However, core MM does not 
allow
- * stand-alone address_space objects, so we need an underlying inode. As there
- * is no way to allocate an independent inode easily, we need a fake internal
- * VFS mount-point.
- *
- * The drm_fs_inode_new() function allocates a new inode, drm_fs_inode_free()
- * frees it again. You are allowed to use iget() and iput() to get references 
to
- * the inode. But each drm_fs_inode_new() call must be paired with exactly one
- * drm_fs_inode_free() call (which does not have to be the last iput()).
- * We use drm_fs_inode_*() to manage our internal VFS mount-point and share it
- * between multiple inode-users. You could, technically, call
- * iget() + drm_fs_inode_free() directly after alloc and sometime later do an
- * iput(), but this way you'd end up with a new vfsmount for each inode.
- */
-
-static int drm_fs_cnt;
-static struct vfsmount *drm_fs_mnt;
-
-static int drm_fs_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, 0x010203ff) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type drm_fs_type = {
-   .name   = "drm",
-   .owner  = THIS_MODULE,
-   .init_fs_context = drm_fs_init_fs_context,
-   .kill_sb= kill_anon_super,
-};
-
-static struct inode *drm_fs_inode_new(void)
-{
-   struct inode *inode;
-   int r;
-
-   r = simple_pin_fs(_fs_type, _fs_mnt, _fs_cnt);
-   if (r < 0) {
-   DRM_ERROR("Cannot mount pseudo fs: %d\n", r);
-   return ERR_PTR(r);
-   }
-
-   inode = alloc_anon_inode_sb(drm_fs_mnt->mnt_sb);
-   if (IS_ERR(inode))
-   simple_release_fs(_fs_mnt, _fs_cnt);
-
-   return inode;
-}
-
-static void drm_fs_inode_free(struct inode *inode)
-{
-   if (inode) {
-   iput(inode);
-   simple_release_fs(_fs_mnt, _fs_cnt);
-   }
-}
-
 /**
  * DOC: component helper usage recommendations
  *
@@ -563,7 +505,7 @@ static void drm_dev_init_release(struct drm_device *dev, 
void *res)
 {
drm_legacy_ctxbitmap_cleanup(dev);
drm_legacy_remove_map_hash(dev);
-   drm_fs_inode_free(dev->anon_inode);
+   iput(dev->anon_inode);
 
put_device(dev->dev);
/* Prevent use-after-free in drm_managed_release when debugging is
@@ -616,7 +558,7 @@ static int drm_dev_init(struct drm_device *dev,
if (ret)
return ret;
 
-   dev->anon_inode = drm_fs_inode_new();
+   dev->anon_inode = alloc_anon_inode();
if (IS_ERR(dev->anon_inode)) {
ret = PTR_ERR(dev->anon_inode);
DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret);
-- 
2.30.1



[PATCH 3/9] powerpc/pseries: remove the ppc-cmm file system

2021-03-09 Thread Christoph Hellwig
Just use the generic anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/platforms/pseries/cmm.c | 27 ++-
 1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/cmm.c 
b/arch/powerpc/platforms/pseries/cmm.c
index 6d36b858b14df1..9d07e6bea7126c 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -6,6 +6,7 @@
  * Author(s): Brian King (brk...@linux.vnet.ibm.com),
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -502,19 +503,6 @@ static struct notifier_block cmm_mem_nb = {
 };
 
 #ifdef CONFIG_BALLOON_COMPACTION
-static struct vfsmount *balloon_mnt;
-
-static int cmm_init_fs_context(struct fs_context *fc)
-{
-   return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
-   .name = "ppc-cmm",
-   .init_fs_context = cmm_init_fs_context,
-   .kill_sb = kill_anon_super,
-};
-
 static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
   struct page *newpage, struct page *page,
   enum migrate_mode mode)
@@ -573,19 +561,10 @@ static int cmm_balloon_compaction_init(void)
balloon_devinfo_init(_dev_info);
b_dev_info.migratepage = cmm_migratepage;
 
-   balloon_mnt = kern_mount(_fs);
-   if (IS_ERR(balloon_mnt)) {
-   rc = PTR_ERR(balloon_mnt);
-   balloon_mnt = NULL;
-   return rc;
-   }
-
-   b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
+   b_dev_info.inode = alloc_anon_inode();
if (IS_ERR(b_dev_info.inode)) {
rc = PTR_ERR(b_dev_info.inode);
b_dev_info.inode = NULL;
-   kern_unmount(balloon_mnt);
-   balloon_mnt = NULL;
return rc;
}
 
@@ -597,8 +576,6 @@ static void cmm_balloon_compaction_deinit(void)
if (b_dev_info.inode)
iput(b_dev_info.inode);
b_dev_info.inode = NULL;
-   kern_unmount(balloon_mnt);
-   balloon_mnt = NULL;
 }
 #else /* CONFIG_BALLOON_COMPACTION */
 static int cmm_balloon_compaction_init(void)
-- 
2.30.1



[PATCH 2/9] fs: add an argument-less alloc_anon_inode

2021-03-09 Thread Christoph Hellwig
Add a new alloc_anon_inode helper that allocates an inode on
the anon_inode file system.

Signed-off-by: Christoph Hellwig 
---
 fs/anon_inodes.c| 15 +--
 include/linux/anon_inodes.h |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 4745fc37014332..b6a8ea71920bc3 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -63,7 +63,7 @@ static struct inode *anon_inode_make_secure_inode(
const struct qstr qname = QSTR_INIT(name, strlen(name));
int error;
 
-   inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
+   inode = alloc_anon_inode();
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
@@ -225,13 +225,24 @@ int anon_inode_getfd_secure(const char *name, const 
struct file_operations *fops
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfd_secure);
 
+/**
+ * alloc_anon_inode - create a new anonymous inode
+ *
+ * Create an inode on the anon_inode file system and return it.
+ */
+struct inode *alloc_anon_inode(void)
+{
+   return alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
+}
+EXPORT_SYMBOL_GPL(alloc_anon_inode);
+
 static int __init anon_inode_init(void)
 {
anon_inode_mnt = kern_mount(_inode_fs_type);
if (IS_ERR(anon_inode_mnt))
panic("anon_inode_init() kernel mount failed (%ld)\n", 
PTR_ERR(anon_inode_mnt));
 
-   anon_inode_inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
+   anon_inode_inode = alloc_anon_inode();
if (IS_ERR(anon_inode_inode))
panic("anon_inode_init() inode allocation failed (%ld)\n", 
PTR_ERR(anon_inode_inode));
 
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index 71881a2b6f7860..b5ae9a6eda9923 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -21,6 +21,7 @@ int anon_inode_getfd_secure(const char *name,
const struct file_operations *fops,
void *priv, int flags,
const struct inode *context_inode);
+struct inode *alloc_anon_inode(void);
 
 #endif /* _LINUX_ANON_INODES_H */
 
-- 
2.30.1



make alloc_anon_inode more useful

2021-03-09 Thread Christoph Hellwig
Hi all,

this series first renames the existing alloc_anon_inode to
alloc_anon_inode_sb to clearly mark it as requiring a superblock.

It then adds a new alloc_anon_inode that works on the anon_inode
file system super block, thus removing tons of boilerplate code.

The few remaining callers of alloc_anon_inode_sb all use alloc_file_pseudo
later, but might also be ripe for some cleanup.

Diffstat:
 arch/powerpc/platforms/pseries/cmm.c |   27 +-
 drivers/dma-buf/dma-buf.c|2 -
 drivers/gpu/drm/drm_drv.c|   64 +--
 drivers/misc/cxl/api.c   |2 -
 drivers/misc/vmw_balloon.c   |   24 +
 drivers/scsi/cxlflash/ocxl_hw.c  |2 -
 drivers/virtio/virtio_balloon.c  |   30 +---
 fs/aio.c |2 -
 fs/anon_inodes.c |   15 +++-
 fs/libfs.c   |2 -
 include/linux/anon_inodes.h  |1 
 include/linux/fs.h   |2 -
 kernel/resource.c|   30 ++--
 mm/z3fold.c  |   38 +---
 mm/zsmalloc.c|   48 +-
 15 files changed, 39 insertions(+), 250 deletions(-)


[PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb

2021-03-09 Thread Christoph Hellwig
Rename alloc_anon_inode to alloc_anon_inode_sb to free the name for a new
variant that does not need boilerplate to create a super_block first.

Signed-off-by: Christoph Hellwig 
---
 arch/powerpc/platforms/pseries/cmm.c | 2 +-
 drivers/dma-buf/dma-buf.c| 2 +-
 drivers/gpu/drm/drm_drv.c| 2 +-
 drivers/misc/cxl/api.c   | 2 +-
 drivers/misc/vmw_balloon.c   | 2 +-
 drivers/scsi/cxlflash/ocxl_hw.c  | 2 +-
 drivers/virtio/virtio_balloon.c  | 2 +-
 fs/aio.c | 2 +-
 fs/anon_inodes.c | 4 ++--
 fs/libfs.c   | 2 +-
 include/linux/fs.h   | 2 +-
 kernel/resource.c| 2 +-
 mm/z3fold.c  | 2 +-
 mm/zsmalloc.c| 2 +-
 14 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/cmm.c 
b/arch/powerpc/platforms/pseries/cmm.c
index 45a3a3022a85c9..6d36b858b14df1 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -580,7 +580,7 @@ static int cmm_balloon_compaction_init(void)
return rc;
}
 
-   b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+   b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
if (IS_ERR(b_dev_info.inode)) {
rc = PTR_ERR(b_dev_info.inode);
b_dev_info.inode = NULL;
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f264b70c383eb4..dedcc9483352dc 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -445,7 +445,7 @@ static inline int is_dma_buf_file(struct file *file)
 static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
 {
struct file *file;
-   struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
+   struct inode *inode = alloc_anon_inode_sb(dma_buf_mnt->mnt_sb);
 
if (IS_ERR(inode))
return ERR_CAST(inode);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 20d22e41d7ce74..87e7214a8e3565 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -519,7 +519,7 @@ static struct inode *drm_fs_inode_new(void)
return ERR_PTR(r);
}
 
-   inode = alloc_anon_inode(drm_fs_mnt->mnt_sb);
+   inode = alloc_anon_inode_sb(drm_fs_mnt->mnt_sb);
if (IS_ERR(inode))
simple_release_fs(_fs_mnt, _fs_cnt);
 
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index b493de962153ba..2efbf6c98028ef 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -73,7 +73,7 @@ static struct file *cxl_getfile(const char *name,
goto err_module;
}
 
-   inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
+   inode = alloc_anon_inode_sb(cxl_vfs_mount->mnt_sb);
if (IS_ERR(inode)) {
file = ERR_CAST(inode);
goto err_fs;
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index b837e7eba5f7dc..5d057a05ddbee8 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -1900,7 +1900,7 @@ static __init int vmballoon_compaction_init(struct 
vmballoon *b)
return PTR_ERR(vmballoon_mnt);
 
b->b_dev_info.migratepage = vmballoon_migratepage;
-   b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
+   b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb);
 
if (IS_ERR(b->b_dev_info.inode))
return PTR_ERR(b->b_dev_info.inode);
diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
index 244fc27215dc79..40184ed926b557 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.c
+++ b/drivers/scsi/cxlflash/ocxl_hw.c
@@ -88,7 +88,7 @@ static struct file *ocxlflash_getfile(struct device *dev, 
const char *name,
goto err2;
}
 
-   inode = alloc_anon_inode(ocxlflash_vfs_mount->mnt_sb);
+   inode = alloc_anon_inode_sb(ocxlflash_vfs_mount->mnt_sb);
if (IS_ERR(inode)) {
rc = PTR_ERR(inode);
dev_err(dev, "%s: alloc_anon_inode failed rc=%d\n",
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 8985fc2cea8615..cae76ee5bdd688 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -916,7 +916,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
}
 
vb->vb_dev_info.migratepage = virtballoon_migratepage;
-   vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+   vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
if (IS_ERR(vb->vb_dev_info.inode)) {
err = PTR_ERR(vb->vb_dev_info.inode);
goto out_kern_unmount;
diff --git a/fs/aio.c b/fs/aio.c
index 1f32da13d39ee6..d1c2aa7fd6de7c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -234,7 +234,7 @@ static const struct address_space_operations aio_ctx_aops;
 static 

Re: [PATCH v2 7/8] powerpc/xive: Fix xmon command "dxi"

2021-03-09 Thread Cédric Le Goater
On 3/9/21 11:23 AM, Greg Kurz wrote:
> On Wed, 3 Mar 2021 18:48:56 +0100
> Cédric Le Goater  wrote:
> 
>> When under xmon, the "dxi" command dumps the state of the XIVE
>> interrupts. If an interrupt number is specified, only the state of
>> the associated XIVE interrupt is dumped. This form of the command
>> lacks an irq_data parameter which is nevertheless used by
>> xmon_xive_get_irq_config(), leading to an xmon crash.
>>
>> Fix that by doing a lookup in the system IRQ mapping to query the IRQ
>> descriptor data. Invalid interrupt numbers, or not belonging to the
>> XIVE IRQ domain, OPAL event interrupt number for instance, should be
>> caught by the previous query done at the firmware level.
>>
>> Reported-by: kernel test robot 
>> Reported-by: Dan Carpenter 
>> Fixes: 97ef27507793 ("powerpc/xive: Fix xmon support on the PowerNV 
>> platform")
>> Signed-off-by: Cédric Le Goater 
>> ---
> 
> I've tested this in a KVM guest and it seems to do the job.
> 
> 6:mon> dxi 1201
> IRQ 0x1201 : target=0xfc00 prio=ff lirq=0x0 flags= LH PQ=-Q
> 
> Bad HW irq numbers are filtered by the hypervisor:
> 
> 6:mon> dxi bad
> [  696.390577] xive: H_INT_GET_SOURCE_CONFIG lisn=2989 failed -55
> IRQ 0x0bad : no config rc=-6
> 
> Note that this also allows to show IPIs:
> 
> 6:mon> dxi 0
> IRQ 0x : target=0x0 prio=06 lirq=0x10 
> 
> This is a bit inconsistent with output of the 0-argument form of "dxi",

It's a hidden feature ! :) 

Yes. You can query at the FW level the configuration of any valid HW 
interrupt number, whereas "dxi" without an argument only loops on the 
XIVE IRQ domain which does not include the XIVE CPU IPIs which are 
special. You should use "dxa" for these. 

> which filters them out for a reason that isn't obvious to me. 

For historical reasons. XIVE support for PowerNV was the first to reach 
Linux. If you run the same xmon commands on a PowerNV machine (you could 
use QEMU), the output is different. It has more low-level details.

> No big deal though, this should be addressed in another patch anyway.

We could simplify the xmon helpers to be sync with the debugfs one
and the QEMU/KVM "info pic" command. I agree.

Thanks,

C. 


> Reviewed-and-tested-by: Greg Kurz 
> 
>>  arch/powerpc/sysdev/xive/common.c | 14 ++
>>  1 file changed, 10 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/powerpc/sysdev/xive/common.c 
>> b/arch/powerpc/sysdev/xive/common.c
>> index f6b7b15bbb3a..8eefd152b947 100644
>> --- a/arch/powerpc/sysdev/xive/common.c
>> +++ b/arch/powerpc/sysdev/xive/common.c
>> @@ -255,17 +255,20 @@ notrace void xmon_xive_do_dump(int cpu)
>>  xmon_printf("\n");
>>  }
>>  
>> +static struct irq_data *xive_get_irq_data(u32 hw_irq)
>> +{
>> +unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq);
>> +
>> +return irq ? irq_get_irq_data(irq) : NULL;
>> +}
>> +
>>  int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
>>  {
>> -struct irq_chip *chip = irq_data_get_irq_chip(d);
>>  int rc;
>>  u32 target;
>>  u8 prio;
>>  u32 lirq;
>>  
>> -if (!is_xive_irq(chip))
>> -return -EINVAL;
>> -
>>  rc = xive_ops->get_irq_config(hw_irq, , , );
>>  if (rc) {
>>  xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
>> @@ -275,6 +278,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data 
>> *d)
>>  xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
>>  hw_irq, target, prio, lirq);
>>  
>> +if (!d)
>> +d = xive_get_irq_data(hw_irq);
>> +
>>  if (d) {
>>  struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
>>  u64 val = xive_esb_read(xd, XIVE_ESB_GET);
> 



Re: [PATCH 3/3] powerpc/qspinlock: Use generic smp_cond_load_relaxed

2021-03-09 Thread Davidlohr Bueso

On Tue, 09 Mar 2021, Michal Suchánek wrote:


On Mon, Mar 08, 2021 at 05:59:50PM -0800, Davidlohr Bueso wrote:

49a7d46a06c3 (powerpc: Implement smp_cond_load_relaxed()) added
busy-waiting pausing with a preferred SMT priority pattern, lowering
the priority (reducing decode cycles) during the whole loop slowpath.

However, data shows that while this pattern works well with simple

 ^^

spinlocks, queued spinlocks benefit more being kept in medium priority,
with a cpu_relax() instead, being a low+medium combo on powerpc.

...


diff --git a/arch/powerpc/include/asm/barrier.h 
b/arch/powerpc/include/asm/barrier.h
index aecfde829d5d..7ae29cfb06c0 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -80,22 +80,6 @@ do { 
\
___p1;  \
 })

-#ifdef CONFIG_PPC64

Maybe it should be kept for the simple spinlock case then?


It is kept, note that simple spinlocks don't use smp_cond_load_relaxed,
but instead deal with the priorities in arch_spin_lock(), so it will
spin in low priority until it sees a chance to take the lock, where
it switches back to medium.

Thanks,
Davidlohr


Re: [PATCH v2 4/8] powerpc/xive: Simplify xive_core_debug_show()

2021-03-09 Thread Cédric Le Goater
On 3/9/21 10:42 AM, Greg Kurz wrote:
> On Tue, 9 Mar 2021 10:13:39 +0100
> Greg Kurz  wrote:
> 
>> On Mon, 8 Mar 2021 19:11:11 +0100
>> Cédric Le Goater  wrote:
>>
>>> On 3/8/21 7:07 PM, Greg Kurz wrote:
 On Wed, 3 Mar 2021 18:48:53 +0100
 Cédric Le Goater  wrote:

> Now that the IPI interrupt has its own domain, the checks on the HW
> interrupt number XIVE_IPI_HW_IRQ and on the chip can be replaced by a
> check on the domain.
>
> Signed-off-by: Cédric Le Goater 
> ---

 Shouldn't this have the following tags ?

 Reported-by: kernel test robot 
 Reported-by: Dan Carpenter 
 Fixes: 930914b7d528 ("powerpc/xive: Add a debugfs file to dump internal 
 XIVE state")
>>>
>>> The next patch has them because it removes the useless check on irq_data.
>>>  
>>
>> Ok I get it. This report isn't about an actual crash. Just a false
>> positive because of the not needed check in the caller.
>>
> 
> Hrm... I meant because of the check in xive_debug_show_irq(). On the
> contrary, the check removed by this patch in xive_core_debug_show()
> was rather an explicit hint that xive_debug_show_irq() couldn't be
> called with d being NULL.

yes. irq_desc_get_irq_data() does not return a NULL value and 
xive_debug_show_irq() is only called from the for_each_irq_desc()
loop. 


C.


> 
>>> C.
>>>

 Anyway,

 Reviewed-by: Greg Kurz 

>  arch/powerpc/sysdev/xive/common.c | 18 --
>  1 file changed, 4 insertions(+), 14 deletions(-)
>
> diff --git a/arch/powerpc/sysdev/xive/common.c 
> b/arch/powerpc/sysdev/xive/common.c
> index 678680531d26..7581cb12bb53 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -1579,17 +1579,14 @@ static void xive_debug_show_cpu(struct seq_file 
> *m, int cpu)
>   seq_puts(m, "\n");
>  }
>  
> -static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct 
> irq_data *d)
> +static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
>  {
> - struct irq_chip *chip = irq_data_get_irq_chip(d);
> + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
>   int rc;
>   u32 target;
>   u8 prio;
>   u32 lirq;
>  
> - if (!is_xive_irq(chip))
> - return;
> -
>   rc = xive_ops->get_irq_config(hw_irq, , , );
>   if (rc) {
>   seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
> @@ -1627,16 +1624,9 @@ static int xive_core_debug_show(struct seq_file 
> *m, void *private)
>  
>   for_each_irq_desc(i, desc) {
>   struct irq_data *d = irq_desc_get_irq_data(desc);
> - unsigned int hw_irq;
> -
> - if (!d)
> - continue;
> -
> - hw_irq = (unsigned int)irqd_to_hwirq(d);
>  
> - /* IPIs are special (HW number 0) */
> - if (hw_irq != XIVE_IPI_HW_IRQ)
> - xive_debug_show_irq(m, hw_irq, d);
> + if (d->domain == xive_irq_domain)
> + xive_debug_show_irq(m, d);
>   }
>   return 0;
>  }

>>>
>>
> 



Re: [PATCH v2 1/8] powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property

2021-03-09 Thread Cédric Le Goater
On 3/8/21 6:13 PM, Greg Kurz wrote:
> On Wed, 3 Mar 2021 18:48:50 +0100
> Cédric Le Goater  wrote:
> 
>> The 'chip_id' field of the XIVE CPU structure is used to choose a
>> target for a source located on the same chip when possible. This field
>> is assigned on the PowerNV platform using the "ibm,chip-id" property
>> on pSeries under KVM when NUMA nodes are defined but it is undefined
> 
> This sentence seems to have a syntax problem... like it is missing an
> 'and' before 'on pSeries'.

ah yes, or simply a comma.

>> under PowerVM. The XIVE source structure has a similar field
>> 'src_chip' which is only assigned on the PowerNV platform.
>>
>> cpu_to_node() returns a compatible value on all platforms, 0 being the
>> default node. It will also give us the opportunity to set the affinity
>> of a source on pSeries when we can localize them.
>>
> 
> IIUC this relies on the fact that the NUMA node id is == to chip id
> on PowerNV, i.e. xc->chip_id which is passed to OPAL remain stable
> with this change.

Linux sets the NUMA node in numa_setup_cpu(). On pseries, the hcall 
H_HOME_NODE_ASSOCIATIVITY returns the node id if I am correct (Daniel
in Cc:)

On PowerNV, Linux uses "ibm,associativity" property of the CPU to find
the node id. This value is built from the chip id in OPAL, so the 
value returned by cpu_to_node(cpu) and the value of the "ibm,chip-id"
property are unlikely to be different.

cpu_to_node(cpu) is used in many places to allocate the structures 
locally to the owning node. XIVE is not an exception (see below in the 
same patch), it is better to be consistent and get the same information 
(node id) using the same routine.


In Linux, "ibm,chip-id" is only used in low level PowerNV drivers : 
LPC, XSCOM, RNG, VAS, NX. XIVE should be in that list also but skiboot
unifies the controllers of the system to only expose one to the OS. This
is problematic and should be changed but it's another topic.


> On the other hand, you have the pSeries case under PowerVM that
> doesn't set xc->chip_id, which isn't passed to any hcall AFAICT.

yes "ibm,chip-id" is an OPAL concept unfortunately and it has no meaning 
under PAPR. xc->chip_id on pseries (PowerVM) will contain an invalid 
chip id. 

QEMU/KVM exposes "ibm,chip-id" but it's not used. (its value is not
always correct btw)

> It looks like the chip id is only used for localization purpose in
> this case, right ?

Yes and PAPR sources are not localized. So it's not used. MSI sources 
could be if we rewrote the MSI driver.

> In this case, what about doing this change for pSeries only,
> somewhere in spapr.c ?

The IPI code is common to all platforms and all have the same issue. 
I'd rather not.

Thanks,

C.
 
>> Signed-off-by: Cédric Le Goater 
>> ---
>>  arch/powerpc/sysdev/xive/common.c | 7 +--
>>  1 file changed, 1 insertion(+), 6 deletions(-)
>>
>> diff --git a/arch/powerpc/sysdev/xive/common.c 
>> b/arch/powerpc/sysdev/xive/common.c
>> index 595310e056f4..b8e456da28aa 100644
>> --- a/arch/powerpc/sysdev/xive/common.c
>> +++ b/arch/powerpc/sysdev/xive/common.c
>> @@ -1335,16 +1335,11 @@ static int xive_prepare_cpu(unsigned int cpu)
>>  
>>  xc = per_cpu(xive_cpu, cpu);
>>  if (!xc) {
>> -struct device_node *np;
>> -
>>  xc = kzalloc_node(sizeof(struct xive_cpu),
>>GFP_KERNEL, cpu_to_node(cpu));
>>  if (!xc)
>>  return -ENOMEM;
>> -np = of_get_cpu_node(cpu, NULL);
>> -if (np)
>> -xc->chip_id = of_get_ibm_chip_id(np);
>> -of_node_put(np);
>> +xc->chip_id = cpu_to_node(cpu);
>>  xc->hw_ipi = XIVE_BAD_IRQ;
>>  
>>  per_cpu(xive_cpu, cpu) = xc;
> 



[PATCH 4/4] tools/perf: Support pipeline stage cycles for powerpc

2021-03-09 Thread Athira Rajeev
The pipeline stage cycles details can be recorded on powerpc from
the contents of Performance Monitor Unit (PMU) registers. On
ISA v3.1 platform, sampling registers expose the cycles spent in
different pipeline stages. Patch adds perf tools support to present
two of the cycle counter information along with memory latency (weight).

Re-use the field 'ins_lat' for storing the first pipeline stage cycle.
This is stored in 'var2_w' field of 'perf_sample_weight'.

Add a new field 'p_stage_cyc' to store the second pipeline stage cycle
which is stored in 'var3_w' field of perf_sample_weight.

Add new sort function 'Pipeline Stage Cycle' and include this in
default_mem_sort_order[]. This new sort function may be used to denote
some other pipeline stage in another architecture. So add this to
list of sort entries that can have dynamic header string.

Signed-off-by: Athira Rajeev 
---
 tools/perf/Documentation/perf-report.txt |  1 +
 tools/perf/arch/powerpc/util/event.c | 18 --
 tools/perf/util/event.h  |  1 +
 tools/perf/util/hist.c   | 11 ---
 tools/perf/util/hist.h   |  1 +
 tools/perf/util/session.c|  4 +++-
 tools/perf/util/sort.c   | 24 ++--
 tools/perf/util/sort.h   |  2 ++
 8 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt 
b/tools/perf/Documentation/perf-report.txt
index f546b5e9db05..9691d9c227ba 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -112,6 +112,7 @@ OPTIONS
- ins_lat: Instruction latency in core cycles. This is the global 
instruction
  latency
- local_ins_lat: Local instruction latency version
+   - p_stage_cyc: Number of cycles spent in a pipeline stage.
 
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
diff --git a/tools/perf/arch/powerpc/util/event.c 
b/tools/perf/arch/powerpc/util/event.c
index f49d32c2c8ae..b80fbee83b6e 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -18,8 +18,11 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
weight.full = *array;
if (type & PERF_SAMPLE_WEIGHT)
data->weight = weight.full;
-   else
+   else {
data->weight = weight.var1_dw;
+   data->ins_lat = weight.var2_w;
+   data->p_stage_cyc = weight.var3_w;
+   }
 }
 
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
@@ -27,6 +30,17 @@ void arch_perf_synthesize_sample_weight(const struct 
perf_sample *data,
 {
*array = data->weight;
 
-   if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+   if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
*array &= 0x;
+   *array |= ((u64)data->ins_lat << 32);
+   }
+}
+
+const char *arch_perf_header_entry__add(const char *se_header)
+{
+   if (!strcmp(se_header, "Local INSTR Latency"))
+   return "Finish Cyc";
+   else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+   return "Dispatch Cyc";
+   return se_header;
 }
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 89b149e2e70a..65f89e80916f 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -147,6 +147,7 @@ struct perf_sample {
u8  cpumode;
u16 misc;
u16 ins_lat;
+   u16 p_stage_cyc;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index c82f5fc26af8..9299ee535518 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct 
hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
+   hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
@@ -289,13 +290,14 @@ static long hist_time(unsigned long htime)
 }
 
 static void he_stat__add_period(struct he_stat *he_stat, u64 period,
-   u64 weight, u64 ins_lat)
+   u64 weight, u64 ins_lat, u64 p_stage_cyc)
 {
 
he_stat->period += period;
he_stat->weight += weight;
he_stat->nr_events  += 1;
he_stat->ins_lat+= ins_lat;
+   he_stat->p_stage_cyc+= p_stage_cyc;
 }
 
 static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
@@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct 
he_stat *src)
dest->nr_events += src->nr_events;

[PATCH 3/4] tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT

2021-03-09 Thread Athira Rajeev
Add arch specific arch_evsel__set_sample_weight() to set the new
sample type for powerpc.

Add arch specific arch_perf_parse_sample_weight() to store the
sample->weight values depending on the sample type applied.
If the new sample type (PERF_SAMPLE_WEIGHT_STRUCT) is applied,
store only the lower 32 bits to sample->weight. If sample type
is 'PERF_SAMPLE_WEIGHT', store the full 64-bit to sample->weight.

Signed-off-by: Athira Rajeev 
---
 tools/perf/arch/powerpc/util/Build   |  2 ++
 tools/perf/arch/powerpc/util/event.c | 32 
 tools/perf/arch/powerpc/util/evsel.c |  8 
 3 files changed, 42 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/event.c
 create mode 100644 tools/perf/arch/powerpc/util/evsel.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index b7945e5a543b..8a79c4126e5b 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -4,6 +4,8 @@ perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
 perf-y += sym-handling.o
+perf-y += evsel.o
+perf-y += event.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/event.c 
b/tools/perf/arch/powerpc/util/event.c
new file mode 100644
index ..f49d32c2c8ae
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
+
+void arch_perf_parse_sample_weight(struct perf_sample *data,
+  const __u64 *array, u64 type)
+{
+   union perf_sample_weight weight;
+
+   weight.full = *array;
+   if (type & PERF_SAMPLE_WEIGHT)
+   data->weight = weight.full;
+   else
+   data->weight = weight.var1_dw;
+}
+
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+   __u64 *array, u64 type)
+{
+   *array = data->weight;
+
+   if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+   *array &= 0x;
+}
diff --git a/tools/perf/arch/powerpc/util/evsel.c 
b/tools/perf/arch/powerpc/util/evsel.c
new file mode 100644
index ..2f733cdc8dbb
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/evsel.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include "util/evsel.h"
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+   evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
+}
-- 
1.8.3.1



[PATCH 2/4] tools/perf: Add dynamic headers for perf report columns

2021-03-09 Thread Athira Rajeev
Currently the header string for different columns in perf report
is fixed. Some fields of perf sample could have different meaning
for different architectures than the meaning conveyed by the header
string. An example is the new field 'var2_w' of perf_sample_weight
structure. This is presently captured as 'Local INSTR Latency' in
perf mem report. But this could be used to denote a different latency
cycle in another architecture.

Introduce a weak function arch_perf_header_entry__add() to set
the arch specific header string for the fields which can contain dynamic
header. If the architecture does not have this function, fall back to the
default header string value.

Signed-off-by: Athira Rajeev 
---
 tools/perf/util/event.h |  1 +
 tools/perf/util/sort.c  | 19 ++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index f603edbbbc6f..89b149e2e70a 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -427,5 +427,6 @@ void  cpu_map_data__synthesize(struct 
perf_record_cpu_map_data *data, struct per
 
 void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 
*array, u64 type);
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 
*array, u64 type);
+const char *arch_perf_header_entry__add(const char *se_header);
 
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 0d5ad42812b9..741a6df29fa0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -25,6 +25,7 @@
 #include 
 #include "mem-events.h"
 #include "annotate.h"
+#include "event.h"
 #include "time-utils.h"
 #include "cgroup.h"
 #include "machine.h"
@@ -45,6 +46,7 @@
 regex_tignore_callees_regex;
 inthave_ignore_callees = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
+const char *dynamic_headers[] = {"local_ins_lat"};
 
 /*
  * Replaces all occurrences of a char used with the:
@@ -1816,6 +1818,16 @@ struct sort_dimension {
int taken;
 };
 
+const char * __weak arch_perf_header_entry__add(const char *se_header)
+{
+   return se_header;
+}
+
+static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
+{
+   sd->entry->se_header = 
arch_perf_header_entry__add(sd->entry->se_header);
+}
+
 #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
 
 static struct sort_dimension common_sort_dimensions[] = {
@@ -2739,11 +2751,16 @@ int sort_dimension__add(struct perf_hpp_list *list, 
const char *tok,
struct evlist *evlist,
int level)
 {
-   unsigned int i;
+   unsigned int i, j;
 
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
struct sort_dimension *sd = _sort_dimensions[i];
 
+   for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
+   if (!strcmp(dynamic_headers[j], sd->name))
+   sort_dimension_add_dynamic_header(sd);
+   }
+
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
 
-- 
1.8.3.1



[PATCH 1/4] powerpc/perf: Expose processor pipeline stage cycles using PERF_SAMPLE_WEIGHT_STRUCT

2021-03-09 Thread Athira Rajeev
Performance Monitoring Unit (PMU) registers in powerpc provide
information on cycles elapsed between different stages in the
pipeline. This can be used for application tuning. On ISA v3.1
platform, this information is exposed by sampling registers.
Patch adds kernel support to capture two of the cycle counters
as part of perf sample using the sample type:
PERF_SAMPLE_WEIGHT_STRUCT.

The power PMU function 'get_mem_weight' currently uses 64 bit weight
field of perf_sample_data to capture memory latency. But following the
introduction of PERF_SAMPLE_WEIGHT_TYPE, weight field could contain
64-bit or 32-bit value depending on the architecture support for
PERF_SAMPLE_WEIGHT_STRUCT. Patches uses WEIGHT_STRUCT to expose the
pipeline stage cycles info. Hence update the ppmu functions to work for
64-bit and 32-bit weight values.

If the sample type is PERF_SAMPLE_WEIGHT, use the 64-bit weight field.
If the sample type is PERF_SAMPLE_WEIGHT_STRUCT, memory subsystem
latency is stored in the low 32bits of perf_sample_weight structure.
Also for CPU_FTR_ARCH_31, capture the two cycle counter information in
two 16 bit fields of perf_sample_weight structure.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/perf_event_server.h |  2 +-
 arch/powerpc/perf/core-book3s.c  |  4 ++--
 arch/powerpc/perf/isa207-common.c| 29 +---
 arch/powerpc/perf/isa207-common.h|  6 +-
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h 
b/arch/powerpc/include/asm/perf_event_server.h
index 00e7e671bb4b..112cf092d7b3 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -43,7 +43,7 @@ struct power_pmu {
u64 alt[]);
void(*get_mem_data_src)(union perf_mem_data_src *dsrc,
u32 flags, struct pt_regs *regs);
-   void(*get_mem_weight)(u64 *weight);
+   void(*get_mem_weight)(u64 *weight, u64 type);
unsigned long   group_constraint_mask;
unsigned long   group_constraint_val;
u64 (*bhrb_filter_map)(u64 branch_sample_type);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6817331e22ff..57ff2494880c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2206,9 +2206,9 @@ static void record_and_restart(struct perf_event *event, 
unsigned long val,
ppmu->get_mem_data_src)
ppmu->get_mem_data_src(_src, ppmu->flags, 
regs);
 
-   if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
+   if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
ppmu->get_mem_weight)
-   ppmu->get_mem_weight();
+   ppmu->get_mem_weight(, 
event->attr.sample_type);
 
if (perf_event_overflow(event, , regs))
power_pmu_stop(event, 0);
diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index e4f577da33d8..5dcbdbd54598 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -284,8 +284,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src 
*dsrc, u32 flags,
}
 }
 
-void isa207_get_mem_weight(u64 *weight)
+void isa207_get_mem_weight(u64 *weight, u64 type)
 {
+   union perf_sample_weight *weight_fields;
+   u64 weight_lat;
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
@@ -296,9 +298,30 @@ void isa207_get_mem_weight(u64 *weight)
mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
 
if (val == 0 || val == 7)
-   *weight = 0;
+   weight_lat = 0;
else
-   *weight = mantissa << (2 * exp);
+   weight_lat = mantissa << (2 * exp);
+
+   /*
+* Use 64 bit weight field (full) if sample type is
+* WEIGHT.
+*
+* if sample type is WEIGHT_STRUCT:
+* - store memory latency in the lower 32 bits.
+* - For ISA v3.1, use remaining two 16 bit fields of
+*   perf_sample_weight to store cycle counter values
+*   from sier2.
+*/
+   weight_fields = (union perf_sample_weight *)weight;
+   if (type & PERF_SAMPLE_WEIGHT)
+   weight_fields->full = weight_lat;
+   else {
+   weight_fields->var1_dw = (u32)weight_lat;
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   weight_fields->var2_w = 
P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
+   weight_fields->var3_w = 
P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
+   }
+   }
 }
 
 int isa207_get_constraint(u64 event, unsigned long *maskp, 

[PATCH 0/4] powerpc/perf: Export processor pipeline stage cycles information

2021-03-09 Thread Athira Rajeev
Performance Monitoring Unit (PMU) registers in powerpc export the
number of cycles elapsed between different stages in the pipeline,
for example the sampling registers in ISA v3.1.

This patchset implements kernel and perf tools support to expose
these pipeline stage cycles using the sample type PERF_SAMPLE_WEIGHT_TYPE.

Patch 1/4 adds kernel side support to store the cycle counter
values as part of 'var2_w' and 'var3_w' fields of perf_sample_weight
structure.

Patch 2/4 adds support for making the perf report column header
strings dynamic.
Patch 3/4 adds powerpc support in perf tools for PERF_SAMPLE_WEIGHT_STRUCT
in sample type: PERF_SAMPLE_WEIGHT_TYPE.
Patch 4/4 adds support to present pipeline stage cycles as part of
mem-mode.

Sample output on powerpc:

# perf mem record ls
# perf mem report

# To display the perf.data header info, please use --header/--header-only 
options.
#
#
# Total Lost Samples: 0
#
# Samples: 11  of event 'cpu/mem-loads/'
# Total weight : 1332
# Sort order   : 
local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,stall_cyc
#
# Overhead   Samples  Local Weight  Memory access Symbol
  Shared Object Data Symbol 
   Data ObjectSnoop TLB access  Locked  
Blocked Finish Cyc Dispatch Cyc 
#         
..    
.  .  
  ..  ..  ..  .  
.
#
44.14% 1  588   L1 hit[k] 
rcu_nmi_exit[kernel.vmlinux]  [k] 0xc007ffdd21b0
 [unknown]  N/A   N/A 
No   N/A7  5
22.22% 1  296   L1 hit[k] 
copypage_power7 [kernel.vmlinux]  [k] 0xc000ff6a1780
 [unknown]  N/A   N/A 
No   N/A2933
 6.98% 1  93L1 hit[.] _dl_addr  
  libc-2.31.so  [.] 0x7fff86fa5058  
   libc-2.31.so   N/A   N/A No   
N/A7  1
 6.61% 1  88L2 hit[.] 
new_do_writelibc-2.31.so  [.] _IO_2_1_stdout_+0x0   
 libc-2.31.so   N/A   N/A 
No   N/A84 1
 5.93% 1  79L1 hit[k] 
printk_nmi_exit [kernel.vmlinux]  [k] 0xc006085df6b0
 [unknown]  N/A   N/A 
No   N/A7  1
 4.05% 1  54L2 hit[.] 
__alloc_dir libc-2.31.so  [.] 0x7fffdb70a640
 [stack]N/A   N/A 
No   N/A18 1
 3.60% 1  48L1 hit[.] _init 
  ls[.] 0x00016ca82118  
   [heap] N/A   N/A No   
N/A7  6
 2.40% 1  32L1 hit[k] desc_read 
  [kernel.vmlinux]  [k] _printk_rb_static_descs+0x1ea10 
   [kernel.vmlinux].data  N/A   N/A No   
N/A7  1
 1.65% 1  22L2 hit[k] 
perf_iterate_ctx.constprop.139  [kernel.vmlinux]  [k] 0xc0064d79e8a8
 [unknown]  N/A   N/A 
No   N/A16 1
 1.58% 1  21L1 hit[k] 
perf_event_interrupt[kernel.vmlinux]  [k] 0xc006085df6b0
 [unknown]  N/A   N/A 
No   N/A7  1
 0.83% 1  11L1 hit[k] 
perf_event_exec [kernel.vmlinux]  [k] 0xc007ffdd3288
 [unknown]  N/A   N/A 
No   N/A7  4


Athira Rajeev (4):
  powerpc/perf: Expose processor pipeline stage cycles using
PERF_SAMPLE_WEIGHT_STRUCT
  tools/perf: Add dynamic headers for perf report columns
  tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT
  tools/perf: Support 

Re: [PATCH v2 8/8] powerpc/xive: Map one IPI interrupt per node

2021-03-09 Thread Greg Kurz
On Wed, 3 Mar 2021 18:48:57 +0100
Cédric Le Goater  wrote:

> ipistorm [*] can be used to benchmark the raw interrupt rate of an
> interrupt controller by measuring the number of IPIs a system can
> sustain. When applied to the XIVE interrupt controller of POWER9 and
> POWER10 systems, a significant drop of the interrupt rate can be
> observed when crossing the second node boundary.
> 
> This is due to the fact that a single IPI interrupt is used for all
> CPUs of the system. The structure is shared and the cache line updates
> impact greatly the traffic between nodes and the overall IPI
> performance.
> 
> As a workaround, the impact can be reduced by deactivating the IRQ
> lockup detector ("noirqdebug") which does a lot of accounting in the
> Linux IRQ descriptor structure and is responsible for most of the
> performance penalty.
> 
> As a fix, this proposal allocates an IPI interrupt per node, to be
> shared by all CPUs of that node. It solves the scaling issue, the IRQ
> lockup detector still has an impact but the XIVE interrupt rate scales
> linearly. It also improves the "noirqdebug" case as shown in the
> tables below.
> 
>  * P9 DD2.2 - 2s * 64 threads
> 
>"noirqdebug"
> Mint/sMint/s
>  chips  cpus  IPI/sys   IPI/chip   IPI/chipIPI/sys
>  --
>  1  0-15 4.984023   4.875405   4.996536   5.048892
> 0-3110.879164  10.544040  10.757632  11.037859
> 0-4715.345301  14.688764  14.926520  15.310053
> 0-6317.064907  17.066812  17.613416  17.874511
>  2  0-7911.768764  21.650749  22.689120  22.566508
> 0-9510.616812  26.878789  28.434703  28.320324
> 0-111   10.151693  31.397803  31.771773  32.388122
> 0-1279.948502  33.139336  34.875716  35.224548
> 
>  * P10 DD1 - 4s (not homogeneous) 352 threads
> 
>"noirqdebug"
> Mint/sMint/s
>  chips  cpus  IPI/sys   IPI/chip   IPI/chipIPI/sys
>  --
>  1  0-15 2.409402   2.364108   2.383303   2.395091
> 0-31 6.028325   6.046075   6.08   6.073750
> 0-47 8.655178   8.644531   8.712830   8.724702
> 0-6311.629652  11.735953  12.088203  12.055979
> 0-7914.392321  14.729959  14.986701  14.973073
> 0-9512.604158  13.004034  17.528748  17.568095
>  2  0-1119.767753  13.719831  19.968606  20.024218
> 0-1276.744566  16.418854  22.898066  22.995110
> 0-1436.005699  19.174421  25.425622  25.417541
> 0-1595.649719  21.938836  27.952662  28.059603
> 0-1755.441410  24.109484  31.133915  31.127996
>  3  0-1915.318341  24.405322  33.999221  33.775354
> 0-2075.191382  26.449769  36.050161  35.867307
> 0-2235.102790  29.356943  39.544135  39.508169
> 0-2395.035295  31.933051  42.135075  42.071975
> 0-2554.969209  34.477367  44.655395  44.757074
>  4  0-2714.907652  35.887016  47.080545  47.318537
> 0-2874.839581  38.076137  50.464307  50.636219
> 0-3034.786031  40.881319  53.478684  53.310759
> 0-3194.743750  43.448424  56.388102  55.973969
> 0-3354.709936  45.623532  59.400930  58.926857
> 0-3514.681413  45.646151  62.035804  61.830057
> 
> [*] https://github.com/antonblanchard/ipistorm
> 
> Signed-off-by: Cédric Le Goater 
> ---
>  arch/powerpc/sysdev/xive/xive-internal.h |  2 --
>  arch/powerpc/sysdev/xive/common.c| 39 ++--
>  2 files changed, 30 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/sysdev/xive/xive-internal.h 
> b/arch/powerpc/sysdev/xive/xive-internal.h
> index 9cf57c722faa..b3a456fdd3a5 100644
> --- a/arch/powerpc/sysdev/xive/xive-internal.h
> +++ b/arch/powerpc/sysdev/xive/xive-internal.h
> @@ -5,8 +5,6 @@
>  #ifndef __XIVE_INTERNAL_H
>  #define __XIVE_INTERNAL_H
>  
> -#define XIVE_IPI_HW_IRQ  0 /* interrupt source # for IPIs */
> -
>  /*
>   * A "disabled" interrupt should never fire, to catch problems
>   * we set its logical number to this
> diff --git a/arch/powerpc/sysdev/xive/common.c 
> b/arch/powerpc/sysdev/xive/common.c
> index 8eefd152b947..c27f7bb0494b 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -65,8 +65,16 @@ static struct irq_domain *xive_irq_domain;
>  #ifdef CONFIG_SMP
>  static struct irq_domain *xive_ipi_irq_domain;
>  
> -/* The IPIs all use the same logical irq number */
> -static u32 xive_ipi_irq;
> +/* The IPIs use the same logical irq number when on the same chip */
> 

Re: [PATCH v4] powerpc/uprobes: Validation for prefixed instruction

2021-03-09 Thread Ravi Bangoria




On 3/9/21 4:51 PM, Naveen N. Rao wrote:

On 2021/03/09 08:54PM, Michael Ellerman wrote:

Ravi Bangoria  writes:

As per ISA 3.1, prefixed instruction should not cross 64-byte
boundary. So don't allow Uprobe on such prefixed instruction.

There are two ways a probed instruction is changed in mapped pages.
First, when a Uprobe is activated, it searches for all the relevant
pages and replaces the instruction in them. In this case, if that probe
is on a 64-byte unaligned prefixed instruction, error out
directly. Second, when a Uprobe is already active and the user maps a
relevant page via mmap(), the instruction is replaced via the mmap() code
path. But the entire mmap() operation cannot be failed just because the
Uprobe is invalid. In this case just print an error and continue.

Signed-off-by: Ravi Bangoria 
Acked-by: Naveen N. Rao 


Do we have a Fixes: tag for this?


Since this is an additional check we are adding, I don't think we should
add a Fixes: tag. Nothing is broken per-se -- we're just adding more
checks to catch simple mistakes. Also, like Oleg pointed out, there are
still many other ways for users to shoot themselves in the foot with
uprobes and prefixed instructions, if they so desire.

However, if you still think we should add a Fixes: tag, we can perhaps
use the below commit since I didn't see any specific commit adding
support for prefixed instructions for uprobes:

Fixes: 650b55b707fdfa ("powerpc: Add prefixed instructions to
instruction data type")


True. IMO, It doesn't really need any Fixes tag.






---
v3: https://lore.kernel.org/r/20210304050529.59391-1-ravi.bango...@linux.ibm.com
v3->v4:
   - CONFIG_PPC64 check was not required, remove it.
   - Use SZ_ macros instead of hardcoded numbers.

  arch/powerpc/kernel/uprobes.c | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index e8a63713e655..4cbfff6e94a3 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
if (addr & 0x03)
return -EINVAL;
  
+	if (cpu_has_feature(CPU_FTR_ARCH_31) &&

+   ppc_inst_prefixed(auprobe->insn) &&
+   (addr & (SZ_64 - 4)) == SZ_64 - 4) {
+   pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned 
prefixed instruction\n");
+   return -EINVAL;


I realise we already did the 0x03 check above, but I still think this
would be clearer simply as:

((addr & 0x3f) == 60)


Indeed, I like the use of `60' there -- hex is overrated ;)


Sure. Will resend.

Ravi


[PATCH v2 43/43] powerpc/32: Manage KUAP in C

2021-03-09 Thread Christophe Leroy
Move all KUAP management in C.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/kup.h | 50 +---
 arch/powerpc/include/asm/interrupt.h |  2 +
 arch/powerpc/include/asm/kup.h   |  9 
 arch/powerpc/include/asm/nohash/32/kup-8xx.h | 25 +-
 arch/powerpc/kernel/entry_32.S   |  6 ---
 arch/powerpc/kernel/interrupt.c  | 19 ++--
 arch/powerpc/kernel/process.c|  3 ++
 7 files changed, 11 insertions(+), 103 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h 
b/arch/powerpc/include/asm/book3s/32/kup.h
index c9d6c28bcd10..27991e0d2cf9 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -5,55 +5,7 @@
 #include 
 #include 
 
-#ifdef __ASSEMBLY__
-
-#ifdef CONFIG_PPC_KUAP
-
-.macro kuap_update_sr  gpr1, gpr2, gpr3/* NEVER use r0 as gpr2 due to 
addis */
-101:   mtsrin  \gpr1, \gpr2
-   addi\gpr1, \gpr1, 0x111 /* next VSID */
-   rlwinm  \gpr1, \gpr1, 0, 0xf0ff /* clear VSID overflow */
-   addis   \gpr2, \gpr2, 0x1000/* address of next segment */
-   cmplw   \gpr2, \gpr3
-   blt-101b
-   isync
-.endm
-
-.macro kuap_save_and_lock  sp, thread, gpr1, gpr2, gpr3
-   lwz \gpr2, KUAP(\thread)
-   rlwinm. \gpr3, \gpr2, 28, 0xf000
-   stw \gpr2, STACK_REGS_KUAP(\sp)
-   beq+102f
-   li  \gpr1, 0
-   stw \gpr1, KUAP(\thread)
-   mfsrin  \gpr1, \gpr2
-   oris\gpr1, \gpr1, SR_KS@h   /* set Ks */
-   kuap_update_sr  \gpr1, \gpr2, \gpr3
-102:
-.endm
-
-.macro kuap_restoresp, current, gpr1, gpr2, gpr3
-   lwz \gpr2, STACK_REGS_KUAP(\sp)
-   rlwinm. \gpr3, \gpr2, 28, 0xf000
-   stw \gpr2, THREAD + KUAP(\current)
-   beq+102f
-   mfsrin  \gpr1, \gpr2
-   rlwinm  \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */
-   kuap_update_sr  \gpr1, \gpr2, \gpr3
-102:
-.endm
-
-.macro kuap_check  current, gpr
-#ifdef CONFIG_PPC_KUAP_DEBUG
-   lwz \gpr, THREAD + KUAP(\current)
-999:   twnei   \gpr, 0
-   EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | 
BUGFLAG_ONCE)
-#endif
-.endm
-
-#endif /* CONFIG_PPC_KUAP */
-
-#else /* !__ASSEMBLY__ */
+#ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PPC_KUAP
 
diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index d4bfe94b4a68..b41cb4e014b2 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -37,6 +37,8 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
kuep_lock();
current->thread.regs = regs;
account_cpu_user_entry();
+   } else {
+   kuap_save_and_lock(regs);
}
 #endif
/*
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index b7efa46b3109..5bbe8f28d26b 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -28,15 +28,6 @@
 
 #ifdef __ASSEMBLY__
 #ifndef CONFIG_PPC_KUAP
-.macro kuap_save_and_lock  sp, thread, gpr1, gpr2, gpr3
-.endm
-
-.macro kuap_restoresp, current, gpr1, gpr2, gpr3
-.endm
-
-.macro kuap_check  current, gpr
-.endm
-
 .macro kuap_check_amr  gpr1, gpr2
 .endm
 
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h 
b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index c74f5704bc47..fb294dbca102 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -7,30 +7,7 @@
 
 #ifdef CONFIG_PPC_KUAP
 
-#ifdef __ASSEMBLY__
-
-.macro kuap_save_and_lock  sp, thread, gpr1, gpr2, gpr3
-   lis \gpr2, MD_APG_KUAP@h/* only APG0 and APG1 are used */
-   mfspr   \gpr1, SPRN_MD_AP
-   mtspr   SPRN_MD_AP, \gpr2
-   stw \gpr1, STACK_REGS_KUAP(\sp)
-.endm
-
-.macro kuap_restoresp, current, gpr1, gpr2, gpr3
-   lwz \gpr1, STACK_REGS_KUAP(\sp)
-   mtspr   SPRN_MD_AP, \gpr1
-.endm
-
-.macro kuap_check  current, gpr
-#ifdef CONFIG_PPC_KUAP_DEBUG
-   mfspr   \gpr, SPRN_MD_AP
-   rlwinm  \gpr, \gpr, 16, 0x
-999:   twnei   \gpr, MD_APG_KUAP@h
-   EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | 
BUGFLAG_ONCE)
-#endif
-.endm
-
-#else /* !__ASSEMBLY__ */
+#ifndef __ASSEMBLY__
 
 #include 
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 850cb17a937f..f5ac021ff9ed 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -52,11 +52,9 @@
.globl  prepare_transfer_to_handler
 prepare_transfer_to_handler:
andi.   r0,r9,MSR_PR
-   addir12, r2, THREAD
bnelr
 
/* if from kernel, check interrupted DOZE/NAP mode */
-   kuap_save_and_lock r11, r12, r9, r5, r6
lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf   0x01,r12
bt- 31-TLF_NAPPING,4f

[PATCH v2 41/43] powerpc/32s: Create C version of kuap save/restore/check helpers

2021-03-09 Thread Christophe Leroy
In preparation of porting PPC32 to C syscall entry/exit,
create C version of kuap_save_and_lock() and kuap_user_restore() and
kuap_kernel_restore() and kuap_check() and kuap_get_and_check()
on book3s/32.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/kup.h | 45 
 1 file changed, 45 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h 
b/arch/powerpc/include/asm/book3s/32/kup.h
index b97ea60f6fa3..c9d6c28bcd10 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -72,6 +72,51 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
isync();/* Context sync required after mtsr() */
 }
 
+static inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+   unsigned long kuap = current->thread.kuap;
+   u32 addr = kuap & 0xf000;
+   u32 end = kuap << 28;
+
+   regs->kuap = kuap;
+   if (unlikely(!kuap))
+   return;
+
+   current->thread.kuap = 0;
+   kuap_update_sr(mfsr(addr) | SR_KS, addr, end);  /* Set Ks */
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long 
kuap)
+{
+   u32 addr = regs->kuap & 0xf000;
+   u32 end = regs->kuap << 28;
+
+   current->thread.kuap = regs->kuap;
+
+   if (unlikely(regs->kuap == kuap))
+   return;
+
+   kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */
+}
+
+static inline unsigned long kuap_get_and_check(void)
+{
+   unsigned long kuap = current->thread.kuap;
+
+   WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0);
+
+   return kuap;
+}
+
+static inline void kuap_check(void)
+{
+   kuap_get_and_check();
+}
+
 static __always_inline void allow_user_access(void __user *to, const void 
__user *from,
  u32 size, unsigned long dir)
 {
-- 
2.25.0



[PATCH v2 42/43] powerpc/8xx: Create C version of kuap save/restore/check helpers

2021-03-09 Thread Christophe Leroy
In preparation of porting PPC32 to C syscall entry/exit,
create C version of kuap_save_and_lock() and kuap_user_restore() and
kuap_kernel_restore() and kuap_check() and kuap_get_and_check() on 8xx.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/nohash/32/kup-8xx.h | 31 
 1 file changed, 31 insertions(+)

diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h 
b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 17a4a616436f..c74f5704bc47 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -34,6 +34,37 @@
 
 #include 
 
+static inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+   regs->kuap = mfspr(SPRN_MD_AP);
+   mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long 
kuap)
+{
+   mtspr(SPRN_MD_AP, regs->kuap);
+}
+
+static inline unsigned long kuap_get_and_check(void)
+{
+   unsigned long kuap = mfspr(SPRN_MD_AP);
+
+   if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+   WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16);
+
+   return kuap;
+}
+
+static inline void kuap_check(void)
+{
+   if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+   kuap_get_and_check();
+}
+
 static inline void allow_user_access(void __user *to, const void __user *from,
 unsigned long size, unsigned long dir)
 {
-- 
2.25.0



[PATCH v2 40/43] powerpc/64s: Make kuap_check_amr() and kuap_get_and_check_amr() generic

2021-03-09 Thread Christophe Leroy
In preparation of porting powerpc32 to C syscall entry/exit,
rename kuap_check_amr() and kuap_get_and_check_amr() as kuap_check()
and kuap_get_and_check(), and move in the generic asm/kup.h the stub
for when CONFIG_PPC_KUAP is not selected.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/kup.h | 24 ++--
 arch/powerpc/include/asm/kup.h   | 10 +-
 arch/powerpc/kernel/interrupt.c  | 12 ++--
 arch/powerpc/kernel/irq.c|  2 +-
 4 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
b/arch/powerpc/include/asm/book3s/64/kup.h
index 8bd905050896..d9b07e9998be 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs *regs,
 */
 }
 
-static inline unsigned long kuap_get_and_check_amr(void)
+static inline unsigned long kuap_get_and_check(void)
 {
if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
unsigned long amr = mfspr(SPRN_AMR);
@@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void)
return 0;
 }
 
-#else /* CONFIG_PPC_PKEY */
-
-static inline void kuap_user_restore(struct pt_regs *regs)
-{
-}
-
-static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
-{
-}
-
-static inline unsigned long kuap_get_and_check_amr(void)
-{
-   return 0;
-}
-
-#endif /* CONFIG_PPC_PKEY */
-
-
-#ifdef CONFIG_PPC_KUAP
-
-static inline void kuap_check_amr(void)
+static inline void kuap_check(void)
 {
if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && 
mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 25671f711ec2..b7efa46b3109 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -74,7 +74,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, 
bool is_write)
return false;
 }
 
-static inline void kuap_check_amr(void) { }
+static inline void kuap_check(void) { }
+static inline void kuap_save_and_lock(struct pt_regs *regs) { }
+static inline void kuap_user_restore(struct pt_regs *regs) { }
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long 
amr) { }
+
+static inline unsigned long kuap_get_and_check(void)
+{
+   return 0;
+}
 
 /*
  * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 727b7848c9cc..40ed55064e54 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -76,7 +76,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
} else
 #endif
 #ifdef CONFIG_PPC64
-   kuap_check_amr();
+   kuap_check();
 #endif
 
booke_restore_dbcr0();
@@ -254,7 +254,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
CT_WARN_ON(ct_state() == CONTEXT_USER);
 
 #ifdef CONFIG_PPC64
-   kuap_check_amr();
+   kuap_check();
 #endif
 
regs->result = r3;
@@ -380,7 +380,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct 
pt_regs *regs, unsigned
 * AMR can only have been unlocked if we interrupted the kernel.
 */
 #ifdef CONFIG_PPC64
-   kuap_check_amr();
+   kuap_check();
 #endif
 
local_irq_save(flags);
@@ -451,7 +451,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct 
pt_regs *regs, unsign
unsigned long flags;
unsigned long ret = 0;
 #ifdef CONFIG_PPC64
-   unsigned long amr;
+   unsigned long kuap;
 #endif
 
if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
@@ -467,7 +467,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct 
pt_regs *regs, unsign
CT_WARN_ON(ct_state() == CONTEXT_USER);
 
 #ifdef CONFIG_PPC64
-   amr = kuap_get_and_check_amr();
+   kuap = kuap_get_and_check();
 #endif
 
if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
@@ -511,7 +511,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct 
pt_regs *regs, unsign
 * value from the check above.
 */
 #ifdef CONFIG_PPC64
-   kuap_kernel_restore(regs, amr);
+   kuap_kernel_restore(regs, kuap);
 #endif
 
return ret;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index d71fd10a1dd4..3b18d2b2c702 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -282,7 +282,7 @@ static inline void replay_soft_interrupts_irqrestore(void)
 * and re-locking AMR but we shouldn't get here in the first place,
 * hence the warning.
 */
-   kuap_check_amr();
+   kuap_check();
 
if (kuap_state != AMR_KUAP_BLOCKED)
set_kuap(AMR_KUAP_BLOCKED);
-- 
2.25.0



[PATCH v2 39/43] powerpc/32s: Move KUEP locking/unlocking in C

2021-03-09 Thread Christophe Leroy
This can be done in C, do it.

Unrolling the loop gains approx. 15% performance.

From now on, prepare_transfer_to_handler() is only for
interrupts from kernel.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/kup.h | 31 ---
 arch/powerpc/include/asm/interrupt.h |  3 ++
 arch/powerpc/include/asm/kup.h   |  8 +
 arch/powerpc/kernel/entry_32.S   | 16 +-
 arch/powerpc/kernel/interrupt.c  |  4 +++
 arch/powerpc/mm/book3s32/Makefile|  1 +
 arch/powerpc/mm/book3s32/kuep.c  | 38 
 7 files changed, 55 insertions(+), 46 deletions(-)
 create mode 100644 arch/powerpc/mm/book3s32/kuep.c

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h 
b/arch/powerpc/include/asm/book3s/32/kup.h
index 73bc5d2c431d..b97ea60f6fa3 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -7,37 +7,6 @@
 
 #ifdef __ASSEMBLY__
 
-.macro kuep_update_sr  gpr1, gpr2  /* NEVER use r0 as gpr2 due to 
addis */
-101:   mtsrin  \gpr1, \gpr2
-   addi\gpr1, \gpr1, 0x111 /* next VSID */
-   rlwinm  \gpr1, \gpr1, 0, 0xf0ff /* clear VSID overflow */
-   addis   \gpr2, \gpr2, 0x1000/* address of next segment */
-   bdnz101b
-   isync
-.endm
-
-.macro kuep_lock   gpr1, gpr2
-#ifdef CONFIG_PPC_KUEP
-   li  \gpr1, NUM_USER_SEGMENTS
-   li  \gpr2, 0
-   mtctr   \gpr1
-   mfsrin  \gpr1, \gpr2
-   oris\gpr1, \gpr1, SR_NX@h   /* set Nx */
-   kuep_update_sr \gpr1, \gpr2
-#endif
-.endm
-
-.macro kuep_unlock gpr1, gpr2
-#ifdef CONFIG_PPC_KUEP
-   li  \gpr1, NUM_USER_SEGMENTS
-   li  \gpr2, 0
-   mtctr   \gpr1
-   mfsrin  \gpr1, \gpr2
-   rlwinm  \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */
-   kuep_update_sr \gpr1, \gpr2
-#endif
-.endm
-
 #ifdef CONFIG_PPC_KUAP
 
 .macro kuap_update_sr  gpr1, gpr2, gpr3/* NEVER use r0 as gpr2 due to 
addis */
diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index e6d71c2e3aa2..d4bfe94b4a68 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -34,6 +34,7 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
trace_hardirqs_off();
 
if (user_mode(regs)) {
+   kuep_lock();
current->thread.regs = regs;
account_cpu_user_entry();
}
@@ -91,6 +92,8 @@ static inline void interrupt_exit_prepare(struct pt_regs 
*regs, struct interrupt
exception_exit(state->ctx_state);
 #endif
 
+   if (user_mode(regs))
+   kuep_unlock();
/*
 * Book3S exits to user via interrupt_exit_user_prepare(), which does
 * context tracking, which is a cleaner way to handle PREEMPT=y
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 7ec21af49a45..25671f711ec2 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -55,6 +55,14 @@ void setup_kuep(bool disabled);
 static inline void setup_kuep(bool disabled) { }
 #endif /* CONFIG_PPC_KUEP */
 
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+void kuep_lock(void);
+void kuep_unlock(void);
+#else
+static inline void kuep_lock(void) { }
+static inline void kuep_unlock(void) { }
+#endif
+
 #ifdef CONFIG_PPC_KUAP
 void setup_kuap(bool disabled);
 #else
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9c333e6db5fa..850cb17a937f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -53,14 +53,9 @@
 prepare_transfer_to_handler:
andi.   r0,r9,MSR_PR
addir12, r2, THREAD
-   beq 2f
-#ifdef CONFIG_PPC_BOOK3S_32
-   kuep_lock r11, r12
-#endif
-   blr
+   bnelr
 
/* if from kernel, check interrupted DOZE/NAP mode */
-2:
kuap_save_and_lock r11, r12, r9, r5, r6
lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf   0x01,r12
@@ -84,9 +79,6 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
.globl  transfer_to_syscall
 transfer_to_syscall:
SAVE_NVGPRS(r1)
-#ifdef CONFIG_PPC_BOOK3S_32
-   kuep_lock r11, r12
-#endif
 
/* Calling convention has r9 = orig r0, r10 = regs */
addir10,r1,STACK_FRAME_OVERHEAD
@@ -104,9 +96,6 @@ ret_from_syscall:
cmplwi  cr0,r5,0
bne-2f
 #endif /* CONFIG_PPC_47x */
-#ifdef CONFIG_PPC_BOOK3S_32
-   kuep_unlock r5, r7
-#endif
kuap_check r2, r4
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
@@ -287,9 +276,6 @@ interrupt_return:
bne-.Lrestore_nvgprs
 
 .Lfast_user_interrupt_return:
-#ifdef CONFIG_PPC_BOOK3S_32
-   kuep_unlock r10, r11
-#endif
kuap_check r2, r4
lwz r11,_NIP(r1)
lwz r12,_MSR(r1)
diff --git 

[PATCH v2 38/43] powerpc/32: Only use prepare_transfer_to_handler function on book3s/32 and e500

2021-03-09 Thread Christophe Leroy
Only book3s/32 and e500 have significant work to do in
prepare_transfer_to_handler.

Other 32 bit have nothing to do at all.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 6 ++
 arch/powerpc/kernel/head_32.h| 2 ++
 arch/powerpc/kernel/head_booke.h | 2 ++
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 5cfa10816261..9c333e6db5fa 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -48,6 +48,7 @@
  */
.align  12
 
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
.globl  prepare_transfer_to_handler
 prepare_transfer_to_handler:
andi.   r0,r9,MSR_PR
@@ -61,15 +62,12 @@ prepare_transfer_to_handler:
/* if from kernel, check interrupted DOZE/NAP mode */
 2:
kuap_save_and_lock r11, r12, r9, r5, r6
-#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf   0x01,r12
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
-#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
blr
 
-#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 4: rlwinm  r12,r12,0,~_TLF_NAPPING
stw r12,TI_LOCAL_FLAGS(r2)
b   power_save_ppc32_restore
@@ -80,8 +78,8 @@ prepare_transfer_to_handler:
rlwinm  r9,r9,0,~MSR_EE
stw r9,_MSR(r11)
b   fast_exception_return
-#endif
 _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
 
.globl  transfer_to_syscall
 transfer_to_syscall:
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 267479072495..ca303762d8cc 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -132,7 +132,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 .endm
 
 .macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_BOOK3S_32
bl  prepare_transfer_to_handler
+#endif
 .endm
 
 .macro SYSCALL_ENTRY trapno
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 4d583fbef0b6..a2565023d2d0 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -92,7 +92,9 @@ END_BTB_FLUSH_SECTION
 .endm
 
 .macro prepare_transfer_to_handler
+#ifdef CONFIG_E500
bl  prepare_transfer_to_handler
+#endif
 .endm
 
 .macro SYSCALL_ENTRY trapno intno srr1
-- 
2.25.0



[PATCH v2 37/43] powerpc/32: Return directly from power_save_ppc32_restore()

2021-03-09 Thread Christophe Leroy
transfer_to_handler_cont: is now just a blr.

Directly perform blr in power_save_ppc32_restore().

Also remove useless setting of r11 in e500 version of
power_save_ppc32_restore().

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S  |  3 ---
 arch/powerpc/kernel/idle_6xx.S  |  2 +-
 arch/powerpc/kernel/idle_e500.S | 10 +-
 3 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 815a4ff1ba76..5cfa10816261 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -67,8 +67,6 @@ prepare_transfer_to_handler:
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
 #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
-   .globl transfer_to_handler_cont
-transfer_to_handler_cont:
blr
 
 #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
@@ -84,7 +82,6 @@ transfer_to_handler_cont:
b   fast_exception_return
 #endif
 _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 
.globl  transfer_to_syscall
 transfer_to_syscall:
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 153366e178c4..13cad9297d82 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -176,7 +176,7 @@ BEGIN_FTR_SECTION
lwz r9,nap_save_hid1@l(r9)
mtspr   SPRN_HID1, r9
 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
-   b   transfer_to_handler_cont
+   blr
 _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
 
.data
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 7795727e7f08..9e1bc4502c50 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -81,13 +81,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 _GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11)   /* interrupted in e500_idle */
stw r9,_NIP(r11)/* make it do a blr */
-
-#ifdef CONFIG_SMP
-   lwz r11,TASK_CPU(r2)/* get cpu number * 4 */
-   slwir11,r11,2
-#else
-   li  r11,0
-#endif
-
-   b   transfer_to_handler_cont
+   blr
 _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
-- 
2.25.0



[PATCH v2 36/43] powerpc/32: Set current->thread.regs in C interrupt entry

2021-03-09 Thread Christophe Leroy
No need to do that in assembly, do it in C.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/interrupt.h | 4 +++-
 arch/powerpc/kernel/entry_32.S   | 3 +--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index 861e6eadc98c..e6d71c2e3aa2 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -33,8 +33,10 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
if (!arch_irq_disabled_regs(regs))
trace_hardirqs_off();
 
-   if (user_mode(regs))
+   if (user_mode(regs)) {
+   current->thread.regs = regs;
account_cpu_user_entry();
+   }
 #endif
/*
 * Book3E reconciles irq soft mask in asm
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 8fe1c3fdfa6e..815a4ff1ba76 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -52,8 +52,7 @@
 prepare_transfer_to_handler:
andi.   r0,r9,MSR_PR
addir12, r2, THREAD
-   beq 2f  /* if from user, fix up THREAD.regs */
-   stw r3,PT_REGS(r12)
+   beq 2f
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_lock r11, r12
 #endif
-- 
2.25.0



[PATCH v2 35/43] powerpc/32: Save remaining registers in exception prolog

2021-03-09 Thread Christophe Leroy
Save non volatile registers, XER, CTR, MSR and NIP in exception prolog.

Also assign proper value to r2 and r3 there.

For now, recalculate thread pointer in prepare_transfer_to_handler.
It will disappear once KUAP is ported to C.

And remove the comment which is now completely wrong.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 25 +++--
 arch/powerpc/kernel/head_32.h  | 12 
 2 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 289f111a5ac7..8fe1c3fdfa6e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -48,29 +48,11 @@
  */
.align  12
 
-/*
- * This code finishes saving the registers to the exception frame
- * and jumps to the appropriate handler for the exception, turning
- * on address translation.
- * Note that we rely on the caller having set cr0.eq iff the exception
- * occurred in kernel mode (i.e. MSR:PR = 0).
- */
.globl  prepare_transfer_to_handler
 prepare_transfer_to_handler:
-   SAVE_NVGPRS(r11)
-   addir3,r1,STACK_FRAME_OVERHEAD
-   stw r2,GPR2(r11)
-   stw r12,_NIP(r11)
-   stw r9,_MSR(r11)
-   andi.   r2,r9,MSR_PR
-   mfctr   r12
-   mfspr   r2,SPRN_XER
-   stw r12,_CTR(r11)
-   stw r2,_XER(r11)
-   mfspr   r12,SPRN_SPRG_THREAD
-   tovirt(r12, r12)
+   andi.   r0,r9,MSR_PR
+   addir12, r2, THREAD
beq 2f  /* if from user, fix up THREAD.regs */
-   addir2, r12, -THREAD
stw r3,PT_REGS(r12)
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_lock r11, r12
@@ -79,8 +61,7 @@ prepare_transfer_to_handler:
 
/* if from kernel, check interrupted DOZE/NAP mode */
 2:
-   kuap_save_and_lock r11, r12, r9, r2, r6
-   addir2, r12, -THREAD
+   kuap_save_and_lock r11, r12, r9, r5, r6
 #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf   0x01,r12
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index ba20bfabdf63..267479072495 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -117,6 +117,18 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
stw r10,_TRAP(r1)
SAVE_4GPRS(3, r1)
SAVE_2GPRS(7, r1)
+   SAVE_NVGPRS(r1)
+   stw r2,GPR2(r1)
+   stw r12,_NIP(r1)
+   stw r9,_MSR(r1)
+   mfctr   r0
+   mfspr   r10,SPRN_XER
+   mfspr   r2,SPRN_SPRG_THREAD
+   stw r0,_CTR(r1)
+   tovirt(r2, r2)
+   stw r10,_XER(r1)
+   addir2, r2, -THREAD
+   addir3,r1,STACK_FRAME_OVERHEAD
 .endm
 
 .macro prepare_transfer_to_handler
-- 
2.25.0



[PATCH v2 34/43] powerpc/32: Refactor saving of volatile registers in exception prologs

2021-03-09 Thread Christophe Leroy
Exception prologs all do the same at the end:
- Save trapno in stack
- Mark stack with exception marker
- Save r0
- Save r3 to r8

Refactor that into a COMMON_EXCEPTION_PROLOG_END macro.
At the same time use r1 instead of r11.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h| 16 ++--
 arch/powerpc/kernel/head_40x.S   |  9 +
 arch/powerpc/kernel/head_booke.h | 26 +-
 3 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 84e6251622e8..ba20bfabdf63 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -104,15 +104,19 @@
li  r10, MSR_KERNEL /* can take exceptions */
mtmsr   r10 /* (except for mach check in rtas) */
 #endif
-   stw r0,GPR0(r11)
+   COMMON_EXCEPTION_PROLOG_END \trapno
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+   stw r0,GPR0(r1)
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
addir10,r10,STACK_FRAME_REGS_MARKER@l
-   stw r10,8(r11)
+   stw r10,8(r1)
li  r10, \trapno
-   stw r10,_TRAP(r11)
-   SAVE_4GPRS(3, r11)
-   SAVE_2GPRS(7, r11)
-_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+   stw r10,_TRAP(r1)
+   SAVE_4GPRS(3, r1)
+   SAVE_2GPRS(7, r1)
 .endm
 
 .macro prepare_transfer_to_handler
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 52b40bf529c6..e1360b88b6cb 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -157,14 +157,7 @@ _ENTRY(crit_esr)
mfspr   r12,SPRN_SRR2
mfspr   r9,SPRN_SRR3
rlwinm  r9,r9,0,14,12   /* clear MSR_WE (necessary?)   */
-   stw r0,GPR0(r11)
-   lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
-   addir10, r10, STACK_FRAME_REGS_MARKER@l
-   stw r10, 8(r11)
-   li  r10, \trapno + 2
-   stw r10,_TRAP(r11)
-   SAVE_4GPRS(3, r11)
-   SAVE_2GPRS(7, r11)
+   COMMON_EXCEPTION_PROLOG_END \trapno + 2
 _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 .endm
 
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index fa566e89f18b..4d583fbef0b6 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -78,14 +78,18 @@ END_BTB_FLUSH_SECTION
stw r1, 0(r11);  \
mr  r1, r11; \
rlwinm  r9,r9,0,14,12;  /* clear MSR_WE (necessary?)   */\
-   stw r0,GPR0(r11);\
-   lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
-   addir10, r10, STACK_FRAME_REGS_MARKER@l; \
-   stw r10, 8(r11); \
-   li  r10, trapno; \
-   stw r10,_TRAP(r11);  \
-   SAVE_4GPRS(3, r11);  \
-   SAVE_2GPRS(7, r11)
+   COMMON_EXCEPTION_PROLOG_END trapno
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+   stw r0,GPR0(r1)
+   lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+   addir10, r10, STACK_FRAME_REGS_MARKER@l
+   stw r10, 8(r1)
+   li  r10, \trapno
+   stw r10,_TRAP(r1)
+   SAVE_4GPRS(3, r1)
+   SAVE_2GPRS(7, r1)
+.endm
 
 .macro prepare_transfer_to_handler
bl  prepare_transfer_to_handler
@@ -231,11 +235,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
stw r1,0(r11);   \
mr  r1,r11;  \
rlwinm  r9,r9,0,14,12;  /* clear MSR_WE (necessary?)   */\
-   li  r10, trapno; \
-   stw r10,_TRAP(r11);  \
-   stw r0,GPR0(r11);\
-   SAVE_4GPRS(3, r11);  \
-   SAVE_2GPRS(7, r11)
+   COMMON_EXCEPTION_PROLOG_END trapno
 
 #define SAVE_xSRR(xSRR)\
mfspr   r0,SPRN_##xSRR##0;  \
-- 
2.25.0



[PATCH v2 33/43] powerpc/32: Remove the xfer parameter in EXCEPTION() macro

2021-03-09 Thread Christophe Leroy
The xfer parameter is not used anymore, remove it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h|  2 +-
 arch/powerpc/kernel/head_40x.S   | 42 
 arch/powerpc/kernel/head_44x.S   | 10 ++--
 arch/powerpc/kernel/head_8xx.S   | 14 +++---
 arch/powerpc/kernel/head_book3s_32.S | 72 ++--
 arch/powerpc/kernel/head_booke.h |  2 +-
 arch/powerpc/kernel/head_fsl_booke.S | 28 +--
 7 files changed, 81 insertions(+), 89 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 412ede8610f7..84e6251622e8 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -186,7 +186,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 
 #endif
 
-#define EXCEPTION(n, label, hdlr, xfer)\
+#define EXCEPTION(n, label, hdlr)  \
START_EXCEPTION(n, label)   \
EXCEPTION_PROLOG n label;   \
prepare_transfer_to_handler;\
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 7eb49ebd6000..52b40bf529c6 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -228,7 +228,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
b   interrupt_return
 
 /* 0x0500 - External Interrupt Exception */
-   EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+   EXCEPTION(0x0500, HardwareInterrupt, do_IRQ)
 
 /* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
@@ -246,19 +246,19 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
REST_NVGPRS(r1)
b   interrupt_return
 
-   EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
+   EXCEPTION(0x0800, Trap_08, unknown_exception)
+   EXCEPTION(0x0900, Trap_09, unknown_exception)
+   EXCEPTION(0x0A00, Trap_0A, unknown_exception)
+   EXCEPTION(0x0B00, Trap_0B, unknown_exception)
 
 /* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
SYSCALL_ENTRY   0xc00
 /* Trap_0D is commented out to get more space for system call exception */
 
-/* EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */
-   EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
+/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */
+   EXCEPTION(0x0E00, Trap_0E, unknown_exception)
+   EXCEPTION(0x0F00, Trap_0F, unknown_exception)
 
 /* 0x1000 - Programmable Interval Timer (PIT) Exception */
START_EXCEPTION(0x1000, DecrementerTrap)
@@ -433,19 +433,19 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
mfspr   r10, SPRN_SPRG_SCRATCH5
b   InstructionAccess
 
-   EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
-   EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
+   EXCEPTION(0x1300, Trap_13, unknown_exception)
+   EXCEPTION(0x1400, Trap_14, unknown_exception)
+   EXCEPTION(0x1500, Trap_15, unknown_exception)
+   EXCEPTION(0x1600, Trap_16, unknown_exception)
+   EXCEPTION(0x1700, Trap_17, unknown_exception)
+   EXCEPTION(0x1800, Trap_18, unknown_exception)
+   EXCEPTION(0x1900, Trap_19, unknown_exception)
+   EXCEPTION(0x1A00, Trap_1A, unknown_exception)
+   EXCEPTION(0x1B00, Trap_1B, unknown_exception)
+   EXCEPTION(0x1C00, Trap_1C, unknown_exception)
+   EXCEPTION(0x1D00, Trap_1D, unknown_exception)
+   EXCEPTION(0x1E00, Trap_1E, unknown_exception)
+   EXCEPTION(0x1F00, Trap_1F, unknown_exception)
 
 /* Check for a single step debug exception while in an exception
  * handler before state has been saved.  This is to catch the case
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 813fa305c33b..5c106ac36626 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -263,8 +263,7 @@ interrupt_base:
INSTRUCTION_STORAGE_EXCEPTION
 
/* External Input 

[PATCH v2 32/43] powerpc/32: Dismantle EXC_XFER_STD/LITE/TEMPLATE

2021-03-09 Thread Christophe Leroy
In order to get more control in exception prolog, dismantle
all non standard exception macros, finishing with EXC_XFER_STD
and EXC_XFER_LITE and EXC_XFER_TEMPLATE.

Also remove transfer_to_handler_full and ret_from_except and
ret_from_except_full as they are not used anymore.

Last parameter of EXCEPTION() is now ignored, will be removed
in a later patch to avoid too much churn.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 40 ---
 arch/powerpc/kernel/head_32.h| 21 
 arch/powerpc/kernel/head_40x.S   | 33 ---
 arch/powerpc/kernel/head_8xx.S   | 12 +--
 arch/powerpc/kernel/head_book3s_32.S | 27 ++-
 arch/powerpc/kernel/head_booke.h | 49 +++-
 arch/powerpc/kernel/head_fsl_booke.S | 14 +---
 7 files changed, 91 insertions(+), 105 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ca14bc2f3418..289f111a5ac7 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -48,30 +48,6 @@
  */
.align  12
 
-#ifdef CONFIG_BOOKE
-   .globl  mcheck_transfer_to_handler
-mcheck_transfer_to_handler:
-   /* fall through */
-_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler)
-
-   .globl  debug_transfer_to_handler
-debug_transfer_to_handler:
-   /* fall through */
-_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler)
-
-   .globl  crit_transfer_to_handler
-crit_transfer_to_handler:
-   /* fall through */
-_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
-#endif
-
-#ifdef CONFIG_40x
-   .globl  crit_transfer_to_handler
-crit_transfer_to_handler:
-   /* fall through */
-_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
-#endif
-
 /*
  * This code finishes saving the registers to the exception frame
  * and jumps to the appropriate handler for the exception, turning
@@ -79,13 +55,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
  * Note that we rely on the caller having set cr0.eq iff the exception
  * occurred in kernel mode (i.e. MSR:PR = 0).
  */
-   .globl  transfer_to_handler_full
-transfer_to_handler_full:
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full)
-   /* fall through */
-
-   .globl  transfer_to_handler
-transfer_to_handler:
.globl  prepare_transfer_to_handler
 prepare_transfer_to_handler:
SAVE_NVGPRS(r11)
@@ -135,7 +104,6 @@ transfer_to_handler_cont:
b   fast_exception_return
 #endif
 _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
-_ASM_NOKPROBE_SYMBOL(transfer_to_handler)
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 
.globl  transfer_to_syscall
@@ -333,14 +301,6 @@ fast_exception_return:
 #endif
 _ASM_NOKPROBE_SYMBOL(fast_exception_return)
 
-   .globl  ret_from_except_full
-ret_from_except_full:
-   /* fall through */
-
-   .globl  ret_from_except
-ret_from_except:
-_ASM_NOKPROBE_SYMBOL(ret_from_except)
-
.globl interrupt_return
 interrupt_return:
lwz r4,_MSR(r1)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 3ab0f3ad9a6a..412ede8610f7 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -189,20 +189,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 #define EXCEPTION(n, label, hdlr, xfer)\
START_EXCEPTION(n, label)   \
EXCEPTION_PROLOG n label;   \
-   xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)  \
-   bl  tfer;   \
-   bl  hdlr;   \
-   b   ret
-
-#define EXC_XFER_STD(n, hdlr)  \
-   EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full,
\
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
-   EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \
- ret_from_except)
+   prepare_transfer_to_handler;\
+   bl  hdlr;   \
+   b   interrupt_return
 
 .macro vmap_stack_overflow_exception
__HEAD
@@ -218,7 +207,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
lwz r1, emergency_ctx@l(r1)
addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
EXCEPTION_PROLOG_2 0 vmap_stack_overflow
-   EXC_XFER_STD(0, stack_overflow_exception)
+   prepare_transfer_to_handler
+   bl  stack_overflow_exception
+   b   interrupt_return
 .endm
 
 #endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index f3e5b462113f..7eb49ebd6000 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -187,8 +187,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 #define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label);  \

[PATCH v2 31/43] powerpc/32: Only restore non volatile registers when required

2021-03-09 Thread Christophe Leroy
Until now, non volatile registers were restored every time they
were saved, ie using EXC_XFER_STD meant saving and restoring
them while EXC_XFER_LITE meant neither saving nor restoring them.

Now that they are always saved, EXC_XFER_STD means to restore
them and EXC_XFER_LITE means to not restore them.

Most of the users of EXC_XFER_STD only need to retrieve the
non volatile registers. For them there is no need to restore
the non volatile registers as they have not been modified.

Only very few exceptions require non volatile registers restore.

Opencode the few places which require restoring of non volatile
registers.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   |  1 -
 arch/powerpc/kernel/head_40x.S   | 10 --
 arch/powerpc/kernel/head_8xx.S   | 24 
 arch/powerpc/kernel/head_book3s_32.S | 17 ++---
 arch/powerpc/kernel/head_booke.h | 10 --
 arch/powerpc/kernel/head_fsl_booke.S | 16 
 6 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e2346662444d..ca14bc2f3418 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -335,7 +335,6 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return)
 
.globl  ret_from_except_full
 ret_from_except_full:
-   REST_NVGPRS(r1)
/* fall through */
 
.globl  ret_from_except
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 7270caff665c..f3e5b462113f 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -228,12 +228,18 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 /* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
-   EXC_XFER_STD(0x600, alignment_exception)
+   prepare_transfer_to_handler
+   bl  alignment_exception
+   REST_NVGPRS(r1)
+   b   interrupt_return
 
 /* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1
-   EXC_XFER_STD(0x700, program_check_exception)
+   prepare_transfer_to_handler
+   bl  program_check_exception
+   REST_NVGPRS(r1)
+   b   interrupt_return
 
EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c48de97f42fc..86f844eb0e5a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -131,10 +131,18 @@ instruction_counter:
 /* Alignment exception */
START_EXCEPTION(0x600, Alignment)
EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1
-   EXC_XFER_STD(0x600, alignment_exception)
+   prepare_transfer_to_handler
+   bl  alignment_exception
+   REST_NVGPRS(r1)
+   b   interrupt_return
 
 /* Program check exception */
-   EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+   START_EXCEPTION(0x700, ProgramCheck)
+   EXCEPTION_PROLOG 0x700 ProgramCheck
+   prepare_transfer_to_handler
+   bl  program_check_exception
+   REST_NVGPRS(r1)
+   b   interrupt_return
 
 /* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
@@ -149,7 +157,12 @@ instruction_counter:
 /* On the MPC8xx, this is a software emulation interrupt.  It occurs
  * for all unimplemented and illegal instructions.
  */
-   EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD)
+   START_EXCEPTION(0x1000, SoftEmu)
+   EXCEPTION_PROLOG 0x1000 SoftEmu
+   prepare_transfer_to_handler
+   bl  emulation_assist_interrupt
+   REST_NVGPRS(r1)
+   b   interrupt_return
 
 /*
  * For the MPC8xx, this is a software tablewalk to load the instruction
@@ -348,7 +361,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */
EXCEPTION_PROLOG_2 0x1c00 DataBreakpoint handle_dar_dsisr=1
mfspr   r4,SPRN_BAR
stw r4,_DAR(r11)
-   EXC_XFER_STD(0x1c00, do_break)
+   prepare_transfer_to_handler
+   bl  do_break
+   REST_NVGPRS(r1)
+   b   interrupt_return
 
 #ifdef CONFIG_PERF_EVENTS
START_EXCEPTION(0x1d00, InstructionBreakpoint)
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index 67dac65b8ec3..609b2eedd4f9 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -300,7 +300,10 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
andis.  r0, r5, DSISR_DABRMATCH@h
bne-1f
EXC_XFER_LITE(0x300, do_page_fault)
-1: EXC_XFER_STD(0x300, do_break)
+1: prepare_transfer_to_handler
+   bl  do_break
+   REST_NVGPRS(r1)
+   b   interrupt_return
 
 
 

[PATCH v2 30/43] powerpc/32: Add a prepare_transfer_to_handler macro for exception prologs

2021-03-09 Thread Christophe Leroy
In order to increase flexibility, add a macro that will for now
call transfer_to_handler.

As transfer_to_handler doesn't do the actual transfer anymore,
also name it prepare_transfer_to_handler. The following patches
will progressively remove the use of transfer_to_handler label.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 3 +++
 arch/powerpc/kernel/head_32.h| 4 
 arch/powerpc/kernel/head_booke.h | 4 
 3 files changed, 11 insertions(+)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index cb2fa00b8fc1..e2346662444d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -86,6 +86,8 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full)
 
.globl  transfer_to_handler
 transfer_to_handler:
+   .globl  prepare_transfer_to_handler
+prepare_transfer_to_handler:
SAVE_NVGPRS(r11)
addir3,r1,STACK_FRAME_OVERHEAD
stw r2,GPR2(r11)
@@ -132,6 +134,7 @@ transfer_to_handler_cont:
stw r9,_MSR(r11)
b   fast_exception_return
 #endif
+_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler)
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index bf4c288173ad..3ab0f3ad9a6a 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -115,6 +115,10 @@
 _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 .endm
 
+.macro prepare_transfer_to_handler
+   bl  prepare_transfer_to_handler
+.endm
+
 .macro SYSCALL_ENTRY trapno
mfspr   r9, SPRN_SRR1
mfspr   r10, SPRN_SRR0
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 27a7358c04bb..0f02b970e797 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -87,6 +87,10 @@ END_BTB_FLUSH_SECTION
SAVE_4GPRS(3, r11);  \
SAVE_2GPRS(7, r11)
 
+.macro prepare_transfer_to_handler
+   bl  prepare_transfer_to_handler
+.endm
+
 .macro SYSCALL_ENTRY trapno intno srr1
mfspr   r10, SPRN_SPRG_THREAD
 #ifdef CONFIG_KVM_BOOKE_HV
-- 
2.25.0



[PATCH v2 29/43] powerpc/32: Save trap number on stack in exception prolog

2021-03-09 Thread Christophe Leroy
Saving the trap number into the stack goes into
the exception prolog, as EXC_XFER_xxx will soon disappear.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h| 14 -
 arch/powerpc/kernel/head_40x.S   | 22 +++---
 arch/powerpc/kernel/head_8xx.S   | 14 -
 arch/powerpc/kernel/head_book3s_32.S | 14 -
 arch/powerpc/kernel/head_booke.h | 44 +++-
 arch/powerpc/kernel/head_fsl_booke.S |  4 +--
 6 files changed, 58 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4d638d760a96..bf4c288173ad 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -10,10 +10,10 @@
  * We assume sprg3 has the physical address of the current
  * task's thread_struct.
  */
-.macro EXCEPTION_PROLOGname handle_dar_dsisr=0
+.macro EXCEPTION_PROLOGtrapno name handle_dar_dsisr=0
EXCEPTION_PROLOG_0  handle_dar_dsisr=\handle_dar_dsisr
EXCEPTION_PROLOG_1
-   EXCEPTION_PROLOG_2  \name handle_dar_dsisr=\handle_dar_dsisr
+   EXCEPTION_PROLOG_2  \trapno \name handle_dar_dsisr=\handle_dar_dsisr
 .endm
 
 .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
@@ -56,7 +56,7 @@
 #endif
 .endm
 
-.macro EXCEPTION_PROLOG_2 name handle_dar_dsisr=0
+.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0
 #ifdef CONFIG_PPC_8xx
.if \handle_dar_dsisr
li  r11, RPN_PATTERN
@@ -108,6 +108,8 @@
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
addir10,r10,STACK_FRAME_REGS_MARKER@l
stw r10,8(r11)
+   li  r10, \trapno
+   stw r10,_TRAP(r11)
SAVE_4GPRS(3, r11)
SAVE_2GPRS(7, r11)
 _ASM_NOKPROBE_SYMBOL(\name\()_virt)
@@ -182,12 +184,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 
 #define EXCEPTION(n, label, hdlr, xfer)\
START_EXCEPTION(n, label)   \
-   EXCEPTION_PROLOG label; \
+   EXCEPTION_PROLOG n label;   \
xfer(n, hdlr)
 
 #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)  \
-   li  r10,trap;   \
-   stw r10,_TRAP(r11); \
bl  tfer;   \
bl  hdlr;   \
b   ret
@@ -213,7 +213,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 #endif
lwz r1, emergency_ctx@l(r1)
addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
-   EXCEPTION_PROLOG_2 vmap_stack_overflow
+   EXCEPTION_PROLOG_2 0 vmap_stack_overflow
EXC_XFER_STD(0, stack_overflow_exception)
 .endm
 
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index a65778380704..7270caff665c 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -104,7 +104,7 @@ _ENTRY(crit_esr)
  * Instead we use a couple of words of memory at low physical addresses.
  * This is OK since we don't support SMP on these processors.
  */
-.macro CRITICAL_EXCEPTION_PROLOG name
+.macro CRITICAL_EXCEPTION_PROLOG trapno name
stw r10,crit_r10@l(0)   /* save two registers to work with */
stw r11,crit_r11@l(0)
mfspr   r10,SPRN_SRR0
@@ -161,6 +161,8 @@ _ENTRY(crit_esr)
lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
addir10, r10, STACK_FRAME_REGS_MARKER@l
stw r10, 8(r11)
+   li  r10, \trapno + 2
+   stw r10,_TRAP(r11)
SAVE_4GPRS(3, r11)
SAVE_2GPRS(7, r11)
 _ASM_NOKPROBE_SYMBOL(\name\()_virt)
@@ -184,7 +186,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
  */
 #define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label);  \
-   CRITICAL_EXCEPTION_PROLOG label;\
+   CRITICAL_EXCEPTION_PROLOG n label;  \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
  crit_transfer_to_handler, ret_from_crit_exc)
 
@@ -206,7 +208,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
  * if they can't resolve the lightweight TLB fault.
  */
START_EXCEPTION(0x0300, DataStorage)
-   EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1
+   EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1
EXC_XFER_LITE(0x300, do_page_fault)
 
 /*
@@ -214,7 +216,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
  * This is caused by a fetch from non-execute or guarded pages.
  */
START_EXCEPTION(0x0400, InstructionAccess)
-   EXCEPTION_PROLOG InstructionAccess
+   EXCEPTION_PROLOG 0x400 InstructionAccess
li  r5,0
stw r5, _ESR(r11)   /* Zero ESR */
stw r12, _DEAR(r11) /* SRR0 as DEAR */
@@ -225,12 +227,12 @@ 

[PATCH v2 27/43] powerpc/32: Call bad_page_fault() from do_page_fault()

2021-03-09 Thread Christophe Leroy
Now that non-volatile registers are saved at all times, there is
no need to split bad_page_fault() out of do_page_fault().

Remove handle_page_fault() and use do_page_fault() directly.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 16 
 arch/powerpc/kernel/head_40x.S   |  4 ++--
 arch/powerpc/kernel/head_8xx.S   |  4 ++--
 arch/powerpc/kernel/head_book3s_32.S |  4 ++--
 arch/powerpc/kernel/head_booke.h |  4 ++--
 arch/powerpc/kernel/head_fsl_booke.S |  2 +-
 arch/powerpc/mm/fault.c  |  2 +-
 7 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 4698fd1bd8c8..cb2fa00b8fc1 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -220,22 +220,6 @@ ret_from_kernel_thread:
li  r3,0
b   ret_from_syscall
 
-/*
- * Top-level page fault handling.
- * This is in assembler because if do_page_fault tells us that
- * it is a bad kernel page fault, we want to save the non-volatile
- * registers before calling bad_page_fault.
- */
-   .globl  handle_page_fault
-handle_page_fault:
-   bl  do_page_fault
-   cmpwi   r3,0
-   beq+ret_from_except
-   mr  r4,r3   /* err arg for bad_page_fault */
-   addir3,r1,STACK_FRAME_OVERHEAD
-   bl  __bad_page_fault
-   b   ret_from_except_full
-
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 08563d4170c6..a65778380704 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -207,7 +207,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
  */
START_EXCEPTION(0x0300, DataStorage)
EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1
-   EXC_XFER_LITE(0x300, handle_page_fault)
+   EXC_XFER_LITE(0x300, do_page_fault)
 
 /*
  * 0x0400 - Instruction Storage Exception
@@ -218,7 +218,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
li  r5,0
stw r5, _ESR(r11)   /* Zero ESR */
stw r12, _DEAR(r11) /* SRR0 as DEAR */
-   EXC_XFER_LITE(0x400, handle_page_fault)
+   EXC_XFER_LITE(0x400, do_page_fault)
 
 /* 0x0500 - External Interrupt Exception */
EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index eb1d40a8f2c4..4078d0dc2f18 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -301,7 +301,7 @@ instruction_counter:
 .Litlbie:
stw r12, _DAR(r11)
stw r5, _DSISR(r11)
-   EXC_XFER_LITE(0x400, handle_page_fault)
+   EXC_XFER_LITE(0x400, do_page_fault)
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
  * many reasons, including a dirty update to a pte.  We bail out to
@@ -322,7 +322,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
tlbie   r4
 .Ldtlbie:
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
-   EXC_XFER_LITE(0x300, handle_page_fault)
+   EXC_XFER_LITE(0x300, do_page_fault)
 
 #ifdef CONFIG_VMAP_STACK
vmap_stack_overflow_exception
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index 626e9fbac2cc..81a6ec098dd1 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -299,7 +299,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
lwz r5, _DSISR(r11)
andis.  r0, r5, DSISR_DABRMATCH@h
bne-1f
-   EXC_XFER_LITE(0x300, handle_page_fault)
+   EXC_XFER_LITE(0x300, do_page_fault)
 1: EXC_XFER_STD(0x300, do_break)
 
 
@@ -328,7 +328,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
andis.  r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
stw r5, _DSISR(r11)
stw r12, _DAR(r11)
-   EXC_XFER_LITE(0x400, handle_page_fault)
+   EXC_XFER_LITE(0x400, do_page_fault)
 
 /* External interrupt */
EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 009a56d70d76..036a69d16605 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -462,7 +462,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
stw r5,_ESR(r11); \
mfspr   r4,SPRN_DEAR;   /* Grab the DEAR */   \
stw r4, _DEAR(r11);   \
-   EXC_XFER_LITE(0x0300, handle_page_fault)
+   EXC_XFER_LITE(0x0300, do_page_fault)
 
 #define INSTRUCTION_STORAGE_EXCEPTION\
START_EXCEPTION(InstructionStorage)  

[PATCH v2 28/43] powerpc/64e: Call bad_page_fault() from do_page_fault()

2021-03-09 Thread Christophe Leroy
book3e/64 is the last one calling __bad_page_fault()
from assembly.

Save non volatile registers before calling do_page_fault()
and modify do_page_fault() to call __bad_page_fault()
for all platforms.

The open-coded handling can then be replaced by a call to bad_page_fault(),
which avoids duplicating the exception table search.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/exceptions-64e.S |  8 +---
 arch/powerpc/mm/fault.c  | 17 -
 2 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index e8eb9992a270..b60f89078a3f 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1010,15 +1010,9 @@ storage_fault_common:
addir3,r1,STACK_FRAME_OVERHEAD
ld  r14,PACA_EXGEN+EX_R14(r13)
ld  r15,PACA_EXGEN+EX_R15(r13)
+   bl  save_nvgprs
bl  do_page_fault
-   cmpdi   r3,0
-   bne-1f
b   ret_from_except_lite
-1: bl  save_nvgprs
-   mr  r4,r3
-   addir3,r1,STACK_FRAME_OVERHEAD
-   bl  __bad_page_fault
-   b   ret_from_except
 
 /*
  * Alignment exception doesn't fit entirely in the 0x100 bytes so it
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 2e54bac99a22..7bcff3fca110 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -541,24 +541,15 @@ NOKPROBE_SYMBOL(___do_page_fault);
 
 static long __do_page_fault(struct pt_regs *regs)
 {
-   const struct exception_table_entry *entry;
long err;
 
err = ___do_page_fault(regs, regs->dar, regs->dsisr);
if (likely(!err))
-   return err;
-
-   entry = search_exception_tables(regs->nip);
-   if (likely(entry)) {
-   instruction_pointer_set(regs, extable_fixup(entry));
return 0;
-   } else if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
-   __bad_page_fault(regs, err);
-   return 0;
-   } else {
-   /* 32 and 64e handle the bad page fault in asm */
-   return err;
-   }
+
+   bad_page_fault(regs, err);
+
+   return 0;
 }
 NOKPROBE_SYMBOL(__do_page_fault);
 
-- 
2.25.0



[PATCH v2 26/43] powerpc/32: Set regs parameter in r3 in transfer_to_handler

2021-03-09 Thread Christophe Leroy
All exception handlers take regs as first parameter.

Instead of setting r3 just before each call to a handler, set
it in transfer_to_handler.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   |  5 ++---
 arch/powerpc/kernel/head_32.h|  2 --
 arch/powerpc/kernel/head_40x.S   |  7 ---
 arch/powerpc/kernel/head_8xx.S   |  3 ---
 arch/powerpc/kernel/head_book3s_32.S |  9 ++---
 arch/powerpc/kernel/head_booke.h | 11 +--
 arch/powerpc/kernel/head_fsl_booke.S |  4 +---
 7 files changed, 6 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d8fd2fd2c777..4698fd1bd8c8 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -87,6 +87,7 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full)
.globl  transfer_to_handler
 transfer_to_handler:
SAVE_NVGPRS(r11)
+   addir3,r1,STACK_FRAME_OVERHEAD
stw r2,GPR2(r11)
stw r12,_NIP(r11)
stw r9,_MSR(r11)
@@ -99,8 +100,7 @@ transfer_to_handler:
tovirt(r12, r12)
beq 2f  /* if from user, fix up THREAD.regs */
addir2, r12, -THREAD
-   addir11,r1,STACK_FRAME_OVERHEAD
-   stw r11,PT_REGS(r12)
+   stw r3,PT_REGS(r12)
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_lock r11, r12
 #endif
@@ -228,7 +228,6 @@ ret_from_kernel_thread:
  */
.globl  handle_page_fault
 handle_page_fault:
-   addir3,r1,STACK_FRAME_OVERHEAD
bl  do_page_fault
cmpwi   r3,0
beq+ret_from_except
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 087445e45489..4d638d760a96 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -183,7 +183,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 #define EXCEPTION(n, label, hdlr, xfer)\
START_EXCEPTION(n, label)   \
EXCEPTION_PROLOG label; \
-   addir3,r1,STACK_FRAME_OVERHEAD; \
xfer(n, hdlr)
 
 #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)  \
@@ -215,7 +214,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
lwz r1, emergency_ctx@l(r1)
addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
EXCEPTION_PROLOG_2 vmap_stack_overflow
-   addir3, r1, STACK_FRAME_OVERHEAD
EXC_XFER_STD(0, stack_overflow_exception)
 .endm
 
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 86883ccb3dc5..08563d4170c6 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -185,7 +185,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 #define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label);  \
CRITICAL_EXCEPTION_PROLOG label;\
-   addir3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
  crit_transfer_to_handler, ret_from_crit_exc)
 
@@ -227,13 +226,11 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 /* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
EXCEPTION_PROLOG Alignment handle_dar_dsisr=1
-   addir3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x600, alignment_exception)
 
 /* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
EXCEPTION_PROLOG ProgramCheck handle_dar_dsisr=1
-   addir3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x700, program_check_exception)
 
EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
@@ -494,7 +491,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
/* continue normal handling for a critical exception... */
 2: mfspr   r4,SPRN_DBSR
stw r4,_ESR(r11)/* DebugException takes DBSR in _ESR */
-   addir3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_TEMPLATE(DebugException, 0x2002, \
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
crit_transfer_to_handler, ret_from_crit_exc)
@@ -505,21 +501,18 @@ Decrementer:
EXCEPTION_PROLOG Decrementer
lis r0,TSR_PIS@h
mtspr   SPRN_TSR,r0 /* Clear the PIT exception */
-   addir3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_LITE(0x1000, timer_interrupt)
 
/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
__HEAD
 FITException:
EXCEPTION_PROLOG FITException
-   addir3,r1,STACK_FRAME_OVERHEAD;
EXC_XFER_STD(0x1010, unknown_exception)
 
/* Watchdog Timer (WDT) Exception. (from 0x1020) */
__HEAD
 WDTException:
CRITICAL_EXCEPTION_PROLOG WDTException
-   addir3,r1,STACK_FRAME_OVERHEAD;
EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
  (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
  

[PATCH v2 25/43] powerpc/32: Replace ASM exception exit by C exception exit from ppc64

2021-03-09 Thread Christophe Leroy
This patch replaces the PPC32 ASM exception exit with the C exception exit
taken from ppc64.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S  | 481 +---
 arch/powerpc/kernel/interrupt.c |   4 +
 2 files changed, 132 insertions(+), 353 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 7084289994b3..d8fd2fd2c777 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -129,9 +129,7 @@ transfer_to_handler_cont:
stw r12,TI_LOCAL_FLAGS(r2)
lwz r9,_MSR(r11)/* if sleeping, clear MSR.EE */
rlwinm  r9,r9,0,~MSR_EE
-   lwz r12,_LINK(r11)  /* and return to address in LR */
-   kuap_restore r11, r2, r3, r4, r5
-   lwz r2, GPR2(r11)
+   stw r9,_MSR(r11)
b   fast_exception_return
 #endif
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler)
@@ -334,69 +332,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
 
.globl  fast_exception_return
 fast_exception_return:
+   lwz r6,_MSR(r1)
+   andi.   r0,r6,MSR_PR
+   bne .Lfast_user_interrupt_return
+   li  r3,0 /* 0 return value, no EMULATE_STACK_STORE */
 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-   andi.   r10,r9,MSR_RI   /* check for recoverable interrupt */
-   beq 1f  /* if not, we've got problems */
-#endif
-
-2: REST_4GPRS(3, r11)
-   lwz r10,_CCR(r11)
-   REST_GPR(1, r11)
-   mtcrr10
-   lwz r10,_LINK(r11)
-   mtlrr10
-   /* Clear the exception_marker on the stack to avoid confusing 
stacktrace */
-   li  r10, 0
-   stw r10, 8(r11)
-   REST_GPR(10, r11)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-   mtspr   SPRN_NRI, r0
-#endif
-   mtspr   SPRN_SRR1,r9
-   mtspr   SPRN_SRR0,r12
-   REST_GPR(9, r11)
-   REST_GPR(12, r11)
-   lwz r11,GPR11(r11)
-   rfi
-#ifdef CONFIG_40x
-   b . /* Prevent prefetch past rfi */
-#endif
-_ASM_NOKPROBE_SYMBOL(fast_exception_return)
-
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-/* check if the exception happened in a restartable section */
-1: lis r3,exc_exit_restart_end@ha
-   addir3,r3,exc_exit_restart_end@l
-   cmplw   r12,r3
-   bge 3f
-   lis r4,exc_exit_restart@ha
-   addir4,r4,exc_exit_restart@l
-   cmplw   r12,r4
-   blt 3f
-   lis r3,fee_restarts@ha
-   tophys(r3,r3)
-   lwz r5,fee_restarts@l(r3)
-   addir5,r5,1
-   stw r5,fee_restarts@l(r3)
-   mr  r12,r4  /* restart at exc_exit_restart */
-   b   2b
-
-   .section .bss
-   .align  2
-fee_restarts:
-   .space  4
-   .previous
-
-/* aargh, a nonrecoverable interrupt, panic */
-/* aargh, we don't know which trap this is */
-3:
-   li  r10,-1
-   stw r10,_TRAP(r11)
+   andi.   r0,r6,MSR_RI
+   bne+.Lfast_kernel_interrupt_return
addir3,r1,STACK_FRAME_OVERHEAD
-   bl  transfer_to_handler_full
bl  unrecoverable_exception
-   b   ret_from_except
+   trap/* should not get here */
+#else
+   b   .Lfast_kernel_interrupt_return
 #endif
+_ASM_NOKPROBE_SYMBOL(fast_exception_return)
 
.globl  ret_from_except_full
 ret_from_except_full:
@@ -405,213 +354,146 @@ ret_from_except_full:
 
.globl  ret_from_except
 ret_from_except:
-   /* Hard-disable interrupts so that current_thread_info()->flags
-* can't change between when we test it and when we return
-* from the interrupt. */
-   /* Note: We don't bother telling lockdep about it */
-   LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
-   mtmsr   r10 /* disable interrupts */
-
-   lwz r3,_MSR(r1) /* Returning to user mode? */
-   andi.   r0,r3,MSR_PR
-   beq resume_kernel
-
-user_exc_return:   /* r10 contains MSR_KERNEL here */
-   /* Check current_thread_info()->flags */
-   lwz r9,TI_FLAGS(r2)
-   andi.   r0,r9,_TIF_USER_WORK_MASK
-   bne do_work
-
-restore_user:
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-   /* Check whether this process has its own DBCR0 value.  The internal
-  debug mode bit tells us that dbcr0 should be loaded. */
-   lwz r0,THREAD+THREAD_DBCR0(r2)
-   andis.  r10,r0,DBCR0_IDM@h
-   bnel-   load_dbcr0
-#endif
-   ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
+_ASM_NOKPROBE_SYMBOL(ret_from_except)
+
+   .globl interrupt_return
+interrupt_return:
+   lwz r4,_MSR(r1)
+   andi.   r0,r4,MSR_PR
+   beq .Lkernel_interrupt_return
+   addir3,r1,STACK_FRAME_OVERHEAD
+   bl  interrupt_exit_user_prepare
+   cmpwi   r3,0
+   bne-.Lrestore_nvgprs
+
+.Lfast_user_interrupt_return:
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_unlock r10, r11
 #endif
+   

[PATCH v2 24/43] powerpc/32: Always save non volatile registers on exception entry

2021-03-09 Thread Christophe Leroy
In preparation for handling exception entry and exit in C,
and to simplify that handling, always save non-volatile registers
when entering an exception.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/ptrace.h |  6 ++
 arch/powerpc/kernel/entry_32.S| 13 +
 arch/powerpc/kernel/head_32.h |  3 +--
 arch/powerpc/kernel/head_booke.h  |  2 +-
 4 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h 
b/arch/powerpc/include/asm/ptrace.h
index 975ba260006a..0a5d8c6b13c4 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -209,16 +209,14 @@ static inline void regs_set_return_value(struct pt_regs 
*regs, unsigned long rc)
  */
 #define TRAP_FLAGS_MASK0x1F
 #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK)
-#define FULL_REGS(regs)(((regs)->trap & 1) == 0)
-#define SET_FULL_REGS(regs)((regs)->trap |= 1)
+#define FULL_REGS(regs)true
+#define SET_FULL_REGS(regs)do { } while (0)
 #define IS_CRITICAL_EXC(regs)  (((regs)->trap & 2) != 0)
 #define IS_MCHECK_EXC(regs)(((regs)->trap & 4) != 0)
 #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0)
 #define NV_REG_POISON  0xdeadbeef
 #define CHECK_FULL_REGS(regs)\
 do { \
-   if ((regs)->trap & 1) \
-   printk(KERN_CRIT "%s: partial register set\n", __func__); \
 } while (0)
 #endif /* __powerpc64__ */
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index fb849ef922fb..7084289994b3 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -81,12 +81,12 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
  */
.globl  transfer_to_handler_full
 transfer_to_handler_full:
-   SAVE_NVGPRS(r11)
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler_full)
/* fall through */
 
.globl  transfer_to_handler
 transfer_to_handler:
+   SAVE_NVGPRS(r11)
stw r2,GPR2(r11)
stw r12,_NIP(r11)
stw r9,_MSR(r11)
@@ -234,10 +234,6 @@ handle_page_fault:
bl  do_page_fault
cmpwi   r3,0
beq+ret_from_except
-   SAVE_NVGPRS(r1)
-   lwz r0,_TRAP(r1)
-   clrrwi  r0,r0,1
-   stw r0,_TRAP(r1)
mr  r4,r3   /* err arg for bad_page_fault */
addir3,r1,STACK_FRAME_OVERHEAD
bl  __bad_page_fault
@@ -810,13 +806,6 @@ recheck:
 do_user_signal:/* r10 contains MSR_KERNEL here */
ori r10,r10,MSR_EE
mtmsr   r10 /* hard-enable interrupts */
-   /* save r13-r31 in the exception frame, if not already done */
-   lwz r3,_TRAP(r1)
-   andi.   r0,r3,1
-   beq 2f
-   SAVE_NVGPRS(r1)
-   rlwinm  r3,r3,0,0,30
-   stw r3,_TRAP(r1)
 2: addir3,r1,STACK_FRAME_OVERHEAD
mr  r4,r9
bl  do_notify_resume
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index e09585b88ba7..087445e45489 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -198,7 +198,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
  ret_from_except_full)
 
 #define EXC_XFER_LITE(n, hdlr) \
-   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
+   EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \
  ret_from_except)
 
 .macro vmap_stack_overflow_exception
@@ -215,7 +215,6 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
lwz r1, emergency_ctx@l(r1)
addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
EXCEPTION_PROLOG_2 vmap_stack_overflow
-   SAVE_NVGPRS(r11)
addir3, r1, STACK_FRAME_OVERHEAD
EXC_XFER_STD(0, stack_overflow_exception)
 .endm
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 3707f49f0b78..b31bf9e833c0 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -331,7 +331,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
  ret_from_except_full)
 
 #define EXC_XFER_LITE(n, hdlr) \
-   EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
+   EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler, \
  ret_from_except)
 
 /* Check for a single step debug exception while in an exception
-- 
2.25.0



[PATCH v2 23/43] powerpc/32: Perform normal function call in exception entry

2021-03-09 Thread Christophe Leroy
Now that the MMU is re-enabled before calling the transfer function,
we no longer need the hack where the addresses of the handler and of
the return function sit just after the 'bl' to the transfer
function, which retrieves them via a read relative to 'lr'.

Do a regular call to the transfer function, then to the handler,
then branch to the return function.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 14 --
 arch/powerpc/kernel/head_32.h|  4 ++--
 arch/powerpc/kernel/head_booke.h |  6 +++---
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ad1fd33e1126..fb849ef922fb 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -104,7 +104,7 @@ transfer_to_handler:
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_lock r11, r12
 #endif
-   b   3f
+   blr
 
/* if from kernel, check interrupted DOZE/NAP mode */
 2:
@@ -118,13 +118,7 @@ transfer_to_handler:
 #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
.globl transfer_to_handler_cont
 transfer_to_handler_cont:
-3:
-   mflrr9
-   lwz r11,0(r9)   /* virtual address of handler */
-   lwz r9,4(r9)/* where to go when done */
-   mtctr   r11
-   mtlrr9
-   bctr/* jump to handler */
+   blr
 
 #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 4: rlwinm  r12,r12,0,~_TLF_NAPPING
@@ -404,8 +398,8 @@ fee_restarts:
stw r10,_TRAP(r11)
addir3,r1,STACK_FRAME_OVERHEAD
bl  transfer_to_handler_full
-   .long   unrecoverable_exception
-   .long   ret_from_except
+   bl  unrecoverable_exception
+   b   ret_from_except
 #endif
 
.globl  ret_from_except_full
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 160ebd573c37..e09585b88ba7 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -190,8 +190,8 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
li  r10,trap;   \
stw r10,_TRAP(r11); \
bl  tfer;   \
-   .long   hdlr;   \
-   .long   ret
+   bl  hdlr;   \
+   b   ret
 
 #define EXC_XFER_STD(n, hdlr)  \
EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full,
\
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index a127d5e7efb4..3707f49f0b78 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -322,9 +322,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)  \
li  r10,trap;   \
stw r10,_TRAP(r11); \
-   bl  tfer;   \
-   .long   hdlr;   \
-   .long   ret
+   bl  tfer;   \
+   bl  hdlr;   \
+   b   ret;\
 
 #define EXC_XFER_STD(n, hdlr)  \
EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
-- 
2.25.0



[PATCH v2 22/43] powerpc/32: Refactor booke critical registers saving

2021-03-09 Thread Christophe Leroy
Refactor booke critical registers saving into a few macros
and move it into the exception prolog directly.

Keep the dedicated transfer_to_handler entry points for the
moment although they are now empty. They will be removed in a
later patch to reduce churn.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 33 -
 arch/powerpc/kernel/head_booke.h | 41 
 2 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 01a064c8a96a..ad1fd33e1126 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -51,49 +51,16 @@
 #ifdef CONFIG_BOOKE
.globl  mcheck_transfer_to_handler
 mcheck_transfer_to_handler:
-   mfspr   r0,SPRN_DSRR0
-   stw r0,_DSRR0(r11)
-   mfspr   r0,SPRN_DSRR1
-   stw r0,_DSRR1(r11)
/* fall through */
 _ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler)
 
.globl  debug_transfer_to_handler
 debug_transfer_to_handler:
-   mfspr   r0,SPRN_CSRR0
-   stw r0,_CSRR0(r11)
-   mfspr   r0,SPRN_CSRR1
-   stw r0,_CSRR1(r11)
/* fall through */
 _ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler)
 
.globl  crit_transfer_to_handler
 crit_transfer_to_handler:
-#ifdef CONFIG_PPC_BOOK3E_MMU
-   mfspr   r0,SPRN_MAS0
-   stw r0,MAS0(r11)
-   mfspr   r0,SPRN_MAS1
-   stw r0,MAS1(r11)
-   mfspr   r0,SPRN_MAS2
-   stw r0,MAS2(r11)
-   mfspr   r0,SPRN_MAS3
-   stw r0,MAS3(r11)
-   mfspr   r0,SPRN_MAS6
-   stw r0,MAS6(r11)
-#ifdef CONFIG_PHYS_64BIT
-   mfspr   r0,SPRN_MAS7
-   stw r0,MAS7(r11)
-#endif /* CONFIG_PHYS_64BIT */
-#endif /* CONFIG_PPC_BOOK3E_MMU */
-#ifdef CONFIG_44x
-   mfspr   r0,SPRN_MMUCR
-   stw r0,MMUCR(r11)
-#endif
-   mfspr   r0,SPRN_SRR0
-   stw r0,_SRR0(r11)
-   mfspr   r0,SPRN_SRR1
-   stw r0,_SRR1(r11)
-
/* fall through */
 _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
 #endif
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index f712b9bc6d62..a127d5e7efb4 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -229,6 +229,36 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
SAVE_4GPRS(3, r11);  \
SAVE_2GPRS(7, r11)
 
+#define SAVE_xSRR(xSRR)\
+   mfspr   r0,SPRN_##xSRR##0;  \
+   stw r0,_##xSRR##0(r1);  \
+   mfspr   r0,SPRN_##xSRR##1;  \
+   stw r0,_##xSRR##1(r1)
+
+
+.macro SAVE_MMU_REGS
+#ifdef CONFIG_PPC_BOOK3E_MMU
+   mfspr   r0,SPRN_MAS0
+   stw r0,MAS0(r1)
+   mfspr   r0,SPRN_MAS1
+   stw r0,MAS1(r1)
+   mfspr   r0,SPRN_MAS2
+   stw r0,MAS2(r1)
+   mfspr   r0,SPRN_MAS3
+   stw r0,MAS3(r1)
+   mfspr   r0,SPRN_MAS6
+   stw r0,MAS6(r1)
+#ifdef CONFIG_PHYS_64BIT
+   mfspr   r0,SPRN_MAS7
+   stw r0,MAS7(r1)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+#ifdef CONFIG_44x
+   mfspr   r0,SPRN_MMUCR
+   stw r0,MMUCR(r1)
+#endif
+.endm
+
 #define CRITICAL_EXCEPTION_PROLOG(intno) \
EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
 #define DEBUG_EXCEPTION_PROLOG \
@@ -271,6 +301,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
START_EXCEPTION(label); \
CRITICAL_EXCEPTION_PROLOG(intno);   \
addir3,r1,STACK_FRAME_OVERHEAD; \
+   SAVE_MMU_REGS;  \
+   SAVE_xSRR(SRR); \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
  crit_transfer_to_handler, ret_from_crit_exc)
 
@@ -280,6 +312,10 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
mfspr   r5,SPRN_ESR;\
stw r5,_ESR(r11);   \
addir3,r1,STACK_FRAME_OVERHEAD; \
+   SAVE_xSRR(DSRR);\
+   SAVE_xSRR(CSRR);\
+   SAVE_MMU_REGS;  \
+   SAVE_xSRR(SRR); \
EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
  mcheck_transfer_to_handler, ret_from_mcheck_exc)
 
@@ -363,6 +399,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 2: mfspr   r4,SPRN_DBSR; \
stw r4,_ESR(r11);   /* DebugException takes DBSR in _ESR */\
addir3,r1,STACK_FRAME_OVERHEAD;   \
+   

[PATCH v2 21/43] powerpc/32: Provide a name to exception prolog continuation in virtual mode

2021-03-09 Thread Christophe Leroy
Now that the prolog continuation is separated in .text, give it a name
and mark it _ASM_NOKPROBE_SYMBOL.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h| 12 +++-
 arch/powerpc/kernel/head_40x.S   | 22 --
 arch/powerpc/kernel/head_8xx.S   | 10 +-
 arch/powerpc/kernel/head_book3s_32.S | 14 +++---
 4 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 3c0aa4538514..160ebd573c37 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -10,10 +10,10 @@
  * We assume sprg3 has the physical address of the current
  * task's thread_struct.
  */
-.macro EXCEPTION_PROLOG handle_dar_dsisr=0
+.macro EXCEPTION_PROLOGname handle_dar_dsisr=0
EXCEPTION_PROLOG_0  handle_dar_dsisr=\handle_dar_dsisr
EXCEPTION_PROLOG_1
-   EXCEPTION_PROLOG_2  handle_dar_dsisr=\handle_dar_dsisr
+   EXCEPTION_PROLOG_2  \name handle_dar_dsisr=\handle_dar_dsisr
 .endm
 
 .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
@@ -56,7 +56,7 @@
 #endif
 .endm
 
-.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
+.macro EXCEPTION_PROLOG_2 name handle_dar_dsisr=0
 #ifdef CONFIG_PPC_8xx
.if \handle_dar_dsisr
li  r11, RPN_PATTERN
@@ -72,6 +72,7 @@
rfi
 
.text
+\name\()_virt:
 1:
stw r11,GPR1(r1)
stw r11,0(r1)
@@ -109,6 +110,7 @@
stw r10,8(r11)
SAVE_4GPRS(3, r11)
SAVE_2GPRS(7, r11)
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
 .endm
 
 .macro SYSCALL_ENTRY trapno
@@ -180,7 +182,7 @@
 
 #define EXCEPTION(n, label, hdlr, xfer)\
START_EXCEPTION(n, label)   \
-   EXCEPTION_PROLOG;   \
+   EXCEPTION_PROLOG label; \
addir3,r1,STACK_FRAME_OVERHEAD; \
xfer(n, hdlr)
 
@@ -212,7 +214,7 @@
 #endif
lwz r1, emergency_ctx@l(r1)
addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
-   EXCEPTION_PROLOG_2
+   EXCEPTION_PROLOG_2 vmap_stack_overflow
SAVE_NVGPRS(r11)
addir3, r1, STACK_FRAME_OVERHEAD
EXC_XFER_STD(0, stack_overflow_exception)
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index e7d8856714d3..86883ccb3dc5 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -104,7 +104,7 @@ _ENTRY(crit_esr)
  * Instead we use a couple of words of memory at low physical addresses.
  * This is OK since we don't support SMP on these processors.
  */
-.macro CRITICAL_EXCEPTION_PROLOG
+.macro CRITICAL_EXCEPTION_PROLOG name
stw r10,crit_r10@l(0)   /* save two registers to work with */
stw r11,crit_r11@l(0)
mfspr   r10,SPRN_SRR0
@@ -135,6 +135,7 @@ _ENTRY(crit_esr)
 
.text
 1:
+\name\()_virt:
lwz r11,crit_r1@l(0)
stw r11,GPR1(r1)
stw r11,0(r1)
@@ -162,6 +163,7 @@ _ENTRY(crit_esr)
stw r10, 8(r11)
SAVE_4GPRS(3, r11)
SAVE_2GPRS(7, r11)
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
 .endm
 
/*
@@ -182,7 +184,7 @@ _ENTRY(crit_esr)
  */
 #define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label);  \
-   CRITICAL_EXCEPTION_PROLOG;  \
+   CRITICAL_EXCEPTION_PROLOG label;\
addir3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
  crit_transfer_to_handler, ret_from_crit_exc)
@@ -205,7 +207,7 @@ _ENTRY(crit_esr)
  * if they can't resolve the lightweight TLB fault.
  */
START_EXCEPTION(0x0300, DataStorage)
-   EXCEPTION_PROLOG handle_dar_dsisr=1
+   EXCEPTION_PROLOG DataStorage handle_dar_dsisr=1
EXC_XFER_LITE(0x300, handle_page_fault)
 
 /*
@@ -213,7 +215,7 @@ _ENTRY(crit_esr)
  * This is caused by a fetch from non-execute or guarded pages.
  */
START_EXCEPTION(0x0400, InstructionAccess)
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG InstructionAccess
li  r5,0
stw r5, _ESR(r11)   /* Zero ESR */
stw r12, _DEAR(r11) /* SRR0 as DEAR */
@@ -224,13 +226,13 @@ _ENTRY(crit_esr)
 
 /* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
-   EXCEPTION_PROLOG handle_dar_dsisr=1
+   EXCEPTION_PROLOG Alignment handle_dar_dsisr=1
addir3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x600, alignment_exception)
 
 /* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
-   EXCEPTION_PROLOG handle_dar_dsisr=1
+   EXCEPTION_PROLOG ProgramCheck handle_dar_dsisr=1
addir3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x700, program_check_exception)
 
@@ -450,7 +452,7 @@ _ENTRY(crit_esr)
 

[PATCH v2 20/43] powerpc/32: Move exception prolog code into .text once MMU is back on

2021-03-09 Thread Christophe Leroy
The space in the head section is rather constrained by the fact that
exception vectors are spread every 0x100 bytes and sometimes we
need to have "out of line" code because it doesn't fit.

Now that we are enabling MMU early in the prolog, take that opportunity
to jump somewhere else in the .text section where we don't have any
space constraint.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h|  5 
 arch/powerpc/kernel/head_40x.S   |  6 +
 arch/powerpc/kernel/head_8xx.S   | 25 
 arch/powerpc/kernel/head_book3s_32.S | 34 
 4 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index d97ec94b34da..3c0aa4538514 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -70,6 +70,8 @@
mtspr   SPRN_SRR0, r11
mfspr   r11, SPRN_SPRG_SCRATCH2
rfi
+
+   .text
 1:
stw r11,GPR1(r1)
stw r11,0(r1)
@@ -163,12 +165,14 @@
  */
 #ifdef CONFIG_PPC_BOOK3S
 #defineSTART_EXCEPTION(n, label)   \
+   __HEAD; \
. = n;  \
DO_KVM n;   \
 label:
 
 #else
 #defineSTART_EXCEPTION(n, label)   \
+   __HEAD; \
. = n;  \
 label:
 
@@ -196,6 +200,7 @@
  ret_from_except)
 
 .macro vmap_stack_overflow_exception
+   __HEAD
 vmap_stack_overflow:
 #ifdef CONFIG_SMP
mfspr   r1, SPRN_SPRG_THREAD
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index c14a71e0d6d3..e7d8856714d3 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -132,6 +132,8 @@ _ENTRY(crit_esr)
ori r11, r11, 1f@l
mtspr   SPRN_SRR0, r11
rfi
+
+   .text
 1:
lwz r11,crit_r1@l(0)
stw r11,GPR1(r1)
@@ -496,6 +498,7 @@ _ENTRY(crit_esr)
crit_transfer_to_handler, ret_from_crit_exc)
 
/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
+   __HEAD
 Decrementer:
EXCEPTION_PROLOG
lis r0,TSR_PIS@h
@@ -504,12 +507,14 @@ Decrementer:
EXC_XFER_LITE(0x1000, timer_interrupt)
 
/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
+   __HEAD
 FITException:
EXCEPTION_PROLOG
addir3,r1,STACK_FRAME_OVERHEAD;
EXC_XFER_STD(0x1010, unknown_exception)
 
/* Watchdog Timer (WDT) Exception. (from 0x1020) */
+   __HEAD
 WDTException:
CRITICAL_EXCEPTION_PROLOG;
addir3,r1,STACK_FRAME_OVERHEAD;
@@ -523,6 +528,7 @@ WDTException:
  * reserved.
  */
 
+   __HEAD
/* Damn, I came up one instruction too many to fit into the
 * exception space :-).  Both the instruction and data TLB
 * miss get to this point to load the TLB.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 11789a077d76..d16d0ec71bb2 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -133,7 +133,7 @@ instruction_counter:
START_EXCEPTION(0x600, Alignment)
EXCEPTION_PROLOG handle_dar_dsisr=1
addir3,r1,STACK_FRAME_OVERHEAD
-   b   .Lalignment_exception_ool
+   EXC_XFER_STD(0x600, alignment_exception)
 
 /* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -141,11 +141,6 @@ instruction_counter:
 /* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
 
-   /* With VMAP_STACK there's not enough room for this at 0x600 */
-   . = 0xa00
-.Lalignment_exception_ool:
-   EXC_XFER_STD(0x600, alignment_exception)
-
 /* System call */
START_EXCEPTION(0xc00, SystemCall)
SYSCALL_ENTRY   0xc00
@@ -339,26 +334,25 @@ DARFixed:/* Return from dcbx instruction bug workaround */
  * support of breakpoints and such.  Someday I will get around to
  * using them.
  */
-do_databreakpoint:
-   EXCEPTION_PROLOG_1
-   EXCEPTION_PROLOG_2 handle_dar_dsisr=1
-   addir3,r1,STACK_FRAME_OVERHEAD
-   mfspr   r4,SPRN_BAR
-   stw r4,_DAR(r11)
-   EXC_XFER_STD(0x1c00, do_break)
-
START_EXCEPTION(0x1c00, DataBreakpoint)
EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr   r11, SPRN_SRR0
cmplwi  cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
cmplwi  cr7, r11, (.Litlbie - PAGE_OFFSET)@l
cror4*cr1+eq, 4*cr1+eq, 4*cr7+eq
-   bne cr1, do_databreakpoint
+   bne cr1, 1f
mtcrr10
mfspr   r10, SPRN_SPRG_SCRATCH0
mfspr   r11, SPRN_SPRG_SCRATCH1
rfi
 
+1: EXCEPTION_PROLOG_1
+   EXCEPTION_PROLOG_2 handle_dar_dsisr=1
+   addir3,r1,STACK_FRAME_OVERHEAD
+   mfspr   r4,SPRN_BAR

[PATCH v2 19/43] powerpc/32: Use START_EXCEPTION() as much as possible

2021-03-09 Thread Christophe Leroy
Wherever possible, use START_EXCEPTION().

This will help for proper exception init in future patches.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_40x.S   | 12 +--
 arch/powerpc/kernel/head_8xx.S   | 27 +
 arch/powerpc/kernel/head_book3s_32.S | 30 
 3 files changed, 22 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 55fa99c5085c..c14a71e0d6d3 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -247,17 +247,15 @@ _ENTRY(crit_esr)
EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
 
 /* 0x1000 - Programmable Interval Timer (PIT) Exception */
-   . = 0x1000
+   START_EXCEPTION(0x1000, DecrementerTrap)
b Decrementer
 
-/* 0x1010 - Fixed Interval Timer (FIT) Exception
-*/
-   . = 0x1010
+/* 0x1010 - Fixed Interval Timer (FIT) Exception */
+   START_EXCEPTION(0x1010, FITExceptionTrap)
b FITException
 
-/* 0x1020 - Watchdog Timer (WDT) Exception
-*/
-   . = 0x1020
+/* 0x1020 - Watchdog Timer (WDT) Exception */
+   START_EXCEPTION(0x1020, WDTExceptionTrap)
b WDTException
 
 /* 0x1100 - Data TLB Miss Exception
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index b63445c55f4d..11789a077d76 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -121,8 +121,7 @@ instruction_counter:
EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
 
 /* Machine check */
-   . = 0x200
-MachineCheck:
+   START_EXCEPTION(0x200, MachineCheck)
EXCEPTION_PROLOG handle_dar_dsisr=1
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x200, machine_check_exception)
@@ -131,8 +130,7 @@ MachineCheck:
EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
 
 /* Alignment exception */
-   . = 0x600
-Alignment:
+   START_EXCEPTION(0x600, Alignment)
EXCEPTION_PROLOG handle_dar_dsisr=1
addir3,r1,STACK_FRAME_OVERHEAD
b   .Lalignment_exception_ool
@@ -149,8 +147,7 @@ Alignment:
EXC_XFER_STD(0x600, alignment_exception)
 
 /* System call */
-   . = 0xc00
-SystemCall:
+   START_EXCEPTION(0xc00, SystemCall)
SYSCALL_ENTRY   0xc00
 
 /* Single step - not used on 601 */
@@ -161,7 +158,6 @@ SystemCall:
  */
EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD)
 
-   . = 0x1100
 /*
  * For the MPC8xx, this is a software tablewalk to load the instruction
  * TLB.  The task switch loads the M_TWB register with the pointer to the first
@@ -183,7 +179,7 @@ SystemCall:
 #define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)
 #endif
 
-InstructionTLBMiss:
+   START_EXCEPTION(0x1100, InstructionTLBMiss)
mtspr   SPRN_SPRG_SCRATCH2, r10
mtspr   SPRN_M_TW, r11
 
@@ -239,8 +235,7 @@ InstructionTLBMiss:
rfi
 #endif
 
-   . = 0x1200
-DataStoreTLBMiss:
+   START_EXCEPTION(0x1200, DataStoreTLBMiss)
mtspr   SPRN_SPRG_SCRATCH2, r10
mtspr   SPRN_M_TW, r11
mfcrr11
@@ -303,8 +298,7 @@ DataStoreTLBMiss:
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
  */
-   . = 0x1300
-InstructionTLBError:
+   START_EXCEPTION(0x1300, InstructionTLBError)
EXCEPTION_PROLOG
andis.  r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
andis.  r10,r9,SRR1_ISI_NOPT@h
@@ -320,8 +314,7 @@ InstructionTLBError:
  * many reasons, including a dirty update to a pte.  We bail out to
  * a higher level function that can handle it.
  */
-   . = 0x1400
-DataTLBError:
+   START_EXCEPTION(0x1400, DataTLBError)
EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr   r11, SPRN_DAR
cmpwi   cr1, r11, RPN_PATTERN
@@ -354,8 +347,7 @@ do_databreakpoint:
stw r4,_DAR(r11)
EXC_XFER_STD(0x1c00, do_break)
 
-   . = 0x1c00
-DataBreakpoint:
+   START_EXCEPTION(0x1c00, DataBreakpoint)
EXCEPTION_PROLOG_0 handle_dar_dsisr=1
mfspr   r11, SPRN_SRR0
cmplwi  cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
@@ -368,8 +360,7 @@ DataBreakpoint:
rfi
 
 #ifdef CONFIG_PERF_EVENTS
-   . = 0x1d00
-InstructionBreakpoint:
+   START_EXCEPTION(0x1d00, InstructionBreakpoint)
mtspr   SPRN_SPRG_SCRATCH0, r10
lwz r10, (instruction_counter - PAGE_OFFSET)@l(0)
addir10, r10, -1
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index 9dc05890477d..8f5c8c8da63d 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -255,9 +255,7 @@ __secondary_hold_acknowledge:
  * pointer when we take an exception from supervisor mode.)
  * -- paulus.
  */
-   . = 0x200
-   DO_KVM  0x200
-MachineCheck:
+ 

[PATCH v2 18/43] powerpc/32: Add vmap_stack_overflow label inside the macro

2021-03-09 Thread Christophe Leroy
For consistency, add in the macro the label used by exception prolog
to branch to stack overflow processing.

While at it, enclose the macro in #ifdef CONFIG_VMAP_STACK on the 8xx
as already done on book3s/32.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h| 3 ++-
 arch/powerpc/kernel/head_8xx.S   | 3 ++-
 arch/powerpc/kernel/head_book3s_32.S | 1 -
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 15c6fc7cbbf5..d97ec94b34da 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -52,7 +52,7 @@
 1:
 #ifdef CONFIG_VMAP_STACK
mtcrf   0x3f, r1
-   bt  32 - THREAD_ALIGN_SHIFT, stack_overflow
+   bt  32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow
 #endif
 .endm
 
@@ -196,6 +196,7 @@
  ret_from_except)
 
 .macro vmap_stack_overflow_exception
+vmap_stack_overflow:
 #ifdef CONFIG_SMP
mfspr   r1, SPRN_SPRG_THREAD
lwz r1, TASK_CPU - THREAD(r1)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index cdbfa9d41353..b63445c55f4d 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -338,8 +338,9 @@ DARFixed:/* Return from dcbx instruction bug workaround */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
 
-stack_overflow:
+#ifdef CONFIG_VMAP_STACK
vmap_stack_overflow_exception
+#endif
 
 /* On the MPC8xx, these next four traps are used for development
  * support of breakpoints and such.  Someday I will get around to
diff --git a/arch/powerpc/kernel/head_book3s_32.S 
b/arch/powerpc/kernel/head_book3s_32.S
index 59efbee7c080..9dc05890477d 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -729,7 +729,6 @@ fast_hash_page_return:
 #endif /* CONFIG_PPC_BOOK3S_604 */
 
 #ifdef CONFIG_VMAP_STACK
-stack_overflow:
vmap_stack_overflow_exception
 #endif
 
-- 
2.25.0



[PATCH v2 17/43] powerpc/32: Statically initialise first emergency context

2021-03-09 Thread Christophe Leroy
The check of the emergency context initialisation in
vmap_stack_overflow is buggy for the SMP case, as it
compares r1 with 0 while in the SMP case r1 is offset
by the CPU id.

Instead of fixing it, just perform static initialisation
of the first emergency context.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h  | 6 +-
 arch/powerpc/kernel/setup_32.c | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 88b02bd91e8e..15c6fc7cbbf5 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -205,11 +205,7 @@
lis r1, emergency_ctx@ha
 #endif
lwz r1, emergency_ctx@l(r1)
-   cmpwi   cr1, r1, 0
-   bne cr1, 1f
-   lis r1, init_thread_union@ha
-   addir1, r1, init_thread_union@l
-1: addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
+   addir1, r1, THREAD_SIZE - INT_FRAME_SIZE
EXCEPTION_PROLOG_2
SAVE_NVGPRS(r11)
addir3, r1, STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 8ba49a6bf515..d7c1f92152af 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -164,7 +164,7 @@ void __init irqstack_early_init(void)
 }
 
 #ifdef CONFIG_VMAP_STACK
-void *emergency_ctx[NR_CPUS] __ro_after_init;
+void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack};
 
 void __init emergency_stack_init(void)
 {
-- 
2.25.0



[PATCH v2 16/43] powerpc/32: Enable instruction translation at the same time as data translation

2021-03-09 Thread Christophe Leroy
On 40x and 8xx, kernel text is pinned.
On book3s/32, kernel text is mapped by BATs.

Enable instruction translation at the same time as data translation; it
makes things simpler.

In syscall handler, MSR_RI can also be set at the same time because
srr0/srr1 are already saved and r1 is set properly.

On booke, translation is always on, so in the end all PPC32
platforms have translation on early. Just update msr.

Also update comment in power_save_ppc32_restore().

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 30 --
 arch/powerpc/kernel/head_32.h| 13 -
 arch/powerpc/kernel/head_40x.S   | 10 +++---
 arch/powerpc/kernel/head_booke.h |  6 --
 4 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 33e97032ca25..01a064c8a96a 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -153,19 +153,11 @@ transfer_to_handler:
 transfer_to_handler_cont:
 3:
mflrr9
-   tovirt(r9, r9)
lwz r11,0(r9)   /* virtual address of handler */
lwz r9,4(r9)/* where to go when done */
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
-   mtspr   SPRN_NRI, r0
-#endif
-   mtspr   SPRN_SRR0,r11
-   mtspr   SPRN_SRR1,r10
+   mtctr   r11
mtlrr9
-   rfi /* jump to handler, enable MMU */
-#ifdef CONFIG_40x
-   b . /* Prevent prefetch past rfi */
-#endif
+   bctr/* jump to handler */
 
 #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 4: rlwinm  r12,r12,0,~_TLF_NAPPING
@@ -444,8 +436,6 @@ fee_restarts:
li  r10,-1
stw r10,_TRAP(r11)
addir3,r1,STACK_FRAME_OVERHEAD
-   lis r10,MSR_KERNEL@h
-   ori r10,r10,MSR_KERNEL@l
bl  transfer_to_handler_full
.long   unrecoverable_exception
.long   ret_from_except
@@ -945,16 +935,20 @@ _GLOBAL(enter_rtas)
mtspr   SPRN_SRR1,r9
rfi
 1:
-   li  r0, MSR_KERNEL & ~MSR_IR/* can take DTLB miss */
-   mtmsr   r0
-   isync
+   lis r8, 1f@h
+   ori r8, r8, 1f@l
+   LOAD_REG_IMMEDIATE(r9,MSR_KERNEL)
+   mtspr   SPRN_SRR0,r8
+   mtspr   SPRN_SRR1,r9
+   rfi /* Reactivate MMU translation */
+1:
lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */
lwz r9,8(r1)/* original msr value */
addir1,r1,INT_FRAME_SIZE
li  r0,0
stw r0, THREAD + RTAS_SP(r2)
-   mtspr   SPRN_SRR0,r8
-   mtspr   SPRN_SRR1,r9
-   rfi /* return to caller */
+   mtlrr8
+   mtmsr   r9
+   blr /* return to caller */
 _ASM_NOKPROBE_SYMBOL(enter_rtas)
 #endif /* CONFIG_PPC_RTAS */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 910f86642eec..88b02bd91e8e 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -63,10 +63,14 @@
mtspr   SPRN_DAR, r11   /* Tag DAR, to be used in DTLB Error */
.endif
 #endif
-   LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take 
DTLB miss */
-   mtmsr   r11
-   isync
+   LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */
+   mtspr   SPRN_SRR1, r11
+   lis r11, 1f@h
+   ori r11, r11, 1f@l
+   mtspr   SPRN_SRR0, r11
mfspr   r11, SPRN_SPRG_SCRATCH2
+   rfi
+1:
stw r11,GPR1(r1)
stw r11,0(r1)
mr  r11, r1
@@ -94,7 +98,7 @@
 #elif defined(CONFIG_PPC_8xx)
mtspr   SPRN_EID, r2/* Set MSR_RI */
 #else
-   li  r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */
+   li  r10, MSR_KERNEL /* can take exceptions */
mtmsr   r10 /* (except for mach check in rtas) */
 #endif
stw r0,GPR0(r11)
@@ -179,7 +183,6 @@
 #define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret)  \
li  r10,trap;   \
stw r10,_TRAP(r11); \
-   LOAD_REG_IMMEDIATE(r10, msr);   \
bl  tfer;   \
.long   hdlr;   \
.long   ret
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 7da673ec63ef..55fa99c5085c 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -126,9 +126,13 @@ _ENTRY(crit_esr)
lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
 1: stw r1,crit_r1@l(0)
addir1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
-   LOAD_REG_IMMEDIATE(r11,MSR_KERNEL & ~(MSR_IR | MSR_RI))
-   mtmsr   r11
-   

[PATCH v2 15/43] powerpc/32: Tag DAR in EXCEPTION_PROLOG_2 for the 8xx

2021-03-09 Thread Christophe Leroy
8xx requires tagging the DAR with a magic value in order to
fix up DAR on faults generated by 'dcbX', as the 8xx
forgets to update the DAR for those faults.

Do the tagging as early as possible, that is before enabling MMU.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h  |  6 ++
 arch/powerpc/kernel/head_8xx.S | 18 ++
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 1b707755c68e..910f86642eec 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -57,6 +57,12 @@
 .endm
 
 .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
+#ifdef CONFIG_PPC_8xx
+   .if \handle_dar_dsisr
+   li  r11, RPN_PATTERN
+   mtspr   SPRN_DAR, r11   /* Tag DAR, to be used in DTLB Error */
+   .endif
+#endif
LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take 
DTLB miss */
mtmsr   r11
isync
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 792e2fd86479..cdbfa9d41353 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -30,6 +30,12 @@
 #include 
 #include 
 
+/*
+ * Value for the bits that have fixed value in RPN entries.
+ * Also used for tagging DAR for DTLBerror.
+ */
+#define RPN_PATTERN0x00f0
+
 #include "head_32.h"
 
 .macro compare_to_kernel_boundary scratch, addr
@@ -42,12 +48,6 @@
 #endif
 .endm
 
-/*
- * Value for the bits that have fixed value in RPN entries.
- * Also used for tagging DAR for DTLBerror.
- */
-#define RPN_PATTERN0x00f0
-
 #define PAGE_SHIFT_512K19
 #define PAGE_SHIFT_8M  23
 
@@ -124,8 +124,6 @@ instruction_counter:
. = 0x200
 MachineCheck:
EXCEPTION_PROLOG handle_dar_dsisr=1
-   li  r6, RPN_PATTERN
-   mtspr   SPRN_DAR, r6/* Tag DAR, to be used in DTLB Error */
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x200, machine_check_exception)
 
@@ -136,8 +134,6 @@ MachineCheck:
. = 0x600
 Alignment:
EXCEPTION_PROLOG handle_dar_dsisr=1
-   li  r6, RPN_PATTERN
-   mtspr   SPRN_DAR, r6/* Tag DAR, to be used in DTLB Error */
addir3,r1,STACK_FRAME_OVERHEAD
b   .Lalignment_exception_ool
 
@@ -331,8 +327,6 @@ DataTLBError:
cmpwi   cr1, r11, RPN_PATTERN
beq-cr1, FixupDAR   /* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
-   li  r11, RPN_PATTERN
-   mtspr   SPRN_DAR, r11   /* Tag DAR, to be used in DTLB Error */
EXCEPTION_PROLOG_1
EXCEPTION_PROLOG_2 handle_dar_dsisr=1
lwz r4, _DAR(r11)
-- 
2.25.0



[PATCH v2 14/43] powerpc/32: Always enable data translation in exception prolog

2021-03-09 Thread Christophe Leroy
If the code can use a stack in vm area, it can also use a
stack in linear space.

Simplify code by removing old non VMAP stack code on PPC32.

That means the data translation is now re-enabled early in
exception prolog in all cases, not only when using VMAP stacks.

While we are touching EXCEPTION_PROLOG macros, remove the
unused for_rtas parameter in EXCEPTION_PROLOG_1.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/processor.h |  4 +-
 arch/powerpc/kernel/asm-offsets.c|  2 -
 arch/powerpc/kernel/entry_32.S   | 19 +++
 arch/powerpc/kernel/fpu.S|  2 -
 arch/powerpc/kernel/head_32.h| 85 +---
 arch/powerpc/kernel/head_40x.S   | 23 
 arch/powerpc/kernel/head_8xx.S   | 19 +--
 arch/powerpc/kernel/head_book3s_32.S | 47 +--
 arch/powerpc/kernel/idle_6xx.S   | 12 +---
 arch/powerpc/kernel/idle_e500.S  |  4 +-
 arch/powerpc/kernel/vector.S |  2 -
 arch/powerpc/mm/book3s32/hash_low.S  | 14 -
 12 files changed, 17 insertions(+), 216 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 43cbd9281055..eae16facc390 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -147,11 +147,9 @@ struct thread_struct {
 #ifdef CONFIG_PPC_RTAS
unsigned long   rtas_sp;/* stack pointer for when in RTAS */
 #endif
-#endif
 #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
unsigned long   kuap;   /* opened segments for user access */
 #endif
-#ifdef CONFIG_VMAP_STACK
unsigned long   srr0;
unsigned long   srr1;
unsigned long   dar;
@@ -160,7 +158,7 @@ struct thread_struct {
unsigned long   r0, r3, r4, r5, r6, r8, r9, r11;
unsigned long   lr, ctr;
 #endif
-#endif
+#endif /* CONFIG_PPC32 */
/* Debug Registers */
struct debug_reg debug;
 #ifdef CONFIG_PPC_FPU_REGS
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 73620536c801..85ba2b0bc8d8 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -131,7 +131,6 @@ int main(void)
OFFSET(KSP_VSID, thread_struct, ksp_vsid);
 #else /* CONFIG_PPC64 */
OFFSET(PGDIR, thread_struct, pgdir);
-#ifdef CONFIG_VMAP_STACK
OFFSET(SRR0, thread_struct, srr0);
OFFSET(SRR1, thread_struct, srr1);
OFFSET(DAR, thread_struct, dar);
@@ -148,7 +147,6 @@ int main(void)
OFFSET(THLR, thread_struct, lr);
OFFSET(THCTR, thread_struct, ctr);
 #endif
-#endif
 #ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
OFFSET(THREAD_ACC, thread_struct, acc);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 66198e6e25e7..33e97032ca25 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -129,7 +129,7 @@ transfer_to_handler:
stw r12,_CTR(r11)
stw r2,_XER(r11)
mfspr   r12,SPRN_SPRG_THREAD
-   tovirt_vmstack r12, r12
+   tovirt(r12, r12)
beq 2f  /* if from user, fix up THREAD.regs */
addir2, r12, -THREAD
addir11,r1,STACK_FRAME_OVERHEAD
@@ -153,8 +153,7 @@ transfer_to_handler:
 transfer_to_handler_cont:
 3:
mflrr9
-   tovirt_novmstack r2, r2 /* set r2 to current */
-   tovirt_vmstack r9, r9
+   tovirt(r9, r9)
lwz r11,0(r9)   /* virtual address of handler */
lwz r9,4(r9)/* where to go when done */
 #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
@@ -933,7 +932,6 @@ _GLOBAL(enter_rtas)
lis r6,1f@ha/* physical return address for rtas */
addir6,r6,1f@l
tophys(r6,r6)
-   tophys_novmstack r7, r1
lwz r8,RTASENTRY(r4)
lwz r4,RTASBASE(r4)
mfmsr   r9
@@ -942,22 +940,19 @@ _GLOBAL(enter_rtas)
mtmsr   r0  /* disable interrupts so SRR0/1 don't get trashed */
li  r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlrr6
-   stw r7, THREAD + RTAS_SP(r2)
+   stw r1, THREAD + RTAS_SP(r2)
mtspr   SPRN_SRR0,r8
mtspr   SPRN_SRR1,r9
rfi
-1: tophys_novmstack r9, r1
-#ifdef CONFIG_VMAP_STACK
+1:
li  r0, MSR_KERNEL & ~MSR_IR/* can take DTLB miss */
mtmsr   r0
isync
-#endif
-   lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */
-   lwz r9,8(r9)/* original msr value */
+   lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */
+   lwz r9,8(r1)/* original msr value */
addir1,r1,INT_FRAME_SIZE
li  r0,0
-   tophys_novmstack r7, r2
-   stw r0, THREAD + RTAS_SP(r7)
+   stw r0, THREAD + RTAS_SP(r2)
mtspr   SPRN_SRR0,r8
mtspr   SPRN_SRR1,r9
rfi 

[PATCH v2 13/43] powerpc/32: Remove ksp_limit

2021-03-09 Thread Christophe Leroy
ksp_limit is there to help detect stack overflows.
That is specific to ppc32 as it was removed from ppc64 in
commit cbc9565ee826 ("powerpc: Remove ksp_limit on ppc64").

There are other means for detecting stack overflows.

As ppc64 has proven to not need it, ppc32 should be able to do
without it too.

Let's remove it and simplify exception handling.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/processor.h |  2 -
 arch/powerpc/kernel/asm-offsets.c|  2 -
 arch/powerpc/kernel/entry_32.S   | 68 +---
 arch/powerpc/kernel/head_40x.S   |  2 -
 arch/powerpc/kernel/head_booke.h |  1 -
 arch/powerpc/kernel/misc_32.S| 14 --
 arch/powerpc/kernel/process.c|  3 --
 arch/powerpc/kernel/traps.c  |  9 
 arch/powerpc/lib/sstep.c |  9 
 9 files changed, 2 insertions(+), 108 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 8acc3590c971..43cbd9281055 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -144,7 +144,6 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_PPC32
void*pgdir; /* root of page-table tree */
-   unsigned long   ksp_limit;  /* if ksp <= ksp_limit stack overflow */
 #ifdef CONFIG_PPC_RTAS
unsigned long   rtas_sp;/* stack pointer for when in RTAS */
 #endif
@@ -282,7 +281,6 @@ struct thread_struct {
 #ifdef CONFIG_PPC32
 #define INIT_THREAD { \
.ksp = INIT_SP, \
-   .ksp_limit = INIT_SP_LIMIT, \
.pgdir = swapper_pg_dir, \
.fpexc_mode = MSR_FE0 | MSR_FE1, \
SPEFSCR_INIT \
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index f3a662201a9f..73620536c801 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -91,7 +91,6 @@ int main(void)
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
 #else
-   OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
 #ifdef CONFIG_PPC_RTAS
OFFSET(RTAS_SP, thread_struct, rtas_sp);
 #endif
@@ -381,7 +380,6 @@ int main(void)
DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, 
csrr1));
DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, 
dsrr0));
DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, 
dsrr1));
-   DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct 
exception_regs, saved_ksp_limit));
 #endif
 #endif
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 4ffbcf3df72e..66198e6e25e7 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -94,12 +94,6 @@ crit_transfer_to_handler:
mfspr   r0,SPRN_SRR1
stw r0,_SRR1(r11)
 
-   /* set the stack limit to the current stack */
-   mfspr   r8,SPRN_SPRG_THREAD
-   lwz r0,KSP_LIMIT(r8)
-   stw r0,SAVED_KSP_LIMIT(r11)
-   rlwinm  r0,r1,0,0,(31 - THREAD_SHIFT)
-   stw r0,KSP_LIMIT(r8)
/* fall through */
 _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
 #endif
@@ -107,12 +101,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
 #ifdef CONFIG_40x
.globl  crit_transfer_to_handler
 crit_transfer_to_handler:
-   /* set the stack limit to the current stack */
-   mfspr   r8,SPRN_SPRG_THREAD
-   lwz r0,KSP_LIMIT(r8)
-   stw r0,saved_ksp_limit@l(0)
-   rlwinm  r0,r1,0,0,(31 - THREAD_SHIFT)
-   stw r0,KSP_LIMIT(r8)
/* fall through */
 _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
 #endif
@@ -151,17 +139,10 @@ transfer_to_handler:
 #endif
b   3f
 
-2: /* if from kernel, check interrupted DOZE/NAP mode and
- * check for stack overflow
- */
+   /* if from kernel, check interrupted DOZE/NAP mode */
+2:
kuap_save_and_lock r11, r12, r9, r2, r6
addir2, r12, -THREAD
-#ifndef CONFIG_VMAP_STACK
-   lwz r9,KSP_LIMIT(r12)
-   cmplw   r1,r9   /* if r1 <= ksp_limit */
-   ble-stack_ovf   /* then the kernel stack overflowed */
-#endif
-5:
 #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf   0x01,r12
@@ -204,37 +185,6 @@ transfer_to_handler_cont:
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler)
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 
-#ifndef CONFIG_VMAP_STACK
-/*
- * On kernel stack overflow, load up an initial stack pointer
- * and call StackOverflow(regs), which should not return.
- */
-stack_ovf:
-   /* sometimes we use a statically-allocated stack, which is OK. */
-   lis r12,_end@h
-   ori r12,r12,_end@l
-   cmplw   r1,r12
-   ble 5b  /* r1 <= &_end is OK */
-   SAVE_NVGPRS(r11)
-   addir3,r1,STACK_FRAME_OVERHEAD
-   lis r1,init_thread_union@ha
-   addi

[PATCH v2 12/43] powerpc/32: Use fast instruction to set MSR RI in exception prolog on 8xx

2021-03-09 Thread Christophe Leroy
8xx has registers SPRN_NRI, SPRN_EID and SPRN_EIE for changing
MSR EE and RI.

Use SPRN_EID in exception prolog to set RI.

On an 8xx, it reduces the null_syscall test by 3 cycles.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_32.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index ac6b391f1493..25ee6b26ef5a 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -107,6 +107,8 @@
 #endif
 #ifdef CONFIG_40x
rlwinm  r9,r9,0,14,12   /* clear MSR_WE (necessary?) */
+#elif defined(CONFIG_PPC_8xx)
+   mtspr   SPRN_EID, r2/* Set MSR_RI */
 #else
 #ifdef CONFIG_VMAP_STACK
li  r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */
-- 
2.25.0



[PATCH v2 11/43] powerpc/32: Handle bookE debugging in C in exception entry

2021-03-09 Thread Christophe Leroy
The handling of SPRN_DBCR0 and other registers can easily
be done in C instead of ASM.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/interrupt.h |  2 ++
 arch/powerpc/kernel/entry_32.S   | 23 ---
 2 files changed, 2 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index c35368adbe71..861e6eadc98c 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -65,6 +65,8 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
if (user_mode(regs))
account_cpu_user_entry();
 #endif
+
+   booke_restore_dbcr0();
 }
 
 /*
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0f3f1bdd909e..4ffbcf3df72e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -146,32 +146,9 @@ transfer_to_handler:
addir2, r12, -THREAD
addir11,r1,STACK_FRAME_OVERHEAD
stw r11,PT_REGS(r12)
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-   /* Check to see if the dbcr0 register is set up to debug.  Use the
-  internal debug mode bit to do this. */
-   lwz r12,THREAD_DBCR0(r12)
-   andis.  r12,r12,DBCR0_IDM@h
-#endif
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_lock r11, r12
 #endif
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-   beq+3f
-   /* From user and task is ptraced - load up global dbcr0 */
-   li  r12,-1  /* clear all pending debug events */
-   mtspr   SPRN_DBSR,r12
-   lis r11,global_dbcr0@ha
-   tophys_novmstack r11,r11
-   addir11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
-   lwz r9,TASK_CPU(r2)
-   slwir9,r9,2
-   add r11,r11,r9
-#endif
-   lwz r12,0(r11)
-   mtspr   SPRN_DBCR0,r12
-#endif
-
b   3f
 
 2: /* if from kernel, check interrupted DOZE/NAP mode and
-- 
2.25.0



[PATCH v2 09/43] powerpc/32: Reconcile interrupts in C

2021-03-09 Thread Christophe Leroy
There is no need for this to be in asm anymore,
use the new interrupt entry wrapper.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/interrupt.h |  4 ++
 arch/powerpc/kernel/entry_32.S   | 58 
 2 files changed, 4 insertions(+), 58 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index 232a4847f596..b2f69e5dcb50 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -29,6 +29,10 @@ static inline void booke_restore_dbcr0(void)
 
 static inline void interrupt_enter_prepare(struct pt_regs *regs, struct 
interrupt_state *state)
 {
+#ifdef CONFIG_PPC32
+   if (!arch_irq_disabled_regs(regs))
+   trace_hardirqs_off();
+#endif
/*
 * Book3E reconciles irq soft mask in asm
 */
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 535c55f4393a..0f18fe14649c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -202,22 +202,6 @@ transfer_to_handler_cont:
lwz r9,4(r9)/* where to go when done */
 #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr   SPRN_NRI, r0
-#endif
-#ifdef CONFIG_TRACE_IRQFLAGS
-   /*
-* When tracing IRQ state (lockdep) we enable the MMU before we call
-* the IRQ tracing functions as they might access vmalloc space or
-* perform IOs for console output.
-*
-* To speed up the syscall path where interrupts stay on, let's check
-* first if we are changing the MSR value at all.
-*/
-   tophys_novmstack r12, r1
-   lwz r12,_MSR(r12)
-   andi.   r12,r12,MSR_EE
-   bne 1f
-
-   /* MSR isn't changing, just transition directly */
 #endif
mtspr   SPRN_SRR0,r11
mtspr   SPRN_SRR1,r10
@@ -244,48 +228,6 @@ transfer_to_handler_cont:
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler)
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
-* keep interrupts disabled at this point otherwise we might risk
-* taking an interrupt before we tell lockdep they are enabled.
-*/
-   lis r12,reenable_mmu@h
-   ori r12,r12,reenable_mmu@l
-   LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
-   mtspr   SPRN_SRR0,r12
-   mtspr   SPRN_SRR1,r0
-   rfi
-#ifdef CONFIG_40x
-   b . /* Prevent prefetch past rfi */
-#endif
-
-reenable_mmu:
-   /*
-* We save a bunch of GPRs,
-* r3 can be different from GPR3(r1) at this point, r9 and r11
-* contains the old MSR and handler address respectively,
-* r0, r4-r8, r12, CCR, CTR, XER etc... are left
-* clobbered as they aren't useful past this point.
-*/
-
-   stwur1,-32(r1)
-   stw r9,8(r1)
-   stw r11,12(r1)
-   stw r3,16(r1)
-
-   /* If we are disabling interrupts (normal case), simply log it with
-* lockdep
-*/
-1: bl  trace_hardirqs_off
-   lwz r3,16(r1)
-   lwz r11,12(r1)
-   lwz r9,8(r1)
-   addir1,r1,32
-   mtctr   r11
-   mtlrr9
-   bctr/* jump to handler */
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
 #ifndef CONFIG_VMAP_STACK
 /*
  * On kernel stack overflow, load up an initial stack pointer
-- 
2.25.0



[PATCH v2 10/43] powerpc/32: Entry cpu time accounting in C

2021-03-09 Thread Christophe Leroy
There is no need for this to be in asm,
use the new interrupt entry wrapper.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/interrupt.h |  3 +++
 arch/powerpc/include/asm/ppc_asm.h   | 10 --
 arch/powerpc/kernel/entry_32.S   |  1 -
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index b2f69e5dcb50..c35368adbe71 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -32,6 +32,9 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs, struct interrup
 #ifdef CONFIG_PPC32
if (!arch_irq_disabled_regs(regs))
trace_hardirqs_off();
+
+   if (user_mode(regs))
+   account_cpu_user_entry();
 #endif
/*
 * Book3E reconciles irq soft mask in asm
diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 3dceb64fc9af..8998122fc7e2 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -23,18 +23,8 @@
  */
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)
 #else
-#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)\
-   MFTB(ra);   /* get timebase */  \
-   PPC_LL  rb, ACCOUNT_STARTTIME_USER(ptr);\
-   PPC_STL ra, ACCOUNT_STARTTIME(ptr); \
-   subfrb,rb,ra;   /* subtract start value */  \
-   PPC_LL  ra, ACCOUNT_USER_TIME(ptr); \
-   add ra,ra,rb;   /* add on to user time */   \
-   PPC_STL ra, ACCOUNT_USER_TIME(ptr); \
-
 #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) \
MFTB(ra);   /* get timebase */  \
PPC_LL  rb, ACCOUNT_STARTTIME(ptr); \
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0f18fe14649c..0f3f1bdd909e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -152,7 +152,6 @@ transfer_to_handler:
lwz r12,THREAD_DBCR0(r12)
andis.  r12,r12,DBCR0_IDM@h
 #endif
-   ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
 #ifdef CONFIG_PPC_BOOK3S_32
kuep_lock r11, r12
 #endif
-- 
2.25.0



  1   2   >