Re: [PATCH 1/4] powerpc/xive: Use XIVE_BAD_IRQ instead of zero to catch non configured IPIs

2020-03-09 Thread David Gibson
On Fri, Mar 06, 2020 at 04:01:40PM +0100, Cédric Le Goater wrote:
> When a CPU is brought up, an IPI number is allocated and recorded
> under the XIVE CPU structure. Invalid IPI numbers are tracked with
> interrupt number 0x0.
> 
> On the PowerNV platform, the interrupt number space starts at 0x10 and
> this works fine. However, on the sPAPR platform, it is possible to
> allocate the interrupt number 0x0 and this raises an issue when CPU 0
> is unplugged. The XIVE spapr driver tracks allocated interrupt numbers
> in a bitmask, which is not correctly updated when interrupt number 0x0
> is freed. It stays allocated and is then impossible to reallocate.
> 
> Fix by using the XIVE_BAD_IRQ value instead of zero on both platforms.
> 
> Reported-by: David Gibson 
> Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt 
> controller")
> Cc: sta...@vger.kernel.org # v4.14+
> Signed-off-by: Cédric Le Goater 

Reviewed-by: David Gibson 
Tested-by: David Gibson 

> ---
>  arch/powerpc/sysdev/xive/xive-internal.h |  7 +++
>  arch/powerpc/sysdev/xive/common.c| 12 +++-
>  arch/powerpc/sysdev/xive/native.c|  4 ++--
>  arch/powerpc/sysdev/xive/spapr.c |  4 ++--
>  4 files changed, 14 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/sysdev/xive/xive-internal.h 
> b/arch/powerpc/sysdev/xive/xive-internal.h
> index 59cd366e7933..382980f4de2d 100644
> --- a/arch/powerpc/sysdev/xive/xive-internal.h
> +++ b/arch/powerpc/sysdev/xive/xive-internal.h
> @@ -5,6 +5,13 @@
>  #ifndef __XIVE_INTERNAL_H
>  #define __XIVE_INTERNAL_H
>  
> +/*
> + * A "disabled" interrupt should never fire, to catch problems
> + * we set its logical number to this
> + */
> +#define XIVE_BAD_IRQ 0x7fff
> +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1)
> +
>  /* Each CPU carry one of these with various per-CPU state */
>  struct xive_cpu {
>  #ifdef CONFIG_SMP
> diff --git a/arch/powerpc/sysdev/xive/common.c 
> b/arch/powerpc/sysdev/xive/common.c
> index fa49193206b6..550baba98ec9 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -68,13 +68,6 @@ static u32 xive_ipi_irq;
>  /* Xive state for each CPU */
>  static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
>  
> -/*
> - * A "disabled" interrupt should never fire, to catch problems
> - * we set its logical number to this
> - */
> -#define XIVE_BAD_IRQ 0x7fff
> -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1)
> -
>  /* An invalid CPU target */
>  #define XIVE_INVALID_TARGET  (-1)
>  
> @@ -1153,7 +1146,7 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
>   xc = per_cpu(xive_cpu, cpu);
>  
>   /* Check if we are already setup */
> - if (xc->hw_ipi != 0)
> + if (xc->hw_ipi != XIVE_BAD_IRQ)
>   return 0;
>  
>   /* Grab an IPI from the backend, this will populate xc->hw_ipi */
> @@ -1190,7 +1183,7 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, 
> struct xive_cpu *xc)
>   /* Disable the IPI and free the IRQ data */
>  
>   /* Already cleaned up ? */
> - if (xc->hw_ipi == 0)
> + if (xc->hw_ipi == XIVE_BAD_IRQ)
>   return;
>  
>   /* Mask the IPI */
> @@ -1346,6 +1339,7 @@ static int xive_prepare_cpu(unsigned int cpu)
>   if (np)
>   xc->chip_id = of_get_ibm_chip_id(np);
>   of_node_put(np);
> + xc->hw_ipi = XIVE_BAD_IRQ;
>  
>   per_cpu(xive_cpu, cpu) = xc;
>   }
> diff --git a/arch/powerpc/sysdev/xive/native.c 
> b/arch/powerpc/sysdev/xive/native.c
> index 0ff6b739052c..50e1a8e02497 100644
> --- a/arch/powerpc/sysdev/xive/native.c
> +++ b/arch/powerpc/sysdev/xive/native.c
> @@ -312,7 +312,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct 
> xive_cpu *xc)
>   s64 rc;
>  
>   /* Free the IPI */
> - if (!xc->hw_ipi)
> + if (xc->hw_ipi == XIVE_BAD_IRQ)
>   return;
>   for (;;) {
>   rc = opal_xive_free_irq(xc->hw_ipi);
> @@ -320,7 +320,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct 
> xive_cpu *xc)
>   msleep(OPAL_BUSY_DELAY_MS);
>   continue;
>   }
> - xc->hw_ipi = 0;
> + xc->hw_ipi = XIVE_BAD_IRQ;
>   break;
>   }
>  }
> diff --git a/arch/powerpc/sysdev/xive/spapr.c 
> b/arch/powerpc/sysdev/xive/spapr.c
> index 55dc61cb4867..3f15615712b5 100644
> --- a/arch/powerpc/sysdev/xive/spapr.c
> +++ b/arch/powerpc/sysdev/xive/spapr.c
> @@ -560,11 +560,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct 
> xive_cpu *xc)
>  
>  static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
>  {
> - if (!xc->hw_ipi)
> + if (xc->hw_ipi == XIVE_BAD_IRQ)
>   return;
>  
>   xive_irq_bitmap_free(xc->hw_ipi);
> - xc->hw_ipi = 0;
> + xc->hw_ipi = XIVE_BAD_IRQ;
>  }
>  #endif /* CONFIG_SMP */
>  
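
A minimal stand-alone sketch (hypothetical names, not kernel code) of why 0 is
a poor "unallocated" sentinel once 0 is itself a valid interrupt number, as on
sPAPR:

	#include <stdio.h>

	#define XIVE_BAD_IRQ 0x7fffffff

	static unsigned long irq_bitmap;	/* bit n set => irq n allocated */

	static unsigned int irq_alloc(void)
	{
		unsigned int i;

		for (i = 0; i < 32; i++)
			if (!(irq_bitmap & (1UL << i))) {
				irq_bitmap |= 1UL << i;
				return i;	/* can legitimately be 0 */
			}
		return XIVE_BAD_IRQ;
	}

	static void irq_free(unsigned int hw_ipi)
	{
		if (!hw_ipi)		/* bug: a real irq 0 looks unallocated */
			return;
		irq_bitmap &= ~(1UL << hw_ipi);
	}

	int main(void)
	{
		irq_free(irq_alloc());	/* allocates irq 0, then leaks it */
		printf("bitmap after free: %#lx\n", irq_bitmap); /* 0x1, not 0 */
		return 0;
	}

Testing hw_ipi against XIVE_BAD_IRQ instead of 0 (and initialising it to that
value in xive_prepare_cpu(), as the patch does) removes the ambiguity.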


Re: ppc32 panic on boot on linux-next

2020-03-09 Thread Michael Ellerman
Christophe Leroy  writes:
> On 07/03/2020 at 09:42, Christophe Leroy wrote:
>> On 06/03/2020 at 20:05, Nick Desaulniers wrote:
>>> As a heads up, our CI went red last night, seems like a panic from
>>> free_initmem?  Is this a known issue?
>> 
>> Thanks for the heads up.
>> 
>> No such issue with either 8xx or book3s/32.
>> 
>> I've now been able to reproduce it with bamboo QEMU.
>> 
>> Reverting 2efc7c085f05 makes it disappear. I'll investigate.
>> 
>
> Ok, I found the problem. virt_to_kpte() lacks a NULL pmd check. I'll 
> send a patch for that.
>
> However, if there is no PMD I guess this area is mapped through some 
> kind of block mapping. Therefore it should bail out of the function through:
>
>   if (v_block_mapped(address))
>   return 0;
>
>
> Can someone who knows BOOKE investigate that ?

Not sure we have anyone left?

cheers
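
For context, a minimal sketch of the guard Christophe describes -- assuming
the virt_to_kpte()/pmd_ptr_k() helpers from the series under discussion, not
the actual patch he later posted:

	static inline pte_t *virt_to_kpte(unsigned long vaddr)
	{
		pmd_t *pmd = pmd_ptr_k(vaddr);

		/* bail out rather than dereference an absent page table */
		return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
	}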


Re: [PATCH V15] mm/debug: Add tests validating architecture page table helpers

2020-03-09 Thread Anshuman Khandual



On 03/07/2020 12:35 PM, Christophe Leroy wrote:
> 
> 
> On 07/03/2020 at 01:56, Anshuman Khandual wrote:
>>
>>
>> On 03/07/2020 06:04 AM, Qian Cai wrote:
>>>
>>>
 On Mar 6, 2020, at 7:03 PM, Anshuman Khandual wrote:

 Hmm, the set_pte_at() function is not preferred here for these tests. The idea
 is to avoid or at least minimize TLB/cache flushes triggered from this sort
 of 'static' tests. set_pte_at() is platform provided and might trigger
 these flushes or some other platform specific synchronization stuff. Just
>>>
>>> Why is that important for this debugging option?
>>
>> Primarily reason is to avoid TLB/cache flush instructions on the system
>> during these tests that only involve transforming different page table
>> level entries through helpers. Unless really necessary, why should it
>> emit any TLB/cache flush instructions ?
> 
> What's the problem with those flushes?
> 
>>
>>>
 wondering if there is a specific reason, with respect to the soft lockup problem,
 making it necessary to use set_pte_at() rather than a simple WRITE_ONCE()?
>>>
>>> Looking at the s390 version of set_pte_at(), it has this comment:
>>>
>>> /*
>>>   * Certain architectures need to do special things when PTEs
>>>   * within a page table are directly modified.  Thus, the following
>>>   * hook is made available.
>>>   */
>>>
>>> I can only guess that powerpc  could be the same here.
>>
>> This comment is present in multiple platforms while defining set_pte_at().
>> Is 'barrier()' alone not good enough here? Else what exactly does set_pte_at()
>> do compared to WRITE_ONCE() that avoids the soft lockup? Just trying
>> to understand.
>>
> 
> 
> Argh! I didn't realise that you were writing directly into the page tables.
> When it works, that's only by chance, I guess.
> 
> To properly set the page table entries, set_pte_at() has to be used:
> - On powerpc 8xx, with 16k pages, the page table entry must be copied four 
> times. set_pte_at() does it, WRITE_ONCE() doesn't.
> - On powerpc book3s/32 (hash MMU), the flag _PAGE_HASHPTE must be preserved 
> among writes. set_pte_at() preserves it, WRITE_ONCE() doesn't.
> 
> set_pte_at() also does a few other mandatory things, like calling pte_mkpte()
> 
> So, the WRITE_ONCE() must definitely become a set_pte_at()

Sure, will do. These are part of the clear tests that populate a given
entry with a non-zero value before clearing and testing it with pxx_none().
In that context, WRITE_ONCE() seemed sufficient. But pte_clear() might be
closely tied to a proper page table entry update, and hence a preceding
set_pte_at() would be better.

There are still more WRITE_ONCE() calls for the other page table levels
during these clear tests. set_pmd_at() and set_pud_at() are defined only on
platforms that support (and enable) THP and PUD-based THP respectively.
Hence they cannot be used for the clear tests, as the remaining helpers
pmd_clear(), pud_clear(), p4d_clear() and pgd_clear() still need to be
validated with or without THP support and enablement. We should just leave
all other WRITE_ONCE() instances unchanged. Please correct me if I am
missing something here.

> 
> Christophe
> 
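
A minimal sketch of the resulting pte-level clear test -- helper and constant
names (RANDOM_ORVALUE) assumed from the debug_vm_pgtable series itself; only
the pte level switches to set_pte_at(), the other levels keep WRITE_ONCE() as
discussed above:

	static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
					   unsigned long vaddr)
	{
		pte_t pte = READ_ONCE(*ptep);

		pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
		/* was WRITE_ONCE(*ptep, pte); use the arch hook instead */
		set_pte_at(mm, vaddr, ptep, pte);
		barrier();
		pte_clear(mm, vaddr, ptep);
		pte = READ_ONCE(*ptep);
		WARN_ON(!pte_none(pte));
	}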


Re: [PATCH net] ibmvnic: Do not process device remove during device reset

2020-03-09 Thread David Miller
From: Juliet Kim 
Date: Mon,  9 Mar 2020 19:02:04 -0500

> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index c75239d8820f..7ef1ae0d49bc 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -2144,6 +2144,8 @@ static void __ibmvnic_reset(struct work_struct *work)
>   struct ibmvnic_adapter *adapter;
>   u32 reset_state;
>   int rc = 0;
> + unsigned long flags;
> + bool saved_state = false;

Please preserve the reverse christmas tree ordering of local variables.

Thank you.
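
For reference, the locals from the hunk above reordered longest-to-shortest,
which is what "reverse christmas tree" asks for:

	struct ibmvnic_adapter *adapter;
	bool saved_state = false;
	unsigned long flags;
	u32 reset_state;
	int rc = 0;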


[PATCH v6 7/7] powerpc/32: use set_memory_attr()

2020-03-09 Thread Russell Currey
From: Christophe Leroy 

Use set_memory_attr() instead of the PPC32 specific change_page_attr()

change_page_attr() was checking that the address was not mapped by
blocks and was handling highmem, but that's unneeded because the
affected pages can't be in highmem and block mapping verification
is already done by the callers.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/pgtable_32.c | 95 
 1 file changed, 10 insertions(+), 85 deletions(-)

diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 5fb90edd865e..3d92eaf3ee2f 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -121,99 +122,20 @@ void __init mapin_ram(void)
}
 }
 
-/* Scan the real Linux page tables and return a PTE pointer for
- * a virtual address in a context.
- * Returns true (1) if PTE was found, zero otherwise.  The pointer to
- * the PTE pointer is unmodified if PTE is not found.
- */
-static int
-get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t 
**pmdp)
-{
-pgd_t  *pgd;
-   pud_t   *pud;
-pmd_t  *pmd;
-pte_t  *pte;
-int retval = 0;
-
-pgd = pgd_offset(mm, addr & PAGE_MASK);
-if (pgd) {
-   pud = pud_offset(pgd, addr & PAGE_MASK);
-   if (pud && pud_present(*pud)) {
-   pmd = pmd_offset(pud, addr & PAGE_MASK);
-   if (pmd_present(*pmd)) {
-   pte = pte_offset_map(pmd, addr & PAGE_MASK);
-   if (pte) {
-   retval = 1;
-   *ptep = pte;
-   if (pmdp)
-   *pmdp = pmd;
-   /* XXX caller needs to do pte_unmap, 
yuck */
-   }
-   }
-   }
-}
-return(retval);
-}
-
-static int __change_page_attr_noflush(struct page *page, pgprot_t prot)
-{
-   pte_t *kpte;
-   pmd_t *kpmd;
-   unsigned long address;
-
-   BUG_ON(PageHighMem(page));
-   address = (unsigned long)page_address(page);
-
-   if (v_block_mapped(address))
-   return 0;
-   if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
-   return -EINVAL;
-   __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
-   pte_unmap(kpte);
-
-   return 0;
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY
- */
-static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
-   int i, err = 0;
-   unsigned long flags;
-   struct page *start = page;
-
-   local_irq_save(flags);
-   for (i = 0; i < numpages; i++, page++) {
-   err = __change_page_attr_noflush(page, prot);
-   if (err)
-   break;
-   }
-   wmb();
-   local_irq_restore(flags);
-   flush_tlb_kernel_range((unsigned long)page_address(start),
-  (unsigned long)page_address(page));
-   return err;
-}
-
 void mark_initmem_nx(void)
 {
-   struct page *page = virt_to_page(_sinittext);
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
 PFN_DOWN((unsigned long)_sinittext);
 
if (v_block_mapped((unsigned long)_stext + 1))
mmu_mark_initmem_nx();
else
-   change_page_attr(page, numpages, PAGE_KERNEL);
+   set_memory_attr((unsigned long)_sinittext, numpages, 
PAGE_KERNEL);
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void mark_rodata_ro(void)
 {
-   struct page *page;
unsigned long numpages;
 
if (v_block_mapped((unsigned long)_sinittext)) {
@@ -222,20 +144,18 @@ void mark_rodata_ro(void)
return;
}
 
-   page = virt_to_page(_stext);
numpages = PFN_UP((unsigned long)_etext) -
   PFN_DOWN((unsigned long)_stext);
 
-   change_page_attr(page, numpages, PAGE_KERNEL_ROX);
+   set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX);
/*
 * mark .rodata as read only. Use __init_begin rather than __end_rodata
 * to cover NOTES and EXCEPTION_TABLE.
 */
-   page = virt_to_page(__start_rodata);
numpages = PFN_UP((unsigned long)__init_begin) -
   PFN_DOWN((unsigned long)__start_rodata);
 
-   change_page_attr(page, numpages, PAGE_KERNEL_RO);
+   set_memory_attr((unsigned long)__start_rodata, numpages, 
PAGE_KERNEL_RO);
 
// mark_initmem_nx() should have already run by now
ptdump_check_wx();
@@ -245,9 +165,14 @@ void mark_rodata_ro(void)
 #ifdef CONFIG_DEBUG_PAGEALLOC
 void __kernel_m

[PATCH v6 6/7] powerpc/mm: implement set_memory_attr()

2020-03-09 Thread Russell Currey
From: Christophe Leroy 

In addition to the set_memory_xx() functions, which allow changing
the memory attributes of not (yet) used memory regions, implement a
set_memory_attr() function to:
- set the final memory protection after init on currently used
kernel regions.
- enable/disable kernel memory regions in the scope of DEBUG_PAGEALLOC.

Unlike set_memory_xx(), which can act in three steps because the regions
are unused, this function must modify the mappings 'on the fly' as the
kernel is executing from them. At the moment only PPC32 will use it, and
changing page attributes on the fly is not an issue.

Signed-off-by: Christophe Leroy 
Reported-by: kbuild test robot 
[ruscur: cast "data" to unsigned long instead of int]
Signed-off-by: Russell Currey 
---
 arch/powerpc/include/asm/set_memory.h |  2 ++
 arch/powerpc/mm/pageattr.c| 33 +++
 2 files changed, 35 insertions(+)

diff --git a/arch/powerpc/include/asm/set_memory.h 
b/arch/powerpc/include/asm/set_memory.h
index 64011ea444b4..b040094f7920 100644
--- a/arch/powerpc/include/asm/set_memory.h
+++ b/arch/powerpc/include/asm/set_memory.h
@@ -29,4 +29,6 @@ static inline int set_memory_x(unsigned long addr, int 
numpages)
return change_memory_attr(addr, numpages, SET_MEMORY_X);
 }
 
+int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot);
+
 #endif
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
index 748fa56d9db0..60139fedc6cc 100644
--- a/arch/powerpc/mm/pageattr.c
+++ b/arch/powerpc/mm/pageattr.c
@@ -77,3 +77,36 @@ int change_memory_attr(unsigned long addr, int numpages, 
long action)
 
return apply_to_page_range(&init_mm, start, sz, change_page_attr, (void 
*)action);
 }
+
+/*
+ * Set the attributes of a page:
+ *
+ * This function is used by PPC32 at the end of init to set final kernel memory
+ * protection. It includes changing the mapping of the page it is executing from
+ * and data pages it is using.
+ */
+static int set_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+   pgprot_t prot = __pgprot((unsigned long)data);
+
+   spin_lock(&init_mm.page_table_lock);
+
+   set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot));
+   flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+   spin_unlock(&init_mm.page_table_lock);
+
+   return 0;
+}
+
+int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot)
+{
+   unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+   unsigned long sz = numpages * PAGE_SIZE;
+
+   if (!numpages)
+   return 0;
+
+   return apply_to_page_range(&init_mm, start, sz, set_page_attr,
+  (void *)pgprot_val(prot));
+}
-- 
2.25.1



[PATCH v6 5/7] powerpc/configs: Enable STRICT_MODULE_RWX in skiroot_defconfig

2020-03-09 Thread Russell Currey
skiroot_defconfig is the only powerpc defconfig with STRICT_KERNEL_RWX
enabled, and if you want memory protection for kernel text you'd want it
for modules too, so enable STRICT_MODULE_RWX there.

Acked-by: Joel Stanley 
Signed-off-by: Russell Currey 
---
 arch/powerpc/configs/skiroot_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/configs/skiroot_defconfig 
b/arch/powerpc/configs/skiroot_defconfig
index 1b6bdad36b13..66d20dbe67b7 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -51,6 +51,7 @@ CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 
quiet"
 # CONFIG_PPC_MEM_KEYS is not set
 CONFIG_JUMP_LABEL=y
 CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_STRICT_MODULE_RWX=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_SIG_FORCE=y
-- 
2.25.1



[PATCH v6 4/7] powerpc: Set ARCH_HAS_STRICT_MODULE_RWX

2020-03-09 Thread Russell Currey
To enable strict module RWX on powerpc, set:

CONFIG_STRICT_MODULE_RWX=y

You should also have CONFIG_STRICT_KERNEL_RWX=y set to have any real
security benefit.

ARCH_HAS_STRICT_MODULE_RWX is set to require ARCH_HAS_STRICT_KERNEL_RWX.
This is due to a quirk in arch/Kconfig and arch/powerpc/Kconfig that
makes STRICT_MODULE_RWX *on by default* in configurations where
STRICT_KERNEL_RWX is *unavailable*.

Since this doesn't make much sense, and module RWX without kernel RWX
doesn't make much sense, having the same dependencies as kernel RWX
works around this problem.

Signed-off-by: Russell Currey 
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bd074246e34e..e1fc7fba10bf 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -131,6 +131,7 @@ config PPC
select ARCH_HAS_SCALED_CPUTIME  if VIRT_CPU_ACCOUNTING_NATIVE 
&& PPC_BOOK3S_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
!HIBERNATION)
+   select ARCH_HAS_STRICT_MODULE_RWX   if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UACCESS_MCSAFE  if PPC64
-- 
2.25.1



[PATCH v6 1/7] powerpc/mm: Implement set_memory() routines

2020-03-09 Thread Russell Currey
The set_memory_{ro/rw/nx/x}() functions are required for STRICT_MODULE_RWX,
and are generally useful primitives to have.  This implementation is
designed to be completely generic across powerpc's many MMUs.

It's possible that this could be optimised to be faster for specific
MMUs, but the focus is on having a generic and safe implementation for
now.

This implementation does not handle cases where the caller is attempting
to change the mapping of the page it is executing from, or if another
CPU is concurrently using the page being altered.  These cases likely
shouldn't happen, but a more complex implementation with MMU-specific code
could safely handle them, so that is left as a TODO for now.

These functions do nothing if STRICT_KERNEL_RWX is not enabled.
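
A minimal usage sketch of the API this adds -- essentially what patch 2/7's
alloc_insn_page() does:

	void *page = vmalloc_exec(PAGE_SIZE);	/* writable + executable */

	if (page)
		set_memory_ro((unsigned long)page, 1);	/* now RO, 1 page */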

Signed-off-by: Russell Currey 
Signed-off-by: Christophe Leroy 
---
v6: Merge patch 8/8 from v5, handling RWX not being enabled.
Add note to change_page_attr() in case it's ever made non-static
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/include/asm/set_memory.h | 32 +++
 arch/powerpc/mm/Makefile  |  2 +-
 arch/powerpc/mm/pageattr.c| 79 +++
 4 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/set_memory.h
 create mode 100644 arch/powerpc/mm/pageattr.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 497b7d0b2d7e..bd074246e34e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -129,6 +129,7 @@ config PPC
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_SCALED_CPUTIME  if VIRT_CPU_ACCOUNTING_NATIVE 
&& PPC_BOOK3S_64
+   select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
!HIBERNATION)
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
diff --git a/arch/powerpc/include/asm/set_memory.h 
b/arch/powerpc/include/asm/set_memory.h
new file mode 100644
index ..64011ea444b4
--- /dev/null
+++ b/arch/powerpc/include/asm/set_memory.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SET_MEMORY_H
+#define _ASM_POWERPC_SET_MEMORY_H
+
+#define SET_MEMORY_RO  0
+#define SET_MEMORY_RW  1
+#define SET_MEMORY_NX  2
+#define SET_MEMORY_X   3
+
+int change_memory_attr(unsigned long addr, int numpages, long action);
+
+static inline int set_memory_ro(unsigned long addr, int numpages)
+{
+   return change_memory_attr(addr, numpages, SET_MEMORY_RO);
+}
+
+static inline int set_memory_rw(unsigned long addr, int numpages)
+{
+   return change_memory_attr(addr, numpages, SET_MEMORY_RW);
+}
+
+static inline int set_memory_nx(unsigned long addr, int numpages)
+{
+   return change_memory_attr(addr, numpages, SET_MEMORY_NX);
+}
+
+static inline int set_memory_x(unsigned long addr, int numpages)
+{
+   return change_memory_attr(addr, numpages, SET_MEMORY_X);
+}
+
+#endif
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 5e147986400d..a998fdac52f9 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
 
 ccflags-$(CONFIG_PPC64):= $(NO_MINIMAL_TOC)
 
-obj-y  := fault.o mem.o pgtable.o mmap.o \
+obj-y  := fault.o mem.o pgtable.o mmap.o pageattr.o \
   init_$(BITS).o pgtable_$(BITS).o \
   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
   init-common.o mmu_context.o drmem.o
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
new file mode 100644
index ..748fa56d9db0
--- /dev/null
+++ b/arch/powerpc/mm/pageattr.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * MMU-generic set_memory implementation for powerpc
+ *
+ * Copyright 2019, IBM Corporation.
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+
+/*
+ * Updates the attributes of a page in three steps:
+ *
+ * 1. invalidate the page table entry
+ * 2. flush the TLB
+ * 3. install the new entry with the updated attributes
+ *
+ * This is unsafe if the caller is attempting to change the mapping of the
+ * page it is executing from, or if another CPU is concurrently using the
+ * page being altered.
+ *
+ * TODO make the implementation resistant to this.
+ *
+ * NOTE: can be dangerous to call without STRICT_KERNEL_RWX
+ */
+static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+   long action = (long)data;
+   pte_t pte;
+
+   spin_lock(&init_mm.page_table_lock);
+
+   /* invalidate the PTE so it's safe to modify */
+   pte = ptep_get_and_clear(&init_mm, addr, ptep);
+   flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+   /* modify the PTE bits as desired, then apply */
+   switch (action) {
+   case SET_MEMORY_RO:
+ 

[PATCH v6 2/7] powerpc/kprobes: Mark newly allocated probes as RO

2020-03-09 Thread Russell Currey
With CONFIG_STRICT_KERNEL_RWX=y and CONFIG_KPROBES=y, there will be one
W+X page at boot by default.  This can be tested with
CONFIG_PPC_PTDUMP=y and CONFIG_PPC_DEBUG_WX=y set, and checking the
kernel log during boot.

powerpc doesn't implement its own alloc() for kprobes like other
architectures do, but we couldn't immediately mark RO anyway since we do
a memcpy to the page we allocate later.  After that, nothing should be
allowed to modify the page, and write permissions are removed well
before the kprobe is armed.

The memcpy() would fail if >1 probes were allocated, so use
patch_instruction() instead which is safe for RO.

Reviewed-by: Daniel Axtens 
Signed-off-by: Russell Currey 
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/kprobes.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 2d27ec4feee4..bfab91ded234 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -24,6 +24,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -102,6 +104,16 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, 
unsigned int offset)
return addr;
 }
 
+void *alloc_insn_page(void)
+{
+   void *page = vmalloc_exec(PAGE_SIZE);
+
+   if (page)
+   set_memory_ro((unsigned long)page, 1);
+
+   return page;
+}
+
 int arch_prepare_kprobe(struct kprobe *p)
 {
int ret = 0;
@@ -124,11 +136,8 @@ int arch_prepare_kprobe(struct kprobe *p)
}
 
if (!ret) {
-   memcpy(p->ainsn.insn, p->addr,
-   MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+   patch_instruction(p->ainsn.insn, *p->addr);
p->opcode = *p->addr;
-   flush_icache_range((unsigned long)p->ainsn.insn,
-   (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
}
 
p->ainsn.boostable = 0;
-- 
2.25.1



[PATCH v6 3/7] powerpc/mm/ptdump: debugfs handler for W+X checks at runtime

2020-03-09 Thread Russell Currey
Very rudimentary, just

echo 1 > [debugfs]/check_wx_pages

and check the kernel log.  Useful for testing strict module RWX.

Updated the Kconfig entry to reflect this.

Also fixed a typo.

Reviewed-by: Kees Cook 
Signed-off-by: Russell Currey 
---
 arch/powerpc/Kconfig.debug  |  6 --
 arch/powerpc/mm/ptdump/ptdump.c | 21 -
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 0b063830eea8..e37960ef68c6 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -370,7 +370,7 @@ config PPC_PTDUMP
  If you are unsure, say N.
 
 config PPC_DEBUG_WX
-   bool "Warn on W+X mappings at boot"
+   bool "Warn on W+X mappings at boot & enable manual checks at runtime"
depends on PPC_PTDUMP && STRICT_KERNEL_RWX
help
  Generate a warning if any W+X mappings are found at boot.
@@ -384,7 +384,9 @@ config PPC_DEBUG_WX
  of other unfixed kernel bugs easier.
 
  There is no runtime or memory usage effect of this option
- once the kernel has booted up - it's a one time check.
+ once the kernel has booted up, it only automatically checks once.
+
+ Enables the "check_wx_pages" debugfs entry for checking at runtime.
 
  If in doubt, say "Y".
 
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 206156255247..a15e19a3b14e 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -4,7 +4,7 @@
  *
  * This traverses the kernel pagetables and dumps the
  * information about the used sections of memory to
- * /sys/kernel/debug/kernel_pagetables.
+ * /sys/kernel/debug/kernel_page_tables.
  *
  * Derived from the arm64 implementation:
  * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
@@ -413,6 +413,25 @@ void ptdump_check_wx(void)
else
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
 }
+
+static int check_wx_debugfs_set(void *data, u64 val)
+{
+   if (val != 1ULL)
+   return -EINVAL;
+
+   ptdump_check_wx();
+
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(check_wx_fops, NULL, check_wx_debugfs_set, "%llu\n");
+
+static int ptdump_check_wx_init(void)
+{
+   return debugfs_create_file("check_wx_pages", 0200, NULL,
+  NULL, &check_wx_fops) ? 0 : -ENOMEM;
+}
+device_initcall(ptdump_check_wx_init);
 #endif
 
 static int ptdump_init(void)
-- 
2.25.1



[PATCH v6 0/7] set_memory() routines and STRICT_MODULE_RWX

2020-03-09 Thread Russell Currey
Back again, just minor changes.

v5: https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=160869

Changes since v5:
[1/8]: Patch 8/8 squashed as suggested by Andrew Donnellan
   Added a note to the comment of change_page_attr()
   Rename size to sz to meet 90 chars without multiple lines

[8/8]: Removed, change_memory_attr() section squashed, rest dropped
   as suggested by Christophe Leroy (since I just assumed it was
   the right thing to do instead of actually checking)

Thanks for the feedback.

Christophe Leroy (2):
  powerpc/mm: implement set_memory_attr()
  powerpc/32: use set_memory_attr()

Russell Currey (5):
  powerpc/mm: Implement set_memory() routines
  powerpc/kprobes: Mark newly allocated probes as RO
  powerpc/mm/ptdump: debugfs handler for W+X checks at runtime
  powerpc: Set ARCH_HAS_STRICT_MODULE_RWX
  powerpc/configs: Enable STRICT_MODULE_RWX in skiroot_defconfig

 arch/powerpc/Kconfig   |   2 +
 arch/powerpc/Kconfig.debug |   6 +-
 arch/powerpc/configs/skiroot_defconfig |   1 +
 arch/powerpc/include/asm/set_memory.h  |  34 
 arch/powerpc/kernel/kprobes.c  |  17 +++-
 arch/powerpc/mm/Makefile   |   2 +-
 arch/powerpc/mm/pageattr.c | 112 +
 arch/powerpc/mm/pgtable_32.c   |  95 +++--
 arch/powerpc/mm/ptdump/ptdump.c|  21 -
 9 files changed, 197 insertions(+), 93 deletions(-)
 create mode 100644 arch/powerpc/include/asm/set_memory.h
 create mode 100644 arch/powerpc/mm/pageattr.c

-- 
2.25.1



[PATCH net] ibmvnic: Do not process device remove during device reset

2020-03-09 Thread Juliet Kim
The ibmvnic driver does not check the device state when the device
is removed. If the device is removed while a device reset is being 
processed, the remove may free structures needed by the reset, 
causing an oops.

Fix this by checking the device state before processing device remove.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 24 ++--
 drivers/net/ethernet/ibm/ibmvnic.h |  6 +-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index c75239d8820f..7ef1ae0d49bc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2144,6 +2144,8 @@ static void __ibmvnic_reset(struct work_struct *work)
struct ibmvnic_adapter *adapter;
u32 reset_state;
int rc = 0;
+   unsigned long flags;
+   bool saved_state = false;
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
@@ -2153,17 +2155,25 @@ static void __ibmvnic_reset(struct work_struct *work)
return;
}
 
-   reset_state = adapter->state;
-
rwi = get_next_rwi(adapter);
while (rwi) {
+   spin_lock_irqsave(&adapter->state_lock, flags);
+
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
kfree(rwi);
rc = EBUSY;
break;
}
 
+   if (!saved_state) {
+   reset_state = adapter->state;
+   adapter->state = VNIC_RESETTING;
+   saved_state = true;
+   }
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
/* CHANGE_PARAM requestor holds rtnl_lock */
rc = do_change_param_reset(adapter, rwi, reset_state);
@@ -5091,6 +5101,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
  __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
+   spin_lock_init(&adapter->state_lock);
mutex_init(&adapter->fw_lock);
init_completion(&adapter->init_done);
init_completion(&adapter->fw_done);
@@ -5161,10 +5172,19 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
 
 static int ibmvnic_remove(struct vio_dev *dev)
 {
+   unsigned long flags;
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
+   spin_lock_irqsave(&adapter->state_lock, flags);
+   if (adapter->state == VNIC_RESETTING) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+   return -EBUSY;
+   }
+
adapter->state = VNIC_REMOVING;
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
rtnl_lock();
unregister_netdevice(netdev);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 60eccaf91b12..971505e4cc52 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -941,7 +941,8 @@ enum vnic_state {VNIC_PROBING = 1,
 VNIC_CLOSING,
 VNIC_CLOSED,
 VNIC_REMOVING,
-VNIC_REMOVED};
+VNIC_REMOVED,
+VNIC_RESETTING};
 
 enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
   VNIC_RESET_MOBILITY,
@@ -1090,4 +1091,7 @@ struct ibmvnic_adapter {
 
struct ibmvnic_tunables desired;
struct ibmvnic_tunables fallback;
+
+   /* lock to protect state field */
+   spinlock_t state_lock;
 };
-- 
2.25.0



[PATCH] ibmvnic: Do not process device remove during device reset

2020-03-09 Thread Juliet Kim
The ibmvnic driver does not check the device state when the device
is removed. If the device is removed while a device reset is being 
processed, the remove may free structures needed by the reset, 
causing an oops.

Fix this by checking the device state before processing device remove.

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 24 ++--
 drivers/net/ethernet/ibm/ibmvnic.h |  6 +-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index c75239d8820f..7ef1ae0d49bc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2144,6 +2144,8 @@ static void __ibmvnic_reset(struct work_struct *work)
struct ibmvnic_adapter *adapter;
u32 reset_state;
int rc = 0;
+   unsigned long flags;
+   bool saved_state = false;
 
adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
@@ -2153,17 +2155,25 @@ static void __ibmvnic_reset(struct work_struct *work)
return;
}
 
-   reset_state = adapter->state;
-
rwi = get_next_rwi(adapter);
while (rwi) {
+   spin_lock_irqsave(&adapter->state_lock, flags);
+
if (adapter->state == VNIC_REMOVING ||
adapter->state == VNIC_REMOVED) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
kfree(rwi);
rc = EBUSY;
break;
}
 
+   if (!saved_state) {
+   reset_state = adapter->state;
+   adapter->state = VNIC_RESETTING;
+   saved_state = true;
+   }
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
if (rwi->reset_reason == VNIC_RESET_CHANGE_PARAM) {
/* CHANGE_PARAM requestor holds rtnl_lock */
rc = do_change_param_reset(adapter, rwi, reset_state);
@@ -5091,6 +5101,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
  __ibmvnic_delayed_reset);
INIT_LIST_HEAD(&adapter->rwi_list);
spin_lock_init(&adapter->rwi_lock);
+   spin_lock_init(&adapter->state_lock);
mutex_init(&adapter->fw_lock);
init_completion(&adapter->init_done);
init_completion(&adapter->fw_done);
@@ -5161,10 +5172,19 @@ static int ibmvnic_probe(struct vio_dev *dev, const 
struct vio_device_id *id)
 
 static int ibmvnic_remove(struct vio_dev *dev)
 {
+   unsigned long flags;
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
+   spin_lock_irqsave(&adapter->state_lock, flags);
+   if (adapter->state == VNIC_RESETTING) {
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+   return -EBUSY;
+   }
+
adapter->state = VNIC_REMOVING;
+   spin_unlock_irqrestore(&adapter->state_lock, flags);
+
rtnl_lock();
unregister_netdevice(netdev);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h 
b/drivers/net/ethernet/ibm/ibmvnic.h
index 60eccaf91b12..971505e4cc52 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -941,7 +941,8 @@ enum vnic_state {VNIC_PROBING = 1,
 VNIC_CLOSING,
 VNIC_CLOSED,
 VNIC_REMOVING,
-VNIC_REMOVED};
+VNIC_REMOVED,
+VNIC_RESETTING};
 
 enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
   VNIC_RESET_MOBILITY,
@@ -1090,4 +1091,7 @@ struct ibmvnic_adapter {
 
struct ibmvnic_tunables desired;
struct ibmvnic_tunables fallback;
+
+   /* lock to protect state field */
+   spinlock_t state_lock;
 };
-- 
2.25.0



Re: [PATCH v5 7/7] ASoC: fsl_easrc: Add EASRC ASoC CPU DAI drivers

2020-03-09 Thread Nicolin Chen
A few small comments -- trying to improve readability.

On Mon, Mar 09, 2020 at 11:58:34AM +0800, Shengjiu Wang wrote:
> EASRC (Enhanced Asynchronous Sample Rate Converter) is a new IP module
> found on i.MX8MN. It is different from the old ASRC module.
> 
> The primary features for the EASRC are as follows:
> - 4 Contexts - groups of channels with an independent time base
> - Fully independent and concurrent context control
> - Simultaneous processing of up to 32 audio channels
> - Programmable filter characteristics for each context
> - 32, 24, 20, and 16-bit fixed point audio sample support
> - 32-bit floating point audio sample support
> - 8kHz to 384kHz sample rate
> - 1/16 to 8x sample rate conversion ratio
> 
> Signed-off-by: Shengjiu Wang 
> Signed-off-by: Cosmin-Gabriel Samoila 
> ---
>  sound/soc/fsl/Kconfig |   11 +
>  sound/soc/fsl/Makefile|2 +
>  sound/soc/fsl/fsl_easrc.c | 2111 +
>  sound/soc/fsl/fsl_easrc.h |  651 
>  4 files changed, 2775 insertions(+)
>  create mode 100644 sound/soc/fsl/fsl_easrc.c
>  create mode 100644 sound/soc/fsl/fsl_easrc.h

> diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c

> +static int fsl_easrc_resampler_config(struct fsl_asrc *easrc)
> +{
> + struct device *dev = &easrc->pdev->dev;
> + struct fsl_easrc_priv *easrc_priv = easrc->private;
> + struct asrc_firmware_hdr *hdr =  easrc_priv->firmware_hdr;
> + struct interp_params *interp = easrc_priv->interp;
> + struct interp_params *selected_interp = NULL;
> + unsigned int num_coeff;
> + unsigned int i;
> + u64 *arr;
> + u32 *r;
> + int ret;
> +
> + if (!hdr) {
> + dev_err(dev, "firmware not loaded!\n");
> + return -ENODEV;
> + }
> +
> + for (i = 0; i < hdr->interp_scen; i++) {
> + if ((interp[i].num_taps - 1) ==
> + bits_taps_to_val(easrc_priv->rs_num_taps)) {

Could fit everything under 80 characters:

+   if ((interp[i].num_taps - 1) !=
+   bits_taps_to_val(easrc_priv->rs_num_taps))
+   continue;
+
+   arr = interp[i].coeff;
+   selected_interp = &interp[i];
+   dev_dbg(dev, "Selected interp_filter: %u taps - %u phases\n",
+   selected_interp->num_taps,
+   selected_interp->num_phases);
+   break;


> +static int fsl_easrc_normalize_filter(struct fsl_asrc *easrc,
> +   u64 *infilter,
> +   u64 *outfilter,
> +   int shift)
> +{
> + struct device *dev = &easrc->pdev->dev;
> + u64 coef = *infilter;

> + s64 exp  = (coef & 0x7ff0000000000000ll) >> 52;

Hmm...by masking 0x7ff0000000000000ll, MSB (sign bit) is gone.
Would the result still possibly be a negative value?

> + /*
> +  * If exponent is zero (value == 0), or 7ff (value == NaNs)
> +  * dont touch the content
> +  */
> + if (((coef & 0x7ff0000000000000ll) == 0) ||
> + ((coef & 0x7ff0000000000000ll) == ((u64)0x7ff << 52))) {

You have extracted "exp" already:
+   if (exp == 0 || exp == 0x7ff)

> + *outfilter = coef;

Could simplify a bit by returning here:
+   return 0;
+   }

Then:

+   /* coef * 2^shift == exp + shift */
+   exp += shift;
+
+   if ((shift > 0 && exp >= 2047) || (shift < 0 && exp <= 0)) {
+   dev_err(dev, "coef out of range\n");
+   return -EINVAL;
+   }
+
+   outcoef = (u64)(coef & 0x800FFFFFFFFFFFFFll) + ((u64)exp << 52);
+   *outfilter = outcoef;
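
For reference, the IEEE-754 double layout these masks pick apart (this also
answers the sign-bit question above):

	/*
	 * bit 63      : sign        (mask 0x8000000000000000)
	 * bits 62..52 : biased exp  (mask 0x7ff0000000000000)
	 * bits 51..0  : mantissa    (mask 0x000fffffffffffff)
	 *
	 * (coef & 0x7ff0000000000000ULL) >> 52 is always in [0, 2047], so
	 * "exp" starts out non-negative; it can only go negative after the
	 * "exp += shift" step.  The sign bit is carried separately by the
	 * 0x800FFFFFFFFFFFFF mask when the coefficient is rebuilt.
	 */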


> +static int fsl_easrc_write_pf_coeff_mem(struct fsl_asrc *easrc, int ctx_id,
> + u64 *arr, int n_taps, int shift)
> +{
> + if (!arr) {
> + dev_err(dev, "NULL buffer\n");

Could it be slightly more specific?


> +static int fsl_easrc_prefilter_config(struct fsl_asrc *easrc,
> +   unsigned int ctx_id)
> +{
> + ctx_priv->in_filled_sample = bits_taps_to_val(easrc_priv->rs_num_taps) 
> / 2;
> + ctx_priv->out_missed_sample = ctx_priv->in_filled_sample *
> +   ctx_priv->out_params.sample_rate /
> +   ctx_priv->in_params.sample_rate;

There are quite a few references to sample_rate and sample_format
here, so we could use some local variables to cache them:

+   in_s_rate = ctx_priv->in_params.sample_rate;
+   out_s_rate = ctx_priv->out_params.sample_rate;
+   in_s_fmt = ctx_priv->in_params.sample_format;
+   out_s_fmt = ctx_priv->out_params.sample_format;


> +static int fsl_easrc_config_slot(struct fsl_asrc *easrc, unsigned int ctx_id)
> +{
> + struct fsl_easrc_priv *easrc_priv = easrc->private;
> + struct fsl_asrc_pair *ctx = easrc->pair[ctx_id];
> + int req_channels = ctx-

Re: [PATCH -next] soc: fsl: dpio: remove set but not used variable 'addr_cena'

2020-03-09 Thread Li Yang
On Fri, Feb 21, 2020 at 2:38 AM YueHaibing  wrote:
>
> commit 3b2abda7d28c ("soc: fsl: dpio: Replace QMAN array
> mode with ring mode enqueue") introduced this, but not
> used, so remove it.
>
> Reported-by: Hulk Robot 
> Signed-off-by: YueHaibing 
> ---
>  drivers/soc/fsl/dpio/qbman-portal.c | 4 
>  1 file changed, 4 deletions(-)
>
> diff --git a/drivers/soc/fsl/dpio/qbman-portal.c 
> b/drivers/soc/fsl/dpio/qbman-portal.c
> index 740ee0d..350de56 100644
> --- a/drivers/soc/fsl/dpio/qbman-portal.c
> +++ b/drivers/soc/fsl/dpio/qbman-portal.c
> @@ -658,7 +658,6 @@ int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
> const uint32_t *cl = (uint32_t *)d;
> uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
> int i, num_enqueued = 0;
> -   uint64_t addr_cena;
>
> spin_lock(&s->access_spinlock);
> half_mask = (s->eqcr.pi_ci_mask>>1);
> @@ -711,7 +710,6 @@ int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
>
> /* Flush all the cacheline without load/store in between */
> eqcr_pi = s->eqcr.pi;
> -   addr_cena = (size_t)s->addr_cena;
> for (i = 0; i < num_enqueued; i++)
> eqcr_pi++;
> s->eqcr.pi = eqcr_pi & full_mask;
> @@ -822,7 +820,6 @@ int qbman_swp_enqueue_multiple_desc_direct(struct 
> qbman_swp *s,
> const uint32_t *cl;
> uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
> int i, num_enqueued = 0;
> -   uint64_t addr_cena;
>
> half_mask = (s->eqcr.pi_ci_mask>>1);
> full_mask = s->eqcr.pi_ci_mask;
> @@ -866,7 +863,6 @@ int qbman_swp_enqueue_multiple_desc_direct(struct 
> qbman_swp *s,
>
> /* Flush all the cacheline without load/store in between */
> eqcr_pi = s->eqcr.pi;
> -   addr_cena = (uint64_t)s->addr_cena;

Hi Youri,

Looks like this problem exposed another issue: the cacheline related
code was removed in the upstream version. The comment /* Flush all the
cacheline without load/store in between */ is no longer true now, and
the whole block can probably be replaced with a single line that
increases s->eqcr.pi (see the sketch below)? The same for the block
above. Can you provide a better fix for this issue?

Regards,
Leo

> for (i = 0; i < num_enqueued; i++)
> eqcr_pi++;
> s->eqcr.pi = eqcr_pi & full_mask;
> --
> 2.7.4
>
>
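
A sketch of the simplification Leo suggests above (untested) -- with the
cacheline flushing gone, each of those loops collapses to one line:

	/* was:
	 *	eqcr_pi = s->eqcr.pi;
	 *	for (i = 0; i < num_enqueued; i++)
	 *		eqcr_pi++;
	 *	s->eqcr.pi = eqcr_pi & full_mask;
	 */
	s->eqcr.pi = (s->eqcr.pi + num_enqueued) & full_mask;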


Re: [PATCH] powerpc/64: BE option to use ELFv2 ABI for big endian kernels

2020-03-09 Thread Segher Boessenkool
On Sat, Mar 07, 2020 at 10:58:54AM +1000, Nicholas Piggin wrote:
> Segher Boessenkool's on March 5, 2020 8:55 pm:
> > That name looks perfect to me.  You'll have to update REs expecting the
> > arch at the end (like /le$/), but you had to already I think?
> 
> le$ is still okay for testing ppc64le, unless you wanted me to add the 
> -elfv2 suffix on there as well? If that was the case, for consistency
> we'd also have to add -elfv1 for the BE v1 case. I was just going to add
> -elfv2 for the new variant.

I was thinking that, yes, but your plan is better alright (there aren't
many supported configs at all, anyway!)


Segher


Re: [PATCH v5 4/7] ASoC: fsl_asrc: rename asrc_priv to asrc

2020-03-09 Thread Nicolin Chen
On Mon, Mar 09, 2020 at 11:58:31AM +0800, Shengjiu Wang wrote:
> In order to move the common structure to fsl_asrc_common.h,
> we change the name of asrc_priv to asrc; asrc_priv
> will be used by the new struct fsl_asrc_priv.

This actually could be a cleanup patch which comes as the
first one in this series, so that we could ack it and get it
merged without depending on the others. Maybe next version?

Thanks

> 
> Signed-off-by: Shengjiu Wang 
> ---
>  sound/soc/fsl/fsl_asrc.c | 296 +--
>  sound/soc/fsl/fsl_asrc.h |   4 +-
>  sound/soc/fsl/fsl_asrc_dma.c |  24 +--
>  3 files changed, 162 insertions(+), 162 deletions(-)
> 
> diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
> index 11dfe3068b04..eea19e2b723b 100644
> --- a/sound/soc/fsl/fsl_asrc.c
> +++ b/sound/soc/fsl/fsl_asrc.c
> @@ -21,10 +21,10 @@
>  #define IDEAL_RATIO_DECIMAL_DEPTH 26
>  
>  #define pair_err(fmt, ...) \
> - dev_err(&asrc_priv->pdev->dev, "Pair %c: " fmt, 'A' + index, 
> ##__VA_ARGS__)
> + dev_err(&asrc->pdev->dev, "Pair %c: " fmt, 'A' + index, ##__VA_ARGS__)
>  
>  #define pair_dbg(fmt, ...) \
> - dev_dbg(&asrc_priv->pdev->dev, "Pair %c: " fmt, 'A' + index, 
> ##__VA_ARGS__)
> + dev_dbg(&asrc->pdev->dev, "Pair %c: " fmt, 'A' + index, ##__VA_ARGS__)
>  
>  /* Corresponding to process_option */
>  static unsigned int supported_asrc_rate[] = {
> @@ -157,15 +157,15 @@ static void fsl_asrc_sel_proc(int inrate, int outrate,
>  int fsl_asrc_request_pair(int channels, struct fsl_asrc_pair *pair)
>  {
>   enum asrc_pair_index index = ASRC_INVALID_PAIR;
> - struct fsl_asrc *asrc_priv = pair->asrc_priv;
> - struct device *dev = &asrc_priv->pdev->dev;
> + struct fsl_asrc *asrc = pair->asrc;
> + struct device *dev = &asrc->pdev->dev;
>   unsigned long lock_flags;
>   int i, ret = 0;
>  
> - spin_lock_irqsave(&asrc_priv->lock, lock_flags);
> + spin_lock_irqsave(&asrc->lock, lock_flags);
>  
>   for (i = ASRC_PAIR_A; i < ASRC_PAIR_MAX_NUM; i++) {
> - if (asrc_priv->pair[i] != NULL)
> + if (asrc->pair[i] != NULL)
>   continue;
>  
>   index = i;
> @@ -177,17 +177,17 @@ int fsl_asrc_request_pair(int channels, struct 
> fsl_asrc_pair *pair)
>   if (index == ASRC_INVALID_PAIR) {
>   dev_err(dev, "all pairs are busy now\n");
>   ret = -EBUSY;
> - } else if (asrc_priv->channel_avail < channels) {
> + } else if (asrc->channel_avail < channels) {
>   dev_err(dev, "can't afford required channels: %d\n", channels);
>   ret = -EINVAL;
>   } else {
> - asrc_priv->channel_avail -= channels;
> - asrc_priv->pair[index] = pair;
> + asrc->channel_avail -= channels;
> + asrc->pair[index] = pair;
>   pair->channels = channels;
>   pair->index = index;
>   }
>  
> - spin_unlock_irqrestore(&asrc_priv->lock, lock_flags);
> + spin_unlock_irqrestore(&asrc->lock, lock_flags);
>  
>   return ret;
>  }
> @@ -195,25 +195,25 @@ int fsl_asrc_request_pair(int channels, struct 
> fsl_asrc_pair *pair)
>  /**
>   * Release ASRC pair
>   *
> - * It clears the resource from asrc_priv and releases the occupied channels.
> + * It clears the resource from asrc and releases the occupied channels.
>   */
>  void fsl_asrc_release_pair(struct fsl_asrc_pair *pair)
>  {
> - struct fsl_asrc *asrc_priv = pair->asrc_priv;
> + struct fsl_asrc *asrc = pair->asrc;
>   enum asrc_pair_index index = pair->index;
>   unsigned long lock_flags;
>  
>   /* Make sure the pair is disabled */
> - regmap_update_bits(asrc_priv->regmap, REG_ASRCTR,
> + regmap_update_bits(asrc->regmap, REG_ASRCTR,
>  ASRCTR_ASRCEi_MASK(index), 0);
>  
> - spin_lock_irqsave(&asrc_priv->lock, lock_flags);
> + spin_lock_irqsave(&asrc->lock, lock_flags);
>  
> - asrc_priv->channel_avail += pair->channels;
> - asrc_priv->pair[index] = NULL;
> + asrc->channel_avail += pair->channels;
> + asrc->pair[index] = NULL;
>   pair->error = 0;
>  
> - spin_unlock_irqrestore(&asrc_priv->lock, lock_flags);
> + spin_unlock_irqrestore(&asrc->lock, lock_flags);
>  }
>  
>  /**
> @@ -221,10 +221,10 @@ void fsl_asrc_release_pair(struct fsl_asrc_pair *pair)
>   */
>  static void fsl_asrc_set_watermarks(struct fsl_asrc_pair *pair, u32 in, u32 
> out)
>  {
> - struct fsl_asrc *asrc_priv = pair->asrc_priv;
> + struct fsl_asrc *asrc = pair->asrc;
>   enum asrc_pair_index index = pair->index;
>  
> - regmap_update_bits(asrc_priv->regmap, REG_ASRMCR(index),
> + regmap_update_bits(asrc->regmap, REG_ASRMCR(index),
>  ASRMCRi_EXTTHRSHi_MASK |
>  ASRMCRi_INFIFO_THRESHOLD_MASK |
>  ASRMCRi_OUTFIFO_THRESHOLD_MASK,
> @@ -257,7 +257,7 @@ static u32 fsl_asrc_cal_asrck_divisor(struct 
> fs

Re: [PATCH v5 1/7] ASoC: dt-bindings: fsl_asrc: Add new property fsl,asrc-format

2020-03-09 Thread Nicolin Chen
On Mon, Mar 09, 2020 at 11:58:28AM +0800, Shengjiu Wang wrote:
> In order to support the new EASRC and simplify the code structure,
> we decided to share the common structure between them. This brings
> a problem: EASRC accepts a format directly from the devicetree, but
> ASRC accepts a width from the devicetree.
> 
> In order to align with the new EASRC, we add a new property, fsl,asrc-format.
> fsl,asrc-format can replace fsl,asrc-width, so the driver
> can accept the format from the devicetree and doesn't need to derive
> the format from the width.
> 
> Signed-off-by: Shengjiu Wang 
> ---
>  Documentation/devicetree/bindings/sound/fsl,asrc.txt | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/sound/fsl,asrc.txt 
> b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
> index cb9a25165503..780455cf7f71 100644
> --- a/Documentation/devicetree/bindings/sound/fsl,asrc.txt
> +++ b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
> @@ -51,6 +51,11 @@ Optional properties:
> will be in use as default. Otherwise, the big endian
> mode will be in use for all the device registers.
>  
> +   - fsl,asrc-format : Defines a mutual sample format used by DPCM Back
> +   Ends, which can replace the fsl,asrc-width.
> +   The value is SNDRV_PCM_FORMAT_S16_LE, or
> +   SNDRV_PCM_FORMAT_S24_LE

I still have a concern about the DT binding of this format,
as it uses values from an ASoC header file instead of a dt-binding
header file -- not sure if we can do this. Let's wait for Rob's
comments.


Re: [PATCH v5 2/7] ASoC: fsl-asoc-card: Support new property fsl,asrc-format

2020-03-09 Thread Nicolin Chen
On Mon, Mar 09, 2020 at 11:58:29AM +0800, Shengjiu Wang wrote:
> In order to align with the new EASRC, we add a new property, fsl,asrc-format.
> fsl,asrc-format can replace fsl,asrc-width; the driver
> can accept the format from the devicetree and doesn't need to derive
> the format from the width.
> 
> Signed-off-by: Shengjiu Wang 
> ---
>  sound/soc/fsl/fsl-asoc-card.c | 20 +++-
>  1 file changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
> index 9ce55feaac22..32101b9a37b9 100644
> --- a/sound/soc/fsl/fsl-asoc-card.c
> +++ b/sound/soc/fsl/fsl-asoc-card.c
> @@ -680,17 +680,19 @@ static int fsl_asoc_card_probe(struct platform_device 
> *pdev)
>   goto asrc_fail;
>   }
>  
> - ret = of_property_read_u32(asrc_np, "fsl,asrc-width", &width);
> + ret = of_property_read_u32(asrc_np, "fsl,asrc-format", 
> &priv->asrc_format);
>   if (ret) {
> - dev_err(&pdev->dev, "failed to get output rate\n");

Nice that your patch fixed my copy-n-paste typo here :)

> - ret = -EINVAL;
> - goto asrc_fail;
> - }

It'd be nicer to have a line of comments:
/* Fallback to old binding; translate to asrc_format */

> + ret = of_property_read_u32(asrc_np, "fsl,asrc-width", 
> &width);
> + if (ret) {
> + dev_err(&pdev->dev, "failed to get output 
> width\n");
> + return ret;
> + }
>  
> - if (width == 24)
> - priv->asrc_format = SNDRV_PCM_FORMAT_S24_LE;
> - else
> - priv->asrc_format = SNDRV_PCM_FORMAT_S16_LE;
> + if (width == 24)
> + priv->asrc_format = SNDRV_PCM_FORMAT_S24_LE;
> + else
> + priv->asrc_format = SNDRV_PCM_FORMAT_S16_LE;
> + }


Re: [PATCH v4 0/8] powerpc/perf: Add json file metric support for the hv_24x7 socket/chip level events

2020-03-09 Thread Arnaldo Melo
Sure, will do it today

On March 9, 2020 6:35:06 AM GMT-03:00, Jiri Olsa  wrote:
>On Mon, Mar 09, 2020 at 11:55:44AM +0530, Kajol Jain wrote:
>> First patch of the patchset fixes the inconsistent results we are
>> getting when we run multiple 24x7 events.
>> 
>> The patchset adds json file metric support for the hv_24x7 socket/chip
>> level events. "hv_24x7" pmu interface events need a system dependent
>> parameter like socket/chip/core. For example, hv_24x7 chip level events
>> need the specific chip-id for which the data is requested to be added
>> as part of the pmu events.
>> 
>> So to enable JSON file support in the "hv_24x7" interface, the patchset
>> exposes the total number of sockets and the chips-per-socket details in
>> sysfs files (sockets, chips) under "/sys/devices/hv_24x7/interface/".
>> 
>> To get the sockets and the number of chips per socket, the patchset
>> adds a rtas call with token "PROCESSOR_MODULE_INFO" to get these
>> details. The patchset also handles the partition migration case,
>> re-initialising these system dependent parameters by adding proper
>> calls in post_mobility_fixup() (mobility.c).
>> 
>> Patches 6 & 8 of the patchset handle the perf tool plumbing needed to
>> replace the "?" character in the metric expression with the proper
>> value, and add the hv_24x7 json metric file for different socket/chip
>> resources.
>> 
>> The patch set also enables Hz/hz printing for the --metric-only option
>> to print metric data for bus frequency.
>> 
>> Applied and tested all these patches cleanly on top of jiri's flex
>> changes, with the changes done by Kan Liang for the "Support metric
>> group constraint" patchset, and made the required changes.
>> 
>> Changelog:
>> v3 -> v4
>> - Made changes suggested by jiri.
>
>could you please mention them next time? ;-)
>
>> - Applied these patches on top of Kan Liang's changes.
>
>Arnaldo, could you please pull the expr flex changes and Kan's
>metric group constraint changes? They're both prerequisites of this
>patchset.
>
>thanks,
>jirka

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.

Re: [PATCH 8/8] powerpc/papr_scm: Implement support for DSM_PAPR_SCM_HEALTH

2020-03-09 Thread Aneesh Kumar K.V
Vaibhav Jain  writes:

> The DSM 'DSM_PAPR_SCM_HEALTH' should return a 'struct
> nd_papr_scm_dimm_health_stat' containing dimm health information back
> to user space in response to ND_CMD_CALL. We implement this DSM by
> implementing a new function papr_scm_get_health() that queries the
> DIMM health information and then copies these bitmaps to the package
> payload whose layout is defined by 'struct papr_scm_ndctl_health'.
>
> The patch also handles cases where future versions of 'struct
> papr_scm_ndctl_health' may want to return more health
> information. Such payload envelopes will contain appropriate version
> information in 'struct nd_papr_scm_cmd_pkg.payload_version'. The patch
> takes care of only returning the sub-data corresponding to the payload
> version requested. Please see the comments in papr_scm_get_health()
> for how this is done.
>
> Signed-off-by: Vaibhav Jain 
> ---
>  arch/powerpc/platforms/pseries/papr_scm.c | 73 +++
>  1 file changed, 73 insertions(+)
>
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
> b/arch/powerpc/platforms/pseries/papr_scm.c
> index 29f38246c59f..bf81acb0bf3f 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -415,6 +415,74 @@ static int cmd_to_func(struct nvdimm *nvdimm, unsigned 
> int cmd, void *buf,
>   return pkg->hdr.nd_command;
>  }
>  
> +/*
> + * Fetch the DIMM health info and populate it in provided papr_scm package.
> + * Since the caller can request a different version of payload and each new
> + * version of struct nd_papr_scm_dimm_health_stat is a proper-subset of
> + * previous version hence we return a subset of the cached 'struct
> + * nd_papr_scm_dimm_health_stat' depending on the payload version requested.
> + */
> +static int papr_scm_get_health(struct papr_scm_priv *p,
> +struct nd_papr_scm_cmd_pkg *pkg)
> +{
> + int rc;
> + size_t copysize;
> + /* Map version to number of bytes to be copied to payload */
> + const size_t copysizes[] = {
> + [1] =
> + sizeof(struct nd_papr_scm_dimm_health_stat_v1),
> +
> + /*  This should always be present */
> + [ND_PAPR_SCM_DIMM_HEALTH_VERSION] =
> + sizeof(struct nd_papr_scm_dimm_health_stat),
> + };


We will not be able to determine that at build time. For the performance
hcall to run, the LPAR should be privileged. I.e., even if the kernel
supports the v2 version of the health information, it may only be able to
return the v1 version of the health because the LPAR performance stat
hcall failed.

-aneesh


Re: [PATCH 3/8] powerpc/papr_scm: Fetch dimm performance stats from PHYP

2020-03-09 Thread Aneesh Kumar K.V
Vaibhav Jain  writes:

> Implement support for fetching dimm performance metrics via
> H_SCM_PERFORMANCE_HEALTH hcall as documented in Ref[1]. The hcall
> returns a structure as described in Ref[1] and defined as newly
> introduced 'struct papr_scm_perf_stats'. The struct has a header
> followed by key-value pairs of performance attributes. The 'key' part
> is an 8-byte char array naming the attribute, encoded as a __be64
> integer. This makes the output buffer format for the hcall
> self-describing and easily interpreted.
>
> This patch implements functionality to fetch these performance stats
> and reporting them via a nvdimm sysfs attribute named
> 'papr_perf_stats'.
>
> A new function drc_pmem_query_stats() is implemented that issues hcall
> H_SCM_PERFORMANCE_HEALTH ,requesting PHYP to store performance stats
> in pre-allocated 'struct papr_scm_perf_stats' buffer. During nvdimm
> initialization in papr_scm_nvdimm_init() this function is called with
> an empty buffer to know the max buffer size needed for issuing the
> H_SCM_PERFORMANCE_HEALTH hcall. The buffer size retrieved is stored in
> newly introduced 'struct papc_scm_priv.len_stat_buffer' for later
> retrival.
>
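
For reference, a sketch of the self-describing layout the quoted message
describes; the field names here are illustrative assumptions, not
necessarily the posted definitions:

    struct papr_scm_perf_stat {
            u8 stat_id[8];          /* attribute name, compared as a __be64 */
            __be64 stat_val;
    };

    struct papr_scm_perf_stats {
            u8 eye_catcher[8];
            __be32 stats_version;   /* header version number */
            __be32 num_statistics;  /* number of key/value pairs */
            struct papr_scm_perf_stat scm_statistic[];
    };

    /* After a successful hcall, the pairs can be walked like so: */
    for (i = 0; i < be32_to_cpu(stats->num_statistics); i++)
            pr_info("%.8s = %llu\n", stats->scm_statistic[i].stat_id,
                    be64_to_cpu(stats->scm_statistic[i].stat_val));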

You are not using this hcall in the series? If so, can you drop it from
the series and add it back when you want to use the hcall's returned value?

> [1]: commit 58b278f568f0 ("powerpc: Provide initial documentation for
> PAPR hcalls")
>


-aneesh


Re: [PATCH 7/8] powerpc/papr_scm: Re-implement 'papr_flags' using 'nd_papr_scm_dimm_health_stat'

2020-03-09 Thread Aneesh Kumar K.V
Vaibhav Jain  writes:

> A previous commit [1] introduced 'struct nd_papr_scm_dimm_health_stat'
> for communicating the health status of an nvdimm to libndctl. This
> struct, however, can also be used to cache the nvdimm health information
> in 'struct papr_scm_priv' instead of two '__be64' values. The benefit of
> this refactoring will become apparent when support for libndctl
> requesting nvdimm health stats is implemented, where we can simply
> memcpy this struct over to the user-space provided payload envelope.
>
> Hence this patch introduces a new member 'struct papr_scm_priv.health'
> that caches the health information of a dimm. This member is populated
> inside drc_pmem_query_health(), which checks for the various bit flags
> returned from H_SCM_HEALTH and sets them in this struct. We also
> refactor the 'papr_flags' sysfs attribute show function papr_flags_show()
> to use the flags in 'struct papr_scm_priv.health' to return the
> appropriate status strings pertaining to dimm health.
>
> This patch shouldn't introduce any behavioral change.

This patch is undoing what is done in PATCH 2. If you reorder things,
could we drop either this or PATCH 2?

>
> Signed-off-by: Vaibhav Jain 
> ---
>  arch/powerpc/platforms/pseries/papr_scm.c | 61 ---
>  1 file changed, 44 insertions(+), 17 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
> b/arch/powerpc/platforms/pseries/papr_scm.c
> index d5eea2f38fa6..29f38246c59f 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -47,8 +47,7 @@ struct papr_scm_priv {
>   struct mutex dimm_mutex;
>  
>   /* Health information for the dimm */
> - __be64 health_bitmap;
> - __be64 health_bitmap_valid;
> + struct nd_papr_scm_dimm_health_stat health;
>  
>   /* length of the stat buffer as expected by phyp */
>   size_t len_stat_buffer;
> @@ -205,6 +204,7 @@ static int drc_pmem_query_health(struct papr_scm_priv *p)
>  {
>   unsigned long ret[PLPAR_HCALL_BUFSIZE];
>   int64_t rc;
> + __be64 health;
>  
>   rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
>   if (rc != H_SUCCESS) {
> @@ -219,13 +219,41 @@ static int drc_pmem_query_health(struct papr_scm_priv 
> *p)
>   return rc;
>  
>   /* Store the retrieved health information in dimm platform data */
> - p->health_bitmap = ret[0];
> - p->health_bitmap_valid = ret[1];
> + health = ret[0] & ret[1];
>  
>   dev_dbg(&p->pdev->dev,
>   "Queried dimm health info. Bitmap:0x%016llx Mask:0x%016llx\n",
> - be64_to_cpu(p->health_bitmap),
> - be64_to_cpu(p->health_bitmap_valid));
> + be64_to_cpu(ret[0]),
> + be64_to_cpu(ret[1]));
> +
> + memset(&p->health, 0, sizeof(p->health));
> +
> + /* Check for various masks in bitmap and set the buffer */
> + if (health & PAPR_SCM_DIMM_UNARMED_MASK)
> + p->health.dimm_unarmed = true;
> +
> + if (health & PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK)
> + p->health.dimm_bad_shutdown = true;
> +
> + if (health & PAPR_SCM_DIMM_BAD_RESTORE_MASK)
> + p->health.dimm_bad_restore = true;
> +
> + if (health & PAPR_SCM_DIMM_ENCRYPTED)
> + p->health.dimm_encrypted = true;
> +
> + if (health & PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED) {
> + p->health.dimm_locked = true;
> + p->health.dimm_scrubbed = true;
> + }
> +
> + if (health & PAPR_SCM_DIMM_HEALTH_UNHEALTHY)
> + p->health.dimm_health = DSM_PAPR_SCM_DIMM_UNHEALTHY;
> +
> + if (health & PAPR_SCM_DIMM_HEALTH_CRITICAL)
> + p->health.dimm_health = DSM_PAPR_SCM_DIMM_CRITICAL;
> +
> + if (health & PAPR_SCM_DIMM_HEALTH_FATAL)
> + p->health.dimm_health = DSM_PAPR_SCM_DIMM_FATAL;
>  
>   mutex_unlock(&p->dimm_mutex);
>   return 0;
> @@ -513,7 +541,6 @@ static ssize_t papr_flags_show(struct device *dev,
>  {
>   struct nvdimm *dimm = to_nvdimm(dev);
>   struct papr_scm_priv *p = nvdimm_provider_data(dimm);
> - __be64 health;
>   int rc;
>  
>   rc = drc_pmem_query_health(p);
> @@ -525,26 +552,26 @@ static ssize_t papr_flags_show(struct device *dev,
>   if (rc)
>   return rc;
>  
> - health = p->health_bitmap & p->health_bitmap_valid;
> -
> - /* Check for various masks in bitmap and set the buffer */
> - if (health & PAPR_SCM_DIMM_UNARMED_MASK)
> + if (p->health.dimm_unarmed)
>   rc += sprintf(buf, "not_armed ");
>  
> - if (health & PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK)
> + if (p->health.dimm_bad_shutdown)
>   rc += sprintf(buf + rc, "save_fail ");
>  
> - if (health & PAPR_SCM_DIMM_BAD_RESTORE_MASK)
> + if (p->health.dimm_bad_restore)
>   rc += sprintf(buf + rc, "restore_fail ");
>  
> - if (health & PAPR_SCM_DIMM_ENCRYPTED)
> + if (p->health.dimm_encrypted)
>   rc += sprintf(buf + 

Re: [PATCH 2/8] powerpc/papr_scm: Provide support for fetching dimm health information

2020-03-09 Thread Aneesh Kumar K.V
Vaibhav Jain  writes:

> Implement support for fetching dimm health information via
> H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair of
> 64-bit big-endian integers which are then stored in 'struct
> papr_scm_priv' and subsequently exposed to userspace via dimm
> attribute 'papr_flags'.

Can you add this as part of PATCH 1? While reviewing, I had to go back
and forth between 1 and 2 to find the different flags. IMHO it would be
nice to introduce new flags and the users of those flags together.
>
> 'papr_flags' sysfs attribute reports space separated string flags
> indicating various health state an nvdimm can be. These are:
>
> * "not_armed" : Indicating that nvdimm contents wont survive a power
> cycle.
> * "save_fail" : Indicating that nvdimm contents couldn't be flushed
> during last shutdown event.
> * "restore_fail": Indicating that nvdimm contents couldn't be restored
> during dimm initialization.
> * "encrypted" : Dimm contents are encrypted.
> * "smart_notify": There is health event for the nvdimm.

Can you explain this more? Is this supposed to be set when there is a
smart event? Currently you derive this based on:

+/* Bit status indicators for smart event notification */
+#define PAPR_SCM_DIMM_SMART_EVENT_MASK (PAPR_SCM_DIMM_HEALTH_CRITICAL | \
+  PAPR_SCM_DIMM_HEALTH_FATAL | \
+  PAPR_SCM_DIMM_HEALTH_UNHEALTHY | \
+  PAPR_SCM_DIMM_HEALTH_NON_CRITICAL)


> * "scrubbed"  : Indicating that contents of the nvdimm have been
> scrubbed.
> * "locked": Indicating that nvdimm contents cant be modified
> until next power cycle.
>
> [1]: commit 58b278f568f0 ("powerpc: Provide initial documentation for
> PAPR hcalls")
>


-aneesh


Re: [PATCH 1/8] powerpc: Add asm header 'papr_scm.h' describing the papr-scm interface

2020-03-09 Thread Aneesh Kumar K.V
Vaibhav Jain  writes:

> Add a new powerpc-specific asm header named 'papr_scm.h' that describes
> the interface between PHYP and a guest kernel running as an LPAR.
>
> The HCALLs specific to managing SCM are described in Ref[1]. The asm
> header introduced by this patch, however, describes the data structures
> exchanged between PHYP and the kernel during those HCALLs.
>
> Future patches will use these structures to provide support for
> retrieving nvdimm health and performance stats in the papr_scm kernel
> module.
>
> [1]: commit 58b278f568f0 ("powerpc: Provide initial documentation for
> PAPR hcalls")
>
> Signed-off-by: Vaibhav Jain 
> ---
>  arch/powerpc/include/asm/papr_scm.h | 68 +
>  1 file changed, 68 insertions(+)
>  create mode 100644 arch/powerpc/include/asm/papr_scm.h
>
> diff --git a/arch/powerpc/include/asm/papr_scm.h 
> b/arch/powerpc/include/asm/papr_scm.h
> new file mode 100644
> index ..d893621063f3
> --- /dev/null
> +++ b/arch/powerpc/include/asm/papr_scm.h
> @@ -0,0 +1,68 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Structures and defines needed to manage nvdimms for spapr guests.
> + */
> +#ifndef _ASM_POWERPC_PAPR_SCM_H_
> +#define _ASM_POWERPC_PAPR_SCM_H_
> +
> +#include 
> +#include 
> +#include 
> +
> +/* DIMM health bitmap indicators */
> +/* SCM device is unable to persist memory contents */
> +#define PAPR_SCM_DIMM_UNARMED    PPC_BIT(0)
> +/* SCM device failed to persist memory contents */
> +#define PAPR_SCM_DIMM_SHUTDOWN_DIRTY PPC_BIT(1)
> +/* SCM device contents are persisted from previous IPL */
> +#define PAPR_SCM_DIMM_SHUTDOWN_CLEAN PPC_BIT(2)
> +/* SCM device contents are not persisted from previous IPL */
> +#define PAPR_SCM_DIMM_EMPTY  PPC_BIT(3)
> +/* SCM device memory life remaining is critically low */
> +#define PAPR_SCM_DIMM_HEALTH_CRITICAL    PPC_BIT(4)
> +/* SCM device will be garded off next IPL due to failure */
> +#define PAPR_SCM_DIMM_HEALTH_FATAL   PPC_BIT(5)
> +/* SCM contents cannot persist due to current platform health status */
> +#define PAPR_SCM_DIMM_HEALTH_UNHEALTHY   PPC_BIT(6)
> +/* SCM device is unable to persist memory contents in certain conditions */
> +#define PAPR_SCM_DIMM_HEALTH_NON_CRITICAL    PPC_BIT(7)

That is not really a bad health condition. That is an indication of
vpmem where we won't persist memory contents on CEC reboot.



> +/* SCM device is encrypted */
> +#define PAPR_SCM_DIMM_ENCRYPTED  PPC_BIT(8)
> +/* SCM device has been scrubbed and locked */
> +#define PAPR_SCM_DIMM_SCRUBBED_AND_LOCKED    PPC_BIT(9)
> +
> +/* Bits status indicators for health bitmap indicating unarmed dimm */
> +#define PAPR_SCM_DIMM_UNARMED_MASK (PAPR_SCM_DIMM_UNARMED |  \
> + PAPR_SCM_DIMM_HEALTH_UNHEALTHY | \
> + PAPR_SCM_DIMM_HEALTH_NON_CRITICAL)

Based on the above, I guess you should remove PAPR_SCM_DIMM_HEALTH_NON_CRITICAL
from the above?

> +
> +/* Bits status indicators for health bitmap indicating unflushed dimm */
> +#define PAPR_SCM_DIMM_BAD_SHUTDOWN_MASK (PAPR_SCM_DIMM_SHUTDOWN_DIRTY)
> +
> +/* Bits status indicators for health bitmap indicating unrestored dimm */
> +#define PAPR_SCM_DIMM_BAD_RESTORE_MASK  (PAPR_SCM_DIMM_EMPTY)
> +
> +/* Bit status indicators for smart event notification */
> +#define PAPR_SCM_DIMM_SMART_EVENT_MASK (PAPR_SCM_DIMM_HEALTH_CRITICAL | \
> +PAPR_SCM_DIMM_HEALTH_FATAL | \
> +PAPR_SCM_DIMM_HEALTH_UNHEALTHY | \
> +PAPR_SCM_DIMM_HEALTH_NON_CRITICAL)
> +

-aneesh


Re: [PATCH v4 0/8] powerpc/perf: Add json file metric support for the hv_24x7 socket/chip level events

2020-03-09 Thread Jiri Olsa
On Mon, Mar 09, 2020 at 11:55:44AM +0530, Kajol Jain wrote:
> The first patch of the patchset fixes the inconsistent results we are
> getting when we run multiple 24x7 events.
> 
> The patchset adds json file metric support for the hv_24x7 socket/chip
> level events. "hv_24x7" pmu interface events need system-dependent
> parameters like socket/chip/core. For example, hv_24x7 chip-level events
> need the specific chip-id for which the data is requested to be added as
> part of the pmu event.
> 
> So, to enable JSON file support for the "hv_24x7" interface, the patchset
> exposes the total number of sockets and the chips-per-socket details in
> sysfs files (sockets, chips) under "/sys/devices/hv_24x7/interface/".
> 
> To get the number of sockets and chips per socket, the patchset adds an
> rtas call with token "PROCESSOR_MODULE_INFO" to retrieve these details.
> The patchset also handles the partition migration case, re-initializing
> these system-dependent parameters via calls added in
> post_mobility_fixup() (mobility.c).
> 
> Patches 6 & 8 of the patchset handle the perf tool plumbing needed to
> replace the "?" character in the metric expression with the proper value,
> plus the hv_24x7 json metric file for the different socket/chip
> resources; an illustrative expression follows below.
> 
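For example (illustrative only, not quoted from the posted json), a
chip-level metric expression might look like:

    "MetricExpr": "hv_24x7@PM_XLINK_CYCLES\\,chip\\=?@"

where the tool substitutes '?' with each chip-id read from the new sysfs
files before opening the events.
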
> The patchset also enables Hz/hz printing for the --metric-only option to
> print metric data for bus frequency.
> 
> Applied and tested all these patches cleanly on top of jiri's flex
> changes, with the changes done by Kan Liang for the "Support metric group
> constraint" patchset, and made the required changes.
> 
> Changelog:
> v3 -> v4
> - Made changes suggested by jiri.

could you please mention them next time? ;-)

> - Applied these patches on top of Kan Liang's changes.

Arnaldo, could you please pull the expr flex changes and Kan's
metric group constraint changes? they're both prereqs of this patchset

thanks,
jirka



[PATCH 15/15] powerpc/watchpoint/xmon: Support 2nd dawr

2020-03-09 Thread Ravi Bangoria
Add support for 2nd DAWR in xmon. With this, we can have two
simultaneous breakpoints from xmon.
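
An illustrative session (addresses made up; output format per
print_data_bpts() in the diff below):

    0:mon> bd c000000002a30000
    0:mon> bd c000000002a30100
    0:mon> b
       type            address
       data   c000000002a30000  [rw]
       data   c000000002a30100  [rw]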

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/xmon/xmon.c | 101 ++-
 1 file changed, 69 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index ac18fe3e4295..20adc83404c8 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -110,7 +110,7 @@ struct bpt {
 
 #define NBPTS  256
 static struct bpt bpts[NBPTS];
-static struct bpt dabr;
+static struct bpt dabr[HBP_NUM_MAX];
 static struct bpt *iabr;
static unsigned bpinstr = 0x7fe00008;  /* trap */
 
@@ -786,10 +786,17 @@ static int xmon_sstep(struct pt_regs *regs)
 
 static int xmon_break_match(struct pt_regs *regs)
 {
+   int i;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
-   if (dabr.enabled == 0)
-   return 0;
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (dabr[i].enabled)
+   goto found;
+   }
+   return 0;
+
+found:
xmon_core(regs, 0);
return 1;
 }
@@ -928,13 +935,16 @@ static void insert_bpts(void)
 
 static void insert_cpu_bpts(void)
 {
+   int i;
struct arch_hw_breakpoint brk;
 
-   if (dabr.enabled) {
-   brk.address = dabr.address;
-   brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | 
HW_BRK_TYPE_PRIV_ALL;
-   brk.len = DABR_MAX_LEN;
-   __set_breakpoint(&brk, 0);
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (dabr[i].enabled) {
+   brk.address = dabr[i].address;
+   brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | 
HW_BRK_TYPE_PRIV_ALL;
+   brk.len = 8;
+   __set_breakpoint(&brk, i);
+   }
}
 
if (iabr)
@@ -1348,6 +1358,35 @@ static long check_bp_loc(unsigned long addr)
return 1;
 }
 
+static int free_data_bpt(void)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (!dabr[i].enabled)
+   return i;
+   }
+   printf("Couldn't find free breakpoint register\n");
+   return -1;
+}
+
+static void print_data_bpts(void)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (!dabr[i].enabled)
+   continue;
+
+   printf("   data   "REG"  [", dabr[i].address);
+   if (dabr[i].enabled & 1)
+   printf("r");
+   if (dabr[i].enabled & 2)
+   printf("w");
+   printf("]\n");
+   }
+}
+
 static char *breakpoint_help_string =
 "Breakpoint command usage:\n"
 "bshow breakpoints\n"
@@ -1381,10 +1420,9 @@ bpt_cmds(void)
printf("Hardware data breakpoint not supported on this 
cpu\n");
break;
}
-   if (dabr.enabled) {
-   printf("Couldn't find free breakpoint register\n");
+   i = free_data_bpt();
+   if (i < 0)
break;
-   }
mode = 7;
cmd = inchar();
if (cmd == 'r')
@@ -1393,15 +1431,15 @@ bpt_cmds(void)
mode = 6;
else
termch = cmd;
-   dabr.address = 0;
-   dabr.enabled = 0;
-   if (scanhex(&dabr.address)) {
-   if (!is_kernel_addr(dabr.address)) {
+   dabr[i].address = 0;
+   dabr[i].enabled = 0;
+   if (scanhex(&dabr[i].address)) {
+   if (!is_kernel_addr(dabr[i].address)) {
printf(badaddr);
break;
}
-   dabr.address &= ~HW_BRK_TYPE_DABR;
-   dabr.enabled = mode | BP_DABR;
+   dabr[i].address &= ~HW_BRK_TYPE_DABR;
+   dabr[i].enabled = mode | BP_DABR;
}
 
force_enable_xmon();
@@ -1440,7 +1478,9 @@ bpt_cmds(void)
for (i = 0; i < NBPTS; ++i)
bpts[i].enabled = 0;
iabr = NULL;
-   dabr.enabled = 0;
+   for (i = 0; i < nr_wp_slots(); i++)
+   dabr[i].enabled = 0;
+
printf("All breakpoints cleared\n");
break;
}
@@ -1474,14 +1514,7 @@ bpt_cmds(void)
if (xmon_is_ro || !scanhex(&a)) {
/* print all breakpoints */
printf("   typeaddress\n");
-   if (dabr.enabled) {
-   printf("   data   "REG"  [", dabr.address);
- 

[PATCH 14/15] powerpc/watchpoint/xmon: Don't allow breakpoint overwriting

2020-03-09 Thread Ravi Bangoria
Xmon allows overwriting breakpoints because only one DAWR is supported.
But with multiple DAWRs, overwriting becomes ambiguous or unnecessarily
complicated. So let's not allow it.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/xmon/xmon.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 0ca0d29f99c6..ac18fe3e4295 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1381,6 +1381,10 @@ bpt_cmds(void)
printf("Hardware data breakpoint not supported on this 
cpu\n");
break;
}
+   if (dabr.enabled) {
+   printf("Couldn't find free breakpoint register\n");
+   break;
+   }
mode = 7;
cmd = inchar();
if (cmd == 'r')
-- 
2.21.1



[PATCH 13/15] powerpc/watchpoint: Don't allow concurrent perf and ptrace events

2020-03-09 Thread Ravi Bangoria
ptrace and perf watchpoints on powerpc behave differently. A ptrace
watchpoint works in one-shot mode and generates a signal before the
instruction is executed. It's the ptrace user's job to single-step the
instruction and re-enable the watchpoint. OTOH, in the case of a perf
watchpoint, the kernel emulates/single-steps the instruction and then
generates the event. If perf and ptrace create two events with the same
or overlapping address ranges, it's ambiguous to decide who should
single-step the instruction. Because of this issue, ptrace and perf
events can't coexist when their address ranges overlap.

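On the generic side, the kernel/events/hw_breakpoint.c change presumably
boils down to weak default hooks that the powerpc code overrides, along
these lines (a sketch, not the posted hunk):

    /* Generic no-op defaults; powerpc supplies real implementations */
    int __weak arch_reserve_bp_slot(struct perf_event *bp)
    {
            return 0;
    }

    void __weak arch_release_bp_slot(struct perf_event *bp)
    {
    }
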
Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/hw_breakpoint.h |   2 +
 arch/powerpc/kernel/hw_breakpoint.c  | 220 +++
 kernel/events/hw_breakpoint.c|  16 ++
 3 files changed, 238 insertions(+)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index ec61e2b7195c..6e1a19af5177 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -66,6 +66,8 @@ extern int hw_breakpoint_exceptions_notify(struct 
notifier_block *unused,
unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
 void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+int arch_reserve_bp_slot(struct perf_event *bp);
+void arch_release_bp_slot(struct perf_event *bp);
 void arch_unregister_hw_breakpoint(struct perf_event *bp);
 void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 2ac89b92590f..d8529d9151e8 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -123,6 +123,226 @@ static bool is_ptrace_bp(struct perf_event *bp)
return (bp->overflow_handler == ptrace_triggered);
 }
 
+struct breakpoint {
+   struct list_head list;
+   struct perf_event *bp;
+   bool ptrace_bp;
+};
+
+static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
+static LIST_HEAD(task_bps);
+
+static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
+{
+   struct breakpoint *tmp;
+
+   tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+   if (!tmp)
+   return ERR_PTR(-ENOMEM);
+   tmp->bp = bp;
+   tmp->ptrace_bp = is_ptrace_bp(bp);
+   return tmp;
+}
+
+static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event 
*bp2)
+{
+   __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr;
+
+   bp1_saddr = bp1->attr.bp_addr & ~HW_BREAKPOINT_ALIGN;
+   bp1_eaddr = (bp1->attr.bp_addr + bp1->attr.bp_len - 1) | 
HW_BREAKPOINT_ALIGN;
+   bp2_saddr = bp2->attr.bp_addr & ~HW_BREAKPOINT_ALIGN;
+   bp2_eaddr = (bp2->attr.bp_addr + bp2->attr.bp_len - 1) | 
HW_BREAKPOINT_ALIGN;
+
+   return (bp1_saddr <= bp2_eaddr && bp1_eaddr >= bp2_saddr);
+}
+
+static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp)
+{
+   return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp;
+}
+
+static bool can_co_exist(struct breakpoint *b, struct perf_event *bp)
+{
+   return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp));
+}
+
+static int task_bps_add(struct perf_event *bp)
+{
+   struct breakpoint *tmp;
+
+   tmp = alloc_breakpoint(bp);
+   if (IS_ERR(tmp))
+   return PTR_ERR(tmp);
+
+   list_add(&tmp->list, &task_bps);
+   return 0;
+}
+
+static void task_bps_remove(struct perf_event *bp)
+{
+   struct list_head *pos, *q;
+   struct breakpoint *tmp;
+
+   list_for_each_safe(pos, q, &task_bps) {
+   tmp = list_entry(pos, struct breakpoint, list);
+
+   if (tmp->bp == bp) {
+   list_del(&tmp->list);
+   kfree(tmp);
+   break;
+   }
+   }
+}
+
+/*
+ * If any task has breakpoint from alternate infrastructure,
+ * return true. Otherwise return false.
+ */
+static bool all_task_bps_check(struct perf_event *bp)
+{
+   struct breakpoint *tmp;
+
+   list_for_each_entry(tmp, &task_bps, list) {
+   if (!can_co_exist(tmp, bp))
+   return true;
+   }
+   return false;
+}
+
+/*
+ * If same task has breakpoint from alternate infrastructure,
+ * return true. Otherwise return false.
+ */
+static bool same_task_bps_check(struct perf_event *bp)
+{
+   struct breakpoint *tmp;
+
+   list_for_each_entry(tmp, &task_bps, list) {
+   if (tmp->bp->hw.target == bp->hw.target &&
+   !can_co_exist(tmp, bp))
+   return true;
+   }
+   return false;
+}
+
+static int cpu_bps_add(struct perf_event *bp)
+{
+   struct breakpoint **cpu_bp;
+   struct breakpoint *tmp;
+   int i = 0;
+
+   tmp = alloc_breakpoint(bp);
+   if (IS_ERR(tmp))
+   

[PATCH 12/15] powerpc/watchpoint: Prepare handler to handle more than one watchpoint

2020-03-09 Thread Ravi Bangoria
Currently we assume that we have only one watchpoint supported by hw.
Get rid of that assumption and use a dynamic loop instead. This should
make supporting more watchpoints very easy.

So far, with only one watchpoint, the handler was simple. But with
multiple watchpoints, we need a mechanism to detect which watchpoint
caused the exception. HW doesn't provide that information, so we need
software logic for this; a sketch follows below. This makes exception
handling a bit more complex.

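A sketch of the kind of software matching logic this implies (simplified
and hypothetical; the real handler must also cope with unaligned and
extraneous accesses):

    /* Does this slot's watchpoint cover the faulting effective address? */
    static bool wp_covers(struct arch_hw_breakpoint *info, unsigned long ea)
    {
            return ea >= info->address && ea < info->address + info->hw_len;
    }

    /* In hw_breakpoint_handler(), test every installed slot: */
    for (i = 0; i < nr_wp_slots(); i++) {
            bp = __this_cpu_read(bp_per_reg[i]);
            if (bp && wp_covers(counter_arch_bp(bp), regs->dar))
                    hit[i] = true;  /* slot i is a candidate */
    }
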
Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/processor.h |   2 +-
 arch/powerpc/include/asm/sstep.h |   2 +
 arch/powerpc/kernel/hw_breakpoint.c  | 396 +--
 arch/powerpc/kernel/process.c|   3 -
 4 files changed, 313 insertions(+), 90 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 57a8fac2e72b..1609ee75ee74 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -181,7 +181,7 @@ struct thread_struct {
 * Helps identify source of single-step exception and subsequent
 * hw-breakpoint enablement
 */
-   struct perf_event *last_hit_ubp;
+   struct perf_event *last_hit_ubp[HBP_NUM_MAX];
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint 
info */
unsigned long   trap_nr;/* last trap # on this thread */
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 769f055509c9..38919b27a6fa 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -48,6 +48,8 @@ enum instruction_type {
 
#define INSTR_TYPE_MASK    0x1f
 
+#define OP_IS_LOAD(type)   ((LOAD <= (type) && (type) <= LOAD_VSX) || 
(type) == LARX)
+#define OP_IS_STORE(type)  ((STORE <= (type) && (type) <= STORE_VSX) || 
(type) == STCX)
 #define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX)
 
 /* Compute flags, ORed in with type */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 0e35ff372d8e..2ac89b92590f 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -30,7 +30,7 @@
  * Stores the breakpoints currently in use on each breakpoint address
  * register for every cpu
  */
-static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);
 
 /*
  * Returns total number of data or instruction breakpoints available.
@@ -42,6 +42,17 @@ int hw_breakpoint_slots(int type)
return 0;   /* no instruction breakpoints available */
 }
 
+static bool single_step_pending(void)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (current->thread.last_hit_ubp[i])
+   return true;
+   }
+   return false;
+}
+
 /*
  * Install a perf counter breakpoint.
  *
@@ -54,16 +65,26 @@ int hw_breakpoint_slots(int type)
 int arch_install_hw_breakpoint(struct perf_event *bp)
 {
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-   struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+   struct perf_event **slot;
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   slot = this_cpu_ptr(&bp_per_reg[i]);
+   if (!*slot) {
+   *slot = bp;
+   break;
+   }
+   }
 
-   *slot = bp;
+   if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
+   return -EBUSY;
 
/*
 * Do not install DABR values if the instruction must be single-stepped.
 * If so, DABR will be populated in single_step_dabr_instruction().
 */
-   if (current->thread.last_hit_ubp != bp)
-   __set_breakpoint(info, 0);
+   if (!single_step_pending())
+   __set_breakpoint(info, i);
 
return 0;
 }
@@ -79,15 +100,22 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
  */
 void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 {
-   struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+   struct arch_hw_breakpoint null_brk = {0};
+   struct perf_event **slot;
+   int i;
 
-   if (*slot != bp) {
-   WARN_ONCE(1, "Can't find the breakpoint");
-   return;
+   for (i = 0; i < nr_wp_slots(); i++) {
+   slot = this_cpu_ptr(&bp_per_reg[i]);
+   if (*slot == bp) {
+   *slot = NULL;
+   break;
+   }
}
 
-   *slot = NULL;
-   hw_breakpoint_disable();
+   if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
+   return;
+
+   __set_breakpoint(&null_brk, i);
 }
 
 static bool is_ptrace_bp(struct perf_event *bp)
@@ -101,14 +129,20 @@ static bool is_ptrace_bp(struct perf_event *bp)
  */
 void arch_unregister_hw_breakpoint(str

[PATCH 11/15] powerpc/watchpoint: Introduce is_ptrace_bp() function

2020-03-09 Thread Ravi Bangoria
Introduce an is_ptrace_bp() function and move the check inside it.
We will utilize it more in a later set of patches.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/kernel/hw_breakpoint.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index b27aca623267..0e35ff372d8e 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -90,6 +90,11 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
hw_breakpoint_disable();
 }
 
+static bool is_ptrace_bp(struct perf_event *bp)
+{
+   return (bp->overflow_handler == ptrace_triggered);
+}
+
 /*
  * Perform cleanup of arch-specific counters during unregistration
  * of the perf-event
@@ -324,7 +329,7 @@ int hw_breakpoint_handler(struct die_args *args)
 * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
 * generated in do_dabr().
 */
-   if (bp->overflow_handler == ptrace_triggered) {
+   if (is_ptrace_bp(bp)) {
perf_bp_event(bp, regs);
rc = NOTIFY_DONE;
goto out;
-- 
2.21.1



[PATCH 10/15] powerpc/watchpoint: Use loop for thread_struct->ptrace_bps

2020-03-09 Thread Ravi Bangoria
ptrace_bps is already an array of size HBP_NUM_MAX, but we use a
hardcoded index 0 while fetching/updating it. Convert such code to
loop over the array.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/kernel/hw_breakpoint.c |  7 +--
 arch/powerpc/kernel/process.c   |  6 +-
 arch/powerpc/kernel/ptrace.c| 28 +---
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index f4d48f87dcb8..b27aca623267 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -419,10 +419,13 @@ NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);
  */
 void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
 {
+   int i;
struct thread_struct *t = &tsk->thread;
 
-   unregister_hw_breakpoint(t->ptrace_bps[0]);
-   t->ptrace_bps[0] = NULL;
+   for (i = 0; i < nr_wp_slots(); i++) {
+   unregister_hw_breakpoint(t->ptrace_bps[i]);
+   t->ptrace_bps[i] = NULL;
+   }
 }
 
 void hw_breakpoint_pmu_read(struct perf_event *bp)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 42ff62ef749c..b9ab740fcacf 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1628,6 +1628,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long usp,
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
struct thread_info *ti = task_thread_info(p);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+   int i;
+#endif
 
klp_init_thread_info(p);
 
@@ -1687,7 +1690,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long usp,
p->thread.ksp_limit = (unsigned long)end_of_stack(p);
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-   p->thread.ptrace_bps[0] = NULL;
+   for (i = 0; i < nr_wp_slots(); i++)
+   p->thread.ptrace_bps[i] = NULL;
 #endif
 
p->thread.fp_save_area = NULL;
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index f6d7955fc61e..e2651f86d56f 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2829,6 +2829,19 @@ static int set_dac_range(struct task_struct *child,
 }
 #endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static int empty_ptrace_bp(struct thread_struct *thread)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (!thread->ptrace_bps[i])
+   return i;
+   }
+   return -1;
+}
+#endif
+
 #ifndef CONFIG_PPC_ADV_DEBUG_REGS
 static int empty_hw_brk(struct thread_struct *thread)
 {
@@ -2915,8 +2928,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
len = 1;
else
return -EINVAL;
-   bp = thread->ptrace_bps[0];
-   if (bp)
+
+   i = empty_ptrace_bp(thread);
+   if (i < 0)
return -ENOSPC;
 
/* Create a new breakpoint request if one doesn't exist already */
@@ -2925,14 +2939,14 @@ static long ppc_set_hwdebug(struct task_struct *child,
attr.bp_len = len;
arch_bp_generic_fields(brk.type, &attr.bp_type);
 
-   thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+   thread->ptrace_bps[i] = bp = register_user_hw_breakpoint(&attr,
   ptrace_triggered, NULL, child);
if (IS_ERR(bp)) {
-   thread->ptrace_bps[0] = NULL;
+   thread->ptrace_bps[i] = NULL;
return PTR_ERR(bp);
}
 
-   return 1;
+   return i + 1;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
@@ -2979,10 +2993,10 @@ static long ppc_del_hwdebug(struct task_struct *child, 
long data)
return -EINVAL;
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-   bp = thread->ptrace_bps[0];
+   bp = thread->ptrace_bps[data - 1];
if (bp) {
unregister_hw_breakpoint(bp);
-   thread->ptrace_bps[0] = NULL;
+   thread->ptrace_bps[data - 1] = NULL;
} else
ret = -ENOENT;
return ret;
-- 
2.21.1



[PATCH 09/15] powerpc/watchpoint: Convert thread_struct->hw_brk to an array

2020-03-09 Thread Ravi Bangoria
So far, powerpc hw has supported only one watchpoint. But future Power
architecture is introducing a 2nd DAWR. Convert thread_struct->hw_brk
into an array.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/processor.h |  2 +-
 arch/powerpc/kernel/process.c| 43 
 arch/powerpc/kernel/ptrace.c | 42 ---
 arch/powerpc/kernel/ptrace32.c   |  4 +--
 arch/powerpc/kernel/signal.c |  9 --
 5 files changed, 72 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 666b2825278c..57a8fac2e72b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -183,7 +183,7 @@ struct thread_struct {
 */
struct perf_event *last_hit_ubp;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
-   struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
+   struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint 
info */
unsigned long   trap_nr;/* last trap # on this thread */
u8 load_slb;/* Ages out SLB preload cache entries */
u8 load_fp;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f6bb2586fa5d..42ff62ef749c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -704,21 +704,25 @@ void switch_booke_debug_regs(struct debug_reg *new_debug)
 EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 #else  /* !CONFIG_PPC_ADV_DEBUG_REGS */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
-static void set_breakpoint(struct arch_hw_breakpoint *brk)
+static void set_breakpoint(struct arch_hw_breakpoint *brk, int i)
 {
preempt_disable();
-   __set_breakpoint(brk, 0);
+   __set_breakpoint(brk, i);
preempt_enable();
 }
 
 static void set_debug_reg_defaults(struct thread_struct *thread)
 {
-   thread->hw_brk.address = 0;
-   thread->hw_brk.type = 0;
-   thread->hw_brk.len = 0;
-   thread->hw_brk.hw_len = 0;
-   if (ppc_breakpoint_available())
-   set_breakpoint(&thread->hw_brk);
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   thread->hw_brk[i].address = 0;
+   thread->hw_brk[i].type = 0;
+   thread->hw_brk[i].len = 0;
+   thread->hw_brk[i].hw_len = 0;
+   if (ppc_breakpoint_available())
+   set_breakpoint(&thread->hw_brk[i], i);
+   }
 }
 #endif /* !CONFIG_HAVE_HW_BREAKPOINT */
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -1141,6 +1145,24 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
thread_pkey_regs_restore(new_thread, old_thread);
 }
 
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
+static void switch_hw_breakpoint(struct task_struct *new)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk[i]),
+  &new->thread.hw_brk[i]))) {
+   __set_breakpoint(&new->thread.hw_brk[i], i);
+   }
+   }
+}
+#else
+static void switch_hw_breakpoint(struct task_struct *new)
+{
+}
+#endif
+
 struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
 {
@@ -1172,10 +1194,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
  * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
  * schedule DABR
  */
-#ifndef CONFIG_HAVE_HW_BREAKPOINT
-   if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk[0]), 
&new->thread.hw_brk)))
-   __set_breakpoint(&new->thread.hw_brk, 0);
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+   switch_hw_breakpoint(new);
 #endif
 
/*
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index dd46e174dbe7..f6d7955fc61e 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2382,6 +2382,11 @@ void ptrace_triggered(struct perf_event *bp,
 }
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
+/*
+ * ptrace_set_debugreg() fakes DABR, and there is only one DABR. So even
+ * if the internal hw supports more than one watchpoint, we support only
+ * one watchpoint with this interface.
+ */
 static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
   unsigned long data)
 {
@@ -2451,7 +2456,7 @@ static int ptrace_set_debugreg(struct task_struct *task, 
unsigned long addr,
return ret;
}
thread->ptrace_bps[0] = bp;
-   thread->hw_brk = hw_brk;
+   thread->hw_brk[0] = hw_brk;
return 0;
}
 
@@ -2473,7 +2478,7 @@ static int ptrace_set_debugreg(struct task_struct *task, 
unsigned long addr,
if (set_bp && (!ppc_breakpoint_available()))
return -ENODEV;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
-   task->thread.hw_brk = hw_brk;
+   task->thread.hw

[PATCH 08/15] powerpc/watchpoint: Disable all available watchpoints when !dawr_force_enable

2020-03-09 Thread Ravi Bangoria
Instead of disabling only the first watchpoint, disable all available
watchpoints while clearing dawr_force_enable.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/kernel/dawr.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 311e51ee09f4..5c882f07ac7d 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -50,9 +50,13 @@ int set_dawr(struct arch_hw_breakpoint *brk, int nr)
return 0;
 }
 
-static void set_dawr_cb(void *info)
+static void disable_dawrs(void *info)
 {
-   set_dawr(info, 0);
+   struct arch_hw_breakpoint null_brk = {0};
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++)
+   set_dawr(&null_brk, i);
 }
 
 static ssize_t dawr_write_file_bool(struct file *file,
@@ -74,7 +78,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
 
/* If we are clearing, make sure all CPUs have the DAWR cleared */
if (!dawr_force_enable)
-   smp_call_function(set_dawr_cb, &null_brk, 0);
+   smp_call_function(disable_dawrs, NULL, 0);
 
return rc;
 }
-- 
2.21.1



[PATCH 07/15] powerpc/watchpoint: Get watchpoint count dynamically while disabling them

2020-03-09 Thread Ravi Bangoria
Instead of disabling only one watchpoint, get the number of available
watchpoints dynamically and disable all of them.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/hw_breakpoint.h | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 980ac7d9f267..ec61e2b7195c 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -75,14 +75,13 @@ extern void ptrace_triggered(struct perf_event *bp,
struct perf_sample_data *data, struct pt_regs *regs);
 static inline void hw_breakpoint_disable(void)
 {
-   struct arch_hw_breakpoint brk;
-
-   brk.address = 0;
-   brk.type = 0;
-   brk.len = 0;
-   brk.hw_len = 0;
-   if (ppc_breakpoint_available())
-   __set_breakpoint(&brk, 0);
+   int i;
+   struct arch_hw_breakpoint null_brk = {0};
+
+   if (ppc_breakpoint_available()) {
+   for (i = 0; i < nr_wp_slots(); i++)
+   __set_breakpoint(&null_brk, i);
+   }
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
 int hw_breakpoint_handler(struct die_args *args);
-- 
2.21.1



[PATCH 06/15] powerpc/watchpoint: Provide DAWR number to __set_breakpoint

2020-03-09 Thread Ravi Bangoria
Introduce a new parameter 'nr' to __set_breakpoint() which indicates
which DAWR should be programmed. Also convert the current_brk variable
to an array.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/debug.h |  2 +-
 arch/powerpc/include/asm/hw_breakpoint.h |  2 +-
 arch/powerpc/kernel/hw_breakpoint.c  |  8 
 arch/powerpc/kernel/process.c| 14 +++---
 arch/powerpc/kernel/signal.c |  2 +-
 arch/powerpc/xmon/xmon.c |  2 +-
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 7756026b95ca..6228935a8b64 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -45,7 +45,7 @@ static inline int debugger_break_match(struct pt_regs *regs) 
{ return 0; }
 static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
-void __set_breakpoint(struct arch_hw_breakpoint *brk);
+void __set_breakpoint(struct arch_hw_breakpoint *brk, int nr);
 bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index c4e797753895..980ac7d9f267 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -82,7 +82,7 @@ static inline void hw_breakpoint_disable(void)
brk.len = 0;
brk.hw_len = 0;
if (ppc_breakpoint_available())
-   __set_breakpoint(&brk);
+   __set_breakpoint(&brk, 0);
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
 int hw_breakpoint_handler(struct die_args *args);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index e68798cee3fa..f4d48f87dcb8 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -63,7 +63,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 * If so, DABR will be populated in single_step_dabr_instruction().
 */
if (current->thread.last_hit_ubp != bp)
-   __set_breakpoint(info);
+   __set_breakpoint(info, 0);
 
return 0;
 }
@@ -221,7 +221,7 @@ void thread_change_pc(struct task_struct *tsk, struct 
pt_regs *regs)
 
info = counter_arch_bp(tsk->thread.last_hit_ubp);
regs->msr &= ~MSR_SE;
-   __set_breakpoint(info);
+   __set_breakpoint(info, 0);
tsk->thread.last_hit_ubp = NULL;
 }
 
@@ -346,7 +346,7 @@ int hw_breakpoint_handler(struct die_args *args)
if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
perf_bp_event(bp, regs);
 
-   __set_breakpoint(info);
+   __set_breakpoint(info, 0);
 out:
rcu_read_unlock();
return rc;
@@ -379,7 +379,7 @@ static int single_step_dabr_instruction(struct die_args 
*args)
if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
perf_bp_event(bp, regs);
 
-   __set_breakpoint(info);
+   __set_breakpoint(info, 0);
current->thread.last_hit_ubp = NULL;
 
/*
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 0657b3a3792a..f6bb2586fa5d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -630,7 +630,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
 }
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
 
-static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
+static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]);
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 /*
@@ -707,7 +707,7 @@ EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 static void set_breakpoint(struct arch_hw_breakpoint *brk)
 {
preempt_disable();
-   __set_breakpoint(brk);
+   __set_breakpoint(brk, 0);
preempt_enable();
 }
 
@@ -793,13 +793,13 @@ static inline int set_breakpoint_8xx(struct 
arch_hw_breakpoint *brk)
return 0;
 }
 
-void __set_breakpoint(struct arch_hw_breakpoint *brk)
+void __set_breakpoint(struct arch_hw_breakpoint *brk, int nr)
 {
-   memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
+   memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
 
if (dawr_enabled())
// Power8 or later
-   set_dawr(brk, 0);
+   set_dawr(brk, nr);
else if (IS_ENABLED(CONFIG_PPC_8xx))
set_breakpoint_8xx(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -1173,8 +1173,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
  * schedule DABR
  */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
-   if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), 
&new->thread.hw_brk)))
-   __set_breakpoint(&new->thread.hw_brk);
+   if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk[0]), 
&new->thread.hw_brk)))
+   __set_breakpoint(&new->thread.hw_brk, 0);
 #end

[PATCH 05/15] powerpc/watchpoint: Provide DAWR number to set_dawr

2020-03-09 Thread Ravi Bangoria
Introduce a new parameter 'nr' to set_dawr() which indicates which DAWR
should be programmed.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/hw_breakpoint.h |  4 ++--
 arch/powerpc/kernel/dawr.c   | 15 ++-
 arch/powerpc/kernel/process.c|  2 +-
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 741c4f7573c4..c4e797753895 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -101,10 +101,10 @@ static inline bool dawr_enabled(void)
 {
return dawr_force_enable;
 }
-int set_dawr(struct arch_hw_breakpoint *brk);
+int set_dawr(struct arch_hw_breakpoint *brk, int nr);
 #else
 static inline bool dawr_enabled(void) { return false; }
-static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; }
+static inline int set_dawr(struct arch_hw_breakpoint *brk, int nr) { return 
-1; }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index e91b613bf137..311e51ee09f4 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -16,7 +16,7 @@
 bool dawr_force_enable;
 EXPORT_SYMBOL_GPL(dawr_force_enable);
 
-int set_dawr(struct arch_hw_breakpoint *brk)
+int set_dawr(struct arch_hw_breakpoint *brk, int nr)
 {
unsigned long dawr, dawrx, mrd;
 
@@ -39,15 +39,20 @@ int set_dawr(struct arch_hw_breakpoint *brk)
if (ppc_md.set_dawr)
return ppc_md.set_dawr(dawr, dawrx);
 
-   mtspr(SPRN_DAWR0, dawr);
-   mtspr(SPRN_DAWRX0, dawrx);
+   if (nr == 0) {
+   mtspr(SPRN_DAWR0, dawr);
+   mtspr(SPRN_DAWRX0, dawrx);
+   } else {
+   mtspr(SPRN_DAWR1, dawr);
+   mtspr(SPRN_DAWRX1, dawrx);
+   }
 
return 0;
 }
 
 static void set_dawr_cb(void *info)
 {
-   set_dawr(info);
+   set_dawr(info, 0);
 }
 
 static ssize_t dawr_write_file_bool(struct file *file,
@@ -60,7 +65,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
/* Send error to user if the hypervisor won't allow us to write DAWR */
if (!dawr_force_enable &&
firmware_has_feature(FW_FEATURE_LPAR) &&
-   set_dawr(&null_brk) != H_SUCCESS)
+   set_dawr(&null_brk, 0) != H_SUCCESS)
return -ENODEV;
 
rc = debugfs_write_file_bool(file, user_buf, count, ppos);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6d4b029532e2..0657b3a3792a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -799,7 +799,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
 
if (dawr_enabled())
// Power8 or later
-   set_dawr(brk);
+   set_dawr(brk, 0);
else if (IS_ENABLED(CONFIG_PPC_8xx))
set_breakpoint_8xx(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
-- 
2.21.1



[PATCH 04/15] powerpc/watchpoint/ptrace: Return actual num of available watchpoints

2020-03-09 Thread Ravi Bangoria
The user can ask for the number of available watchpoints
(dbginfo.num_data_bps) using ptrace(PPC_PTRACE_GETHWDBGINFO). Return the
actual number of available watchpoints on the machine rather than a
hardcoded 1.

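A minimal userspace sketch of the query (error handling trimmed; the
tracee is assumed to be attached and stopped already):

    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <asm/ptrace.h>         /* struct ppc_debug_info */

    static int num_watchpoints(pid_t pid)
    {
            struct ppc_debug_info dbginfo;

            if (ptrace(PPC_PTRACE_GETHWDBGINFO, pid, NULL, &dbginfo) < 0)
                    return -1;
            /* Reports nr_wp_slots() with this patch, not a hardcoded 1 */
            return dbginfo.num_data_bps;
    }
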
Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 25c0424e8868..dd46e174dbe7 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -3074,7 +3074,7 @@ long arch_ptrace(struct task_struct *child, long request,
 #else /* !CONFIG_PPC_ADV_DEBUG_REGS */
dbginfo.num_instruction_bps = 0;
if (ppc_breakpoint_available())
-   dbginfo.num_data_bps = 1;
+   dbginfo.num_data_bps = nr_wp_slots();
else
dbginfo.num_data_bps = 0;
dbginfo.num_condition_regs = 0;
-- 
2.21.1



[PATCH 03/15] powerpc/watchpoint: Introduce function to get nr watchpoints dynamically

2020-03-09 Thread Ravi Bangoria
So far we have had only one watchpoint, so HBP_NUM was hardcoded to 1.
But future Power architecture is introducing a 2nd DAWR, and thus the
kernel should be able to dynamically find the actual number of
watchpoints supported by the hw it's running on. Introduce a function
for this. Also convert the HBP_NUM macro to HBP_NUM_MAX, which now
represents the maximum number of watchpoints supported by powerpc.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/cputable.h  | 6 +-
 arch/powerpc/include/asm/hw_breakpoint.h | 2 ++
 arch/powerpc/include/asm/processor.h | 2 +-
 arch/powerpc/kernel/hw_breakpoint.c  | 2 +-
 arch/powerpc/kernel/process.c| 6 ++
 5 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index 40a4d3c6fd99..c67b94f3334c 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -614,7 +614,11 @@ enum {
 };
 #endif /* __powerpc64__ */
 
-#define HBP_NUM 1
+/*
+ * Maximum number of hw breakpoint supported on powerpc. Number of
+ * breakpoints supported by actual hw might be less than this.
+ */
+#define HBP_NUM_MAX    1
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index f2f8d8aa8e3b..741c4f7573c4 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -43,6 +43,8 @@ struct arch_hw_breakpoint {
 #define DABR_MAX_LEN   8
 #define DAWR_MAX_LEN   512
 
+extern int nr_wp_slots(void);
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include 
 #include 
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 8387698bd5b6..666b2825278c 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -176,7 +176,7 @@ struct thread_struct {
int fpexc_mode; /* floating-point exception mode */
unsigned intalign_ctl;  /* alignment handling control */
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-   struct perf_event *ptrace_bps[HBP_NUM];
+   struct perf_event *ptrace_bps[HBP_NUM_MAX];
/*
 * Helps identify source of single-step exception and subsequent
 * hw-breakpoint enablement
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index d0854320bb50..e68798cee3fa 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -38,7 +38,7 @@ static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
 int hw_breakpoint_slots(int type)
 {
if (type == TYPE_DATA)
-   return HBP_NUM;
+   return nr_wp_slots();
return 0;   /* no instruction breakpoints available */
 }
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 110db94cdf3c..6d4b029532e2 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -835,6 +835,12 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint 
*a,
return true;
 }
 
+/* Returns total number of data breakpoints available. */
+int nr_wp_slots(void)
+{
+   return HBP_NUM_MAX;
+}
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 
 static inline bool tm_enabled(struct task_struct *tsk)
-- 
2.21.1



[PATCH 01/15] powerpc/watchpoint: Rename current DAWR macros

2020-03-09 Thread Ravi Bangoria
Future Power architecture is introducing a second DAWR. Rename the
current DAWR macros as follows:
 s/SPRN_DAWR/SPRN_DAWR0/
 s/SPRN_DAWRX/SPRN_DAWRX0/

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/reg.h  |  4 ++--
 arch/powerpc/kernel/dawr.c  |  4 ++--
 arch/powerpc/kvm/book3s_hv.c| 12 ++--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 18 +-
 arch/powerpc/xmon/xmon.c|  2 +-
 5 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index da5cab038e25..156ee89fa9be 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -283,14 +283,14 @@
 #define   CTRL_CT1 0x4000  /* thread 1 */
 #define   CTRL_TE  0x00c0  /* thread enable */
#define   CTRL_RUNLATCH 0x1
-#define SPRN_DAWR  0xB4
+#define SPRN_DAWR0 0xB4
 #define SPRN_RPR   0xBA/* Relative Priority Register */
 #define SPRN_CIABR 0xBB
 #define   CIABR_PRIV   0x3
 #define   CIABR_PRIV_USER  1
 #define   CIABR_PRIV_SUPER 2
 #define   CIABR_PRIV_HYPER 3
-#define SPRN_DAWRX 0xBC
+#define SPRN_DAWRX0    0xBC
 #define   DAWRX_USER   __MASK(0)
 #define   DAWRX_KERNEL __MASK(1)
 #define   DAWRX_HYP__MASK(2)
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index cc14aa6c4a1b..e91b613bf137 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -39,8 +39,8 @@ int set_dawr(struct arch_hw_breakpoint *brk)
if (ppc_md.set_dawr)
return ppc_md.set_dawr(dawr, dawrx);
 
-   mtspr(SPRN_DAWR, dawr);
-   mtspr(SPRN_DAWRX, dawrx);
+   mtspr(SPRN_DAWR0, dawr);
+   mtspr(SPRN_DAWRX0, dawrx);
 
return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 33be4d93248a..498c57e1f5c9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3383,8 +3383,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu 
*vcpu, u64 time_limit,
int trap;
unsigned long host_hfscr = mfspr(SPRN_HFSCR);
unsigned long host_ciabr = mfspr(SPRN_CIABR);
-   unsigned long host_dawr = mfspr(SPRN_DAWR);
-   unsigned long host_dawrx = mfspr(SPRN_DAWRX);
+   unsigned long host_dawr = mfspr(SPRN_DAWR0);
+   unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
unsigned long host_psscr = mfspr(SPRN_PSSCR);
unsigned long host_pidr = mfspr(SPRN_PID);
 
@@ -3413,8 +3413,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu 
*vcpu, u64 time_limit,
mtspr(SPRN_SPURR, vcpu->arch.spurr);
 
if (dawr_enabled()) {
-   mtspr(SPRN_DAWR, vcpu->arch.dawr);
-   mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
+   mtspr(SPRN_DAWR0, vcpu->arch.dawr);
+   mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
@@ -3466,8 +3466,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu 
*vcpu, u64 time_limit,
  (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
-   mtspr(SPRN_DAWR, host_dawr);
-   mtspr(SPRN_DAWRX, host_dawrx);
+   mtspr(SPRN_DAWR0, host_dawr);
+   mtspr(SPRN_DAWRX0, host_dawrx);
mtspr(SPRN_PID, host_pidr);
 
/*
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index dbc2fecc37f0..f4b412b7cad8 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -707,8 +707,8 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
mfspr   r5, SPRN_CIABR
-   mfspr   r6, SPRN_DAWR
-   mfspr   r7, SPRN_DAWRX
+   mfspr   r6, SPRN_DAWR0
+   mfspr   r7, SPRN_DAWRX0
mfspr   r8, SPRN_IAMR
std r5, STACK_SLOT_CIABR(r1)
std r6, STACK_SLOT_DAWR(r1)
@@ -803,8 +803,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
beq 1f
ld  r5, VCPU_DAWR(r4)
ld  r6, VCPU_DAWRX(r4)
-   mtspr   SPRN_DAWR, r5
-   mtspr   SPRN_DAWRX, r6
+   mtspr   SPRN_DAWR0, r5
+   mtspr   SPRN_DAWRX0, r6
 1:
ld  r7, VCPU_CIABR(r4)
ld  r8, VCPU_TAR(r4)
@@ -1772,8 +1772,8 @@ BEGIN_FTR_SECTION
 * If the DAWR doesn't work, it's ok to write these here as
 * this value should always be zero
*/
-   mtspr   SPRN_DAWR, r6
-   mtspr   SPRN_DAWRX, r7
+   mtspr   SPRN_DAWR0, r6
+   mtspr   SPRN_DAWRX0, r7
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
ld  r5, STACK_SLOT_TID(r1)
@@ -2583,8 +2583,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfmsr   r6
andi.   r6, r6, MSR_DR  /* in real mode? */
bne 4f
-   mtspr   SPRN_DAWR, r4
-   mtspr   SPRN_DA

[PATCH 00/15] powerpc/watchpoint: Preparation for more than one watchpoint

2020-03-09 Thread Ravi Bangoria
So far, powerpc Book3S code has been written with the assumption of only
one watchpoint. But future Power architecture is introducing a second
watchpoint register (DAWR). Even though this patchset does not enable the
2nd DAWR, it makes the infrastructure ready so that enabling the 2nd DAWR
should just be a matter of changing the count, as the sketch below shows.

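Concretely, the "changing the count" amounts to something like the
following; the feature check is hypothetical here, since this series
keeps the count at 1:

    /* Returns total number of data breakpoints available. */
    int nr_wp_slots(void)
    {
            return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
    }

with HBP_NUM_MAX bumped to 2 so that the per-thread and per-cpu arrays
are sized for the worst case.
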
Existing functionality works fine with the patchset. I've tested it with
perf, ptrace (gdb), and xmon. All hw-breakpoint selftests are passing as
well. And I've build-tested for 8xx.

Note: kvm or PowerVM guest support is not enabled yet.

The series applies fine to powerpc/next plus one more dependency patch:
https://git.kernel.org/powerpc/c/e08658a657f974590809290c62e889f0fd420200

Ravi Bangoria (15):
  powerpc/watchpoint: Rename current DAWR macros
  powerpc/watchpoint: Add SPRN macros for second DAWR
  powerpc/watchpoint: Introduce function to get nr watchpoints
dynamically
  powerpc/watchpoint/ptrace: Return actual num of available watchpoints
  powerpc/watchpoint: Provide DAWR number to set_dawr
  powerpc/watchpoint: Provide DAWR number to __set_breakpoint
  powerpc/watchpoint: Get watchpoint count dynamically while disabling
them
  powerpc/watchpoint: Disable all available watchpoints when
!dawr_force_enable
  powerpc/watchpoint: Convert thread_struct->hw_brk to an array
  powerpc/watchpoint: Use loop for thread_struct->ptrace_bps
  powerpc/watchpoint: Introduce is_ptrace_bp() function
  powerpc/watchpoint: Prepare handler to handle more than one
watchpoint
  powerpc/watchpoint: Don't allow concurrent perf and ptrace events
  powerpc/watchpoint/xmon: Don't allow breakpoint overwriting
  powerpc/watchpoint/xmon: Support 2nd dawr

 arch/powerpc/include/asm/cputable.h  |   6 +-
 arch/powerpc/include/asm/debug.h |   2 +-
 arch/powerpc/include/asm/hw_breakpoint.h |  23 +-
 arch/powerpc/include/asm/processor.h |   6 +-
 arch/powerpc/include/asm/reg.h   |   6 +-
 arch/powerpc/include/asm/sstep.h |   2 +
 arch/powerpc/kernel/dawr.c   |  23 +-
 arch/powerpc/kernel/hw_breakpoint.c  | 628 +++
 arch/powerpc/kernel/process.c|  66 ++-
 arch/powerpc/kernel/ptrace.c |  72 ++-
 arch/powerpc/kernel/ptrace32.c   |   4 +-
 arch/powerpc/kernel/signal.c |   9 +-
 arch/powerpc/kvm/book3s_hv.c |  12 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  18 +-
 arch/powerpc/xmon/xmon.c |  99 ++--
 kernel/events/hw_breakpoint.c|  16 +
 16 files changed, 793 insertions(+), 199 deletions(-)

-- 
2.21.1



[PATCH 02/15] powerpc/watchpoint: Add SPRN macros for second DAWR

2020-03-09 Thread Ravi Bangoria
Future Power architecture is introducing a second DAWR. Add SPRN_ macros
for it.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/reg.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 156ee89fa9be..062e74cf41fd 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -284,6 +284,7 @@
 #define   CTRL_TE  0x00c0  /* thread enable */
#define   CTRL_RUNLATCH 0x1
 #define SPRN_DAWR0 0xB4
+#define SPRN_DAWR1 0xB5
 #define SPRN_RPR   0xBA/* Relative Priority Register */
 #define SPRN_CIABR 0xBB
 #define   CIABR_PRIV   0x3
@@ -291,6 +292,7 @@
 #define   CIABR_PRIV_SUPER 2
 #define   CIABR_PRIV_HYPER 3
#define SPRN_DAWRX0    0xBC
+#define SPRN_DAWRX1    0xBD
 #define   DAWRX_USER   __MASK(0)
 #define   DAWRX_KERNEL __MASK(1)
 #define   DAWRX_HYP__MASK(2)
-- 
2.21.1