Re: [PATCH v6 03/28] powerpc/xmon: Move breakpoints to text section

2020-04-27 Thread Christophe Leroy




Le 28/04/2020 à 07:30, Jordan Niethe a écrit :

On Tue, Apr 28, 2020 at 3:20 PM Christophe Leroy
 wrote:




Le 28/04/2020 à 03:57, Jordan Niethe a écrit :

The instructions for xmon's breakpoint are stored bpt_table[] which is in
the data section. This is problematic as the data section may be marked
as no execute. Move bpt_table[] to the text section.

Signed-off-by: Jordan Niethe 
---
v6: - New to series. Was part of the previous patch.
  - Make BPT_SIZE available in assembly
---
   arch/powerpc/kernel/asm-offsets.c |  8 
   arch/powerpc/xmon/Makefile|  2 +-
   arch/powerpc/xmon/xmon.c  |  6 +-
   arch/powerpc/xmon/xmon_bpts.S |  9 +
   arch/powerpc/xmon/xmon_bpts.h | 14 ++
   5 files changed, 33 insertions(+), 6 deletions(-)
   create mode 100644 arch/powerpc/xmon/xmon_bpts.S
   create mode 100644 arch/powerpc/xmon/xmon_bpts.h

diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index c25e562f1cd9..2401f415f423 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -70,6 +70,10 @@
   #include 
   #endif

+#ifdef CONFIG_XMON
+#include "../xmon/xmon_bpts.h"
+#endif
+
   #define STACK_PT_REGS_OFFSET(sym, val)  \
   DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))

@@ -783,5 +787,9 @@ int main(void)
   DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
   #endif

+#ifdef CONFIG_XMON
+ DEFINE(BPT_SIZE, BPT_SIZE);
+#endif
+
   return 0;
   }
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index c3842dbeb1b7..515a13ea6f28 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -21,7 +21,7 @@ endif

   ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)

-obj-y+= xmon.o nonstdio.o spr_access.o
+obj-y+= xmon.o nonstdio.o spr_access.o xmon_bpts.o

   ifdef CONFIG_XMON_DISASSEMBLY
   obj-y   += ppc-dis.o ppc-opc.o
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index a064392df1b8..f7ce3ea8694c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -62,6 +62,7 @@

   #include "nonstdio.h"
   #include "dis-asm.h"
+#include "xmon_bpts.h"

   #ifdef CONFIG_SMP
   static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
@@ -108,7 +109,6 @@ struct bpt {
   #define BP_TRAP 2
   #define BP_DABR 4

-#define NBPTS	256
   static struct bpt bpts[NBPTS];
   static struct bpt dabr;
   static struct bpt *iabr;
@@ -116,10 +116,6 @@ static unsigned bpinstr = 0x7fe00008;	/* trap */

   #define BP_NUM(bp)  ((bp) - bpts + 1)

-#define BPT_SIZE   (sizeof(unsigned int) * 2)
-#define BPT_WORDS  (BPT_SIZE / sizeof(unsigned int))
-static unsigned int bpt_table[NBPTS * BPT_WORDS];
-
   /* Prototypes */
   static int cmds(struct pt_regs *);
   static int mread(unsigned long, void *, int);
diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S
new file mode 100644
index ..f3ad0ab50854
--- /dev/null
+++ b/arch/powerpc/xmon/xmon_bpts.S
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include 
+#include 
+#include 
+#include "xmon_bpts.h"
+
+.global bpt_table
+bpt_table:
+ .space NBPTS * BPT_SIZE


No alignment required ? Standard alignment (probably 4 bytes ?) is
acceptable ?

No, I'll add a .balign 4 to be sure.


I think it is aligned to 4 by default. My question was to know whether 4 
is enough.

I see BPT_SIZE is 8, should the alignment be at least 8 ?





diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h
new file mode 100644
index ..b7e94375db86
--- /dev/null
+++ b/arch/powerpc/xmon/xmon_bpts.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef XMON_BPTS_H
+#define XMON_BPTS_H
+
+#define NBPTS	256
+#ifndef __ASSEMBLY__
+#define BPT_SIZE (sizeof(unsigned int) * 2)
+#define BPT_WORDS(BPT_SIZE / sizeof(unsigned int))
+
+extern unsigned int bpt_table[NBPTS * BPT_WORDS];
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* XMON_BPTS_H */



Christophe


Re: [PATCH 1/2] powerpc: Discard .rela* sections if CONFIG_RELOCATABLE is undefined

2020-04-27 Thread Christophe Leroy

Hi,

Le 28/04/2020 à 03:48, H.J. Lu a écrit :

arch/powerpc/kernel/vmlinux.lds.S has

 DISCARDS
 /DISCARD/ : {
 *(*.EMB.apuinfo)
 *(.glink .iplt .plt .rela* .comment)
 *(.gnu.version*)
 *(.gnu.attributes)
 *(.eh_frame)
 }

Since .rela* sections are needed when CONFIG_RELOCATABLE is defined,
change to discard .rela* sections if CONFIG_RELOCATABLE is undefined.


Your explanation and especially the subject are unclear.

From the subject you understand that you are adding a discard of the 
.rela* sections if CONFIG_RELOCATABLE is undefined.


But when reading the patch, you see that it is the contrary: you are 
removing a discard of the .rela* sections if CONFIG_RELOCATABLE is defined.



So I think the subject could instead be:

Only discard .rela* sections when CONFIG_RELOCATABLE is undefined

Or maybe better:

Keep .rela* sections when CONFIG_RELOCATABLE is defined

And the explanation could be:

Since .rela* sections are needed when CONFIG_RELOCATABLE
is defined, don't discard .rela* sections if
CONFIG_RELOCATABLE is defined.



Signed-off-by: H.J. Lu 
Acked-by: Michael Ellerman  (powerpc)
---
  arch/powerpc/kernel/vmlinux.lds.S | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/vmlinux.lds.S 
b/arch/powerpc/kernel/vmlinux.lds.S
index 31a0f201fb6f..4ba07734a210 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -366,9 +366,12 @@ SECTIONS
DISCARDS
/DISCARD/ : {
*(*.EMB.apuinfo)
-   *(.glink .iplt .plt .rela* .comment)
+   *(.glink .iplt .plt .comment)
*(.gnu.version*)
*(.gnu.attributes)
*(.eh_frame)
+#ifndef CONFIG_RELOCATABLE
+   *(.rela*)
+#endif
}
  }



Christophe


[PATCH] tty: hvc: Fix data abort due to race in hvc_open

2020-04-27 Thread Raghavendra Rao Ananta
Potentially, hvc_open() can be called in parallel when two tasks calls
open() on /dev/hvcX. In such a scenario, if the hp->ops->notifier_add()
callback in the function fails, where it sets the tty->driver_data to
NULL, the parallel hvc_open() can see this NULL and cause a memory abort.
Hence, serialize hvc_open and check if tty->private_data is NULL before
proceeding ahead.

The issue can be easily reproduced by launching two tasks simultaneously
that does nothing but open() and close() on /dev/hvcX.
For example:
$ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &

Signed-off-by: Raghavendra Rao Ananta 
---
 drivers/tty/hvc/hvc_console.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 436cc51c92c3..ebe26fe5ac09 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -75,6 +75,8 @@ static LIST_HEAD(hvc_structs);
  */
 static DEFINE_MUTEX(hvc_structs_mutex);
 
+/* Mutex to serialize hvc_open */
+static DEFINE_MUTEX(hvc_open_mutex);
 /*
  * This value is used to assign a tty->index value to a hvc_struct based
  * upon order of exposure via hvc_probe(), when we can not match it to
@@ -346,16 +348,24 @@ static int hvc_install(struct tty_driver *driver, struct 
tty_struct *tty)
  */
 static int hvc_open(struct tty_struct *tty, struct file * filp)
 {
-   struct hvc_struct *hp = tty->driver_data;
+   struct hvc_struct *hp;
unsigned long flags;
int rc = 0;
 
+   mutex_lock(&hvc_open_mutex);
+
+   hp = tty->driver_data;
+   if (!hp) {
+   rc = -EIO;
+   goto out;
+   }
+
	spin_lock_irqsave(&hp->port.lock, flags);
	/* Check and then increment for fast path open. */
	if (hp->port.count++ > 0) {
		spin_unlock_irqrestore(&hp->port.lock, flags);
		hvc_kick();
-		return 0;
+		goto out;
	} /* else count == 0 */
	spin_unlock_irqrestore(&hp->port.lock, flags);
 
@@ -384,6 +394,8 @@ static int hvc_open(struct tty_struct *tty, struct file * 
filp)
/* Force wakeup of the polling thread */
hvc_kick();
 
+out:
+   mutex_unlock(&hvc_open_mutex);
return rc;
 }
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH v4] pci: Make return value of pcie_capability_read*() consistent

2020-04-27 Thread Yicong Yang
On 2020/4/28 2:13, Bjorn Helgaas wrote:
>
> I'm starting to think we're approaching this backwards.  I searched
> for PCIBIOS_FUNC_NOT_SUPPORTED, PCIBIOS_BAD_VENDOR_ID, and the other
> error values.  Almost every use is a *return* in a config accessor.
> There are very, very few *tests* for these values.

If we have certain reasons to reserve PCI_BIOS* error to identify PCI errors
in PCI drivers, maybe redefine the PCI_BIOS* to generic error codes can solve
the issues, and no need to call pcibios_err_to_errno() to do the conversion.
Few changes may be made to current codes. One possible patch may
look like below. Otherwise, maybe convert all PCI_BIOS* errors to generic error
codes is a better idea.

Not sure it's the best way or not. Just FYI.


diff --git a/include/linux/pci.h b/include/linux/pci.h
index 83ce1cd..843987c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -675,14 +675,18 @@ static inline bool pci_dev_msi_enabled(struct pci_dev 
*pci_dev) { return false;
 
 /* Error values that may be returned by PCI functions */
 #define PCIBIOS_SUCCESSFUL 0x00
-#define PCIBIOS_FUNC_NOT_SUPPORTED 0x81
-#define PCIBIOS_BAD_VENDOR_ID  0x83
-#define PCIBIOS_DEVICE_NOT_FOUND   0x86
-#define PCIBIOS_BAD_REGISTER_NUMBER0x87
-#define PCIBIOS_SET_FAILED 0x88
-#define PCIBIOS_BUFFER_TOO_SMALL   0x89
-
-/* Translate above to generic errno for passing back through non-PCI code */
+#define PCIBIOS_FUNC_NOT_SUPPORTED -ENOENT
+#define PCIBIOS_BAD_VENDOR_ID  -ENOTTY
+#define PCIBIOS_DEVICE_NOT_FOUND   -ENODEV
+#define PCIBIOS_BAD_REGISTER_NUMBER-EFAULT
+#define PCIBIOS_SET_FAILED -EIO
+#define PCIBIOS_BUFFER_TOO_SMALL   -ENOSPC
+
+/**
+ * Translate above to generic errno for passing back through non-PCI code
+ *
+ * Deprecated. Use the PCIBIOS_* directly without a translation.
+ */
 static inline int pcibios_err_to_errno(int err)
 {
if (err <= PCIBIOS_SUCCESSFUL)
@@ -690,17 +694,12 @@ static inline int pcibios_err_to_errno(int err)
 
switch (err) {
case PCIBIOS_FUNC_NOT_SUPPORTED:
-   return -ENOENT;
case PCIBIOS_BAD_VENDOR_ID:
-   return -ENOTTY;
case PCIBIOS_DEVICE_NOT_FOUND:
-   return -ENODEV;
case PCIBIOS_BAD_REGISTER_NUMBER:
-   return -EFAULT;
case PCIBIOS_SET_FAILED:
-   return -EIO;
case PCIBIOS_BUFFER_TOO_SMALL:
-   return -ENOSPC;
+   return err;
}
 
return -ERANGE;


>
> For example, the only tests for PCIBIOS_FUNC_NOT_SUPPORTED are in
> xen_pcibios_err_to_errno() and pcibios_err_to_errno(), i.e., we're
> just converting that value to -ENOENT or the Xen-specific thing.
>
> So I think the best approach might be to remove the PCIBIOS_* error
> values completely and replace them with the corresponding values from
> pcibios_err_to_errno().  For example, a part of the patch would look
> like this:
>
> diff --git a/arch/mips/pci/ops-emma2rh.c b/arch/mips/pci/ops-emma2rh.c
> index 65f47344536c..d4d9c902c147 100644
> --- a/arch/mips/pci/ops-emma2rh.c
> +++ b/arch/mips/pci/ops-emma2rh.c
> @@ -100,7 +100,7 @@ static int pci_config_read(struct pci_bus *bus, unsigned 
> int devfn, int where,
>   break;
>   default:
>   emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0);
> - return PCIBIOS_FUNC_NOT_SUPPORTED;
> + return -ENOENT;
>   }
>  
>   emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0);
> @@ -149,7 +149,7 @@ static int pci_config_write(struct pci_bus *bus, unsigned 
> int devfn, int where,
>   break;
>   default:
>   emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0);
> - return PCIBIOS_FUNC_NOT_SUPPORTED;
> + return -ENOENT;
>   }
>   *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) +
> (where & 0xfffc)) = data;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 83ce1cdf5676..f95637a8d391 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -675,7 +675,6 @@ static inline bool pci_dev_msi_enabled(struct pci_dev 
> *pci_dev) { return false;
>  
>  /* Error values that may be returned by PCI functions */
>  #define PCIBIOS_SUCCESSFUL   0x00
> -#define PCIBIOS_FUNC_NOT_SUPPORTED   0x81
>  #define PCIBIOS_BAD_VENDOR_ID0x83
>  #define PCIBIOS_DEVICE_NOT_FOUND 0x86
>  #define PCIBIOS_BAD_REGISTER_NUMBER  0x87
> @@ -689,8 +688,6 @@ static inline int pcibios_err_to_errno(int err)
>   return err; /* Assume already errno */
>  
>   switch (err) {
> - case PCIBIOS_FUNC_NOT_SUPPORTED:
> - return -ENOENT;
>   case PCIBIOS_BAD_VENDOR_ID:
>   return -ENOTTY;
>   case PCIBIOS_DEVICE_NOT_FOUND:
> .
>



Re: [PATCH v6 03/28] powerpc/xmon: Move breakpoints to text section

2020-04-27 Thread Jordan Niethe
On Tue, Apr 28, 2020 at 3:20 PM Christophe Leroy
 wrote:
>
>
>
> Le 28/04/2020 à 03:57, Jordan Niethe a écrit :
> > The instructions for xmon's breakpoint are stored bpt_table[] which is in
> > the data section. This is problematic as the data section may be marked
> > as no execute. Move bpt_table[] to the text section.
> >
> > Signed-off-by: Jordan Niethe 
> > ---
> > v6: - New to series. Was part of the previous patch.
> >  - Make BPT_SIZE available in assembly
> > ---
> >   arch/powerpc/kernel/asm-offsets.c |  8 
> >   arch/powerpc/xmon/Makefile|  2 +-
> >   arch/powerpc/xmon/xmon.c  |  6 +-
> >   arch/powerpc/xmon/xmon_bpts.S |  9 +
> >   arch/powerpc/xmon/xmon_bpts.h | 14 ++
> >   5 files changed, 33 insertions(+), 6 deletions(-)
> >   create mode 100644 arch/powerpc/xmon/xmon_bpts.S
> >   create mode 100644 arch/powerpc/xmon/xmon_bpts.h
> >
> > diff --git a/arch/powerpc/kernel/asm-offsets.c 
> > b/arch/powerpc/kernel/asm-offsets.c
> > index c25e562f1cd9..2401f415f423 100644
> > --- a/arch/powerpc/kernel/asm-offsets.c
> > +++ b/arch/powerpc/kernel/asm-offsets.c
> > @@ -70,6 +70,10 @@
> >   #include 
> >   #endif
> >
> > +#ifdef CONFIG_XMON
> > +#include "../xmon/xmon_bpts.h"
> > +#endif
> > +
> >   #define STACK_PT_REGS_OFFSET(sym, val)  \
> >   DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))
> >
> > @@ -783,5 +787,9 @@ int main(void)
> >   DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
> >   #endif
> >
> > +#ifdef CONFIG_XMON
> > + DEFINE(BPT_SIZE, BPT_SIZE);
> > +#endif
> > +
> >   return 0;
> >   }
> > diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
> > index c3842dbeb1b7..515a13ea6f28 100644
> > --- a/arch/powerpc/xmon/Makefile
> > +++ b/arch/powerpc/xmon/Makefile
> > @@ -21,7 +21,7 @@ endif
> >
> >   ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
> >
> > -obj-y+= xmon.o nonstdio.o spr_access.o
> > +obj-y+= xmon.o nonstdio.o spr_access.o xmon_bpts.o
> >
> >   ifdef CONFIG_XMON_DISASSEMBLY
> >   obj-y   += ppc-dis.o ppc-opc.o
> > diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> > index a064392df1b8..f7ce3ea8694c 100644
> > --- a/arch/powerpc/xmon/xmon.c
> > +++ b/arch/powerpc/xmon/xmon.c
> > @@ -62,6 +62,7 @@
> >
> >   #include "nonstdio.h"
> >   #include "dis-asm.h"
> > +#include "xmon_bpts.h"
> >
> >   #ifdef CONFIG_SMP
> >   static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
> > @@ -108,7 +109,6 @@ struct bpt {
> >   #define BP_TRAP 2
> >   #define BP_DABR 4
> >
> > -#define NBPTS256
> >   static struct bpt bpts[NBPTS];
> >   static struct bpt dabr;
> >   static struct bpt *iabr;
> > @@ -116,10 +116,6 @@ static unsigned bpinstr = 0x7fe8;/* trap */
> >
> >   #define BP_NUM(bp)  ((bp) - bpts + 1)
> >
> > -#define BPT_SIZE   (sizeof(unsigned int) * 2)
> > -#define BPT_WORDS  (BPT_SIZE / sizeof(unsigned int))
> > -static unsigned int bpt_table[NBPTS * BPT_WORDS];
> > -
> >   /* Prototypes */
> >   static int cmds(struct pt_regs *);
> >   static int mread(unsigned long, void *, int);
> > diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S
> > new file mode 100644
> > index ..f3ad0ab50854
> > --- /dev/null
> > +++ b/arch/powerpc/xmon/xmon_bpts.S
> > @@ -0,0 +1,9 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#include 
> > +#include 
> > +#include 
> > +#include "xmon_bpts.h"
> > +
> > +.global bpt_table
> > +bpt_table:
> > + .space NBPTS * BPT_SIZE
>
> No alignment required ? Standard alignment (probably 4 bytes ?) is
> acceptable ?
No, I'll add a .balign 4 to be sure.
>
>
> > diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h
> > new file mode 100644
> > index ..b7e94375db86
> > --- /dev/null
> > +++ b/arch/powerpc/xmon/xmon_bpts.h
> > @@ -0,0 +1,14 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef XMON_BPTS_H
> > +#define XMON_BPTS_H
> > +
> > +#define NBPTS256
> > +#ifndef __ASSEMBLY__
> > +#define BPT_SIZE (sizeof(unsigned int) * 2)
> > +#define BPT_WORDS(BPT_SIZE / sizeof(unsigned int))
> > +
> > +extern unsigned int bpt_table[NBPTS * BPT_WORDS];
> > +
> > +#endif /* __ASSEMBLY__ */
> > +
> > +#endif /* XMON_BPTS_H */
> >
>
> Christophe


Re: [PATCH v6 03/28] powerpc/xmon: Move breakpoints to text section

2020-04-27 Thread Christophe Leroy




Le 28/04/2020 à 03:57, Jordan Niethe a écrit :

The instructions for xmon's breakpoint are stored bpt_table[] which is in
the data section. This is problematic as the data section may be marked
as no execute. Move bpt_table[] to the text section.

Signed-off-by: Jordan Niethe 
---
v6: - New to series. Was part of the previous patch.
 - Make BPT_SIZE available in assembly
---
  arch/powerpc/kernel/asm-offsets.c |  8 
  arch/powerpc/xmon/Makefile|  2 +-
  arch/powerpc/xmon/xmon.c  |  6 +-
  arch/powerpc/xmon/xmon_bpts.S |  9 +
  arch/powerpc/xmon/xmon_bpts.h | 14 ++
  5 files changed, 33 insertions(+), 6 deletions(-)
  create mode 100644 arch/powerpc/xmon/xmon_bpts.S
  create mode 100644 arch/powerpc/xmon/xmon_bpts.h

diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index c25e562f1cd9..2401f415f423 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -70,6 +70,10 @@
  #include 
  #endif
  
+#ifdef CONFIG_XMON

+#include "../xmon/xmon_bpts.h"
+#endif
+
  #define STACK_PT_REGS_OFFSET(sym, val)\
DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))
  
@@ -783,5 +787,9 @@ int main(void)

DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
  #endif
  
+#ifdef CONFIG_XMON

+   DEFINE(BPT_SIZE, BPT_SIZE);
+#endif
+
return 0;
  }
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index c3842dbeb1b7..515a13ea6f28 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -21,7 +21,7 @@ endif
  
  ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
  
-obj-y			+= xmon.o nonstdio.o spr_access.o

+obj-y  += xmon.o nonstdio.o spr_access.o xmon_bpts.o
  
  ifdef CONFIG_XMON_DISASSEMBLY

  obj-y += ppc-dis.o ppc-opc.o
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index a064392df1b8..f7ce3ea8694c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -62,6 +62,7 @@
  
  #include "nonstdio.h"

  #include "dis-asm.h"
+#include "xmon_bpts.h"
  
  #ifdef CONFIG_SMP

  static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
@@ -108,7 +109,6 @@ struct bpt {
  #define BP_TRAP   2
  #define BP_DABR   4
  
-#define NBPTS	256

  static struct bpt bpts[NBPTS];
  static struct bpt dabr;
  static struct bpt *iabr;
@@ -116,10 +116,6 @@ static unsigned bpinstr = 0x7fe8;  /* trap */
  
  #define BP_NUM(bp)	((bp) - bpts + 1)
  
-#define BPT_SIZE   (sizeof(unsigned int) * 2)

-#define BPT_WORDS  (BPT_SIZE / sizeof(unsigned int))
-static unsigned int bpt_table[NBPTS * BPT_WORDS];
-
  /* Prototypes */
  static int cmds(struct pt_regs *);
  static int mread(unsigned long, void *, int);
diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S
new file mode 100644
index ..f3ad0ab50854
--- /dev/null
+++ b/arch/powerpc/xmon/xmon_bpts.S
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include 
+#include 
+#include 
+#include "xmon_bpts.h"
+
+.global bpt_table
+bpt_table:
+   .space NBPTS * BPT_SIZE


No alignment required ? Standard alignment (probably 4 bytes ?) is 
acceptable ?




diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h
new file mode 100644
index ..b7e94375db86
--- /dev/null
+++ b/arch/powerpc/xmon/xmon_bpts.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef XMON_BPTS_H
+#define XMON_BPTS_H
+
+#define NBPTS  256
+#ifndef __ASSEMBLY__
+#define BPT_SIZE   (sizeof(unsigned int) * 2)
+#define BPT_WORDS  (BPT_SIZE / sizeof(unsigned int))
+
+extern unsigned int bpt_table[NBPTS * BPT_WORDS];
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* XMON_BPTS_H */



Christophe


Re: [PATCH v3 2/4] hugetlbfs: move hugepagesz= parsing to arch independent code

2020-04-27 Thread Sandipan Das
Hi Mike,

On 28/04/20 12:39 am, Mike Kravetz wrote:
> On 4/27/20 10:25 AM, Mike Kravetz wrote:
>> On 4/26/20 10:04 PM, Sandipan Das wrote:
>>> On 18/04/20 12:20 am, Mike Kravetz wrote:
 Now that architectures provide arch_hugetlb_valid_size(), parsing
 of "hugepagesz=" can be done in architecture independent code.
>>>
>>> This isn't working as expected on powerpc64.
>>>
>>>   [0.00] Kernel command line: 
>>> root=UUID=dc7b49cf-95a2-4996-8e7d-7c64ddc7a6ff hugepagesz=16G hugepages=2 
>>>   [0.00] HugeTLB: huge pages not supported, ignoring hugepagesz = 
>>> 16G
>>>   [0.00] HugeTLB: huge pages not supported, ignoring hugepages = 2
>>>   [0.284177] HugeTLB registered 16.0 MiB page size, pre-allocated 0 
>>> pages
>>>   [0.284182] HugeTLB registered 16.0 GiB page size, pre-allocated 0 
>>> pages
>>>   [2.585062] hugepagesz=16G
>>>   [2.585063] hugepages=2
>>>
>>
>> In the new arch independent version of hugepages_setup, I added the following
>> code in patch 4 off this series:
>>
>>> +   if (!hugepages_supported()) {
>>> +   pr_warn("HugeTLB: huge pages not supported, ignoring hugepages 
>>> = %s\n", s);
>>> +   return 0;
>>> +   }
>>> +
>>
>> The easy solution is to remove all the hugepages_supported() checks from
>> command line parsing routines and rely on the later check in hugetlb_init().
> 
> Here is a patch to address the issue.  Sorry, as my series breaks all hugetlb
> command line processing on powerpc.
> 
> Sandipan, can you test the following patch?
> 

The following patch does fix the issue. Thanks.

Tested-by: Sandipan Das 


> From 480fe2847361e2a85aeec1fb39fe643bb7100a07 Mon Sep 17 00:00:00 2001
> From: Mike Kravetz 
> Date: Mon, 27 Apr 2020 11:37:30 -0700
> Subject: [PATCH] hugetlbfs: fix changes to command line processing
> 
> Previously, a check for hugepages_supported was added before processing
> hugetlb command line parameters.  On some architectures such as powerpc,
> hugepages_supported() is not set to true until after command line
> processing.  Therefore, no hugetlb command line parameters would be
> accepted.
> 
> Remove the additional checks for hugepages_supported.  In hugetlb_init,
> print a warning if !hugepages_supported and command line parameters were
> specified.
> 
> Signed-off-by: Mike Kravetz 
> ---
>  mm/hugetlb.c | 20 
>  1 file changed, 4 insertions(+), 16 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 1075abdb5717..5548e8851b93 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3212,8 +3212,11 @@ static int __init hugetlb_init(void)
>  {
>   int i;
>  
> - if (!hugepages_supported())
> + if (!hugepages_supported()) {
> + if (hugetlb_max_hstate || default_hstate_max_huge_pages)
> + pr_warn("HugeTLB: huge pages not supported, ignoring 
> associated command-line parameters\n");
>   return 0;
> + }
>  
>   /*
>* Make sure HPAGE_SIZE (HUGETLB_PAGE_ORDER) hstate exists.  Some
> @@ -3315,11 +3318,6 @@ static int __init hugepages_setup(char *s)
>   unsigned long *mhp;
>   static unsigned long *last_mhp;
>  
> - if (!hugepages_supported()) {
> - pr_warn("HugeTLB: huge pages not supported, ignoring hugepages 
> = %s\n", s);
> - return 0;
> - }
> -
>   if (!parsed_valid_hugepagesz) {
>   pr_warn("HugeTLB: hugepages=%s does not follow a valid 
> hugepagesz, ignoring\n", s);
>   parsed_valid_hugepagesz = true;
> @@ -3372,11 +3370,6 @@ static int __init hugepagesz_setup(char *s)
>   struct hstate *h;
>  
>   parsed_valid_hugepagesz = false;
> - if (!hugepages_supported()) {
> - pr_warn("HugeTLB: huge pages not supported, ignoring hugepagesz 
> = %s\n", s);
> - return 0;
> - }
> -
>   size = (unsigned long)memparse(s, NULL);
>  
>   if (!arch_hugetlb_valid_size(size)) {
> @@ -3424,11 +3417,6 @@ static int __init default_hugepagesz_setup(char *s)
>   unsigned long size;
>  
>   parsed_valid_hugepagesz = false;
> - if (!hugepages_supported()) {
> - pr_warn("HugeTLB: huge pages not supported, ignoring 
> default_hugepagesz = %s\n", s);
> - return 0;
> - }
> -
>   if (parsed_default_hugepagesz) {
>   pr_err("HugeTLB: default_hugepagesz previously specified, 
> ignoring %s\n", s);
>   return 0;
> 


[PATCH v4 1/2] powerpc/eeh: fix pseries_eeh_configure_bridge()

2020-04-27 Thread Sam Bobroff
If a device is hot unplgged during EEH recovery, it's possible for the
RTAS call to ibm,configure-pe in pseries_eeh_configure() to return
parameter error (-3), however negative return values are not checked
for and this leads to an infinite loop.

Fix this by correctly bailing out on negative values.

Signed-off-by: Sam Bobroff 
---
v4 - Just handle the error translation locally, as it's specific to the RTAS 
call,
 but log the unaltered code in case it's useful for debugging.

 arch/powerpc/platforms/pseries/eeh_pseries.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 893ba3f562c4..04c1ed79bc6e 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -607,6 +607,8 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 
if (!ret)
return ret;
+   if (ret < 0)
+   break;
 
/*
 * If RTAS returns a delay value that's above 100ms, cut it
@@ -627,7 +629,11 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 
pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
__func__, pe->phb->global_number, pe->addr, ret);
-   return ret;
+   /* PAPR defines -3 as "Parameter Error" for this function: */
+   if (ret == -3)
+   return -EINVAL;
+   else
+   return -EIO;
 }
 
 /**
-- 
2.22.0.216.g00a2a96fc9



[PATCH v4 2/2] powerpc/eeh: Release EEH device state synchronously

2020-04-27 Thread Sam Bobroff
EEH device state is currently removed (by eeh_remove_device()) during
the device release handler, which is invoked as the device's reference
count drops to zero. This may take some time, or forever, as other
threads may hold references.

However, the PCI device state is released synchronously by
pci_stop_and_remove_bus_device(). This mismatch causes problems, for
example the device may be re-discovered as a new device before the
release handler has been called, leaving the PCI and EEH state
mismatched.

So instead, call eeh_remove_device() from the bus device removal
handlers, which are called synchronously in the removal path.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c | 31 +++
 arch/powerpc/kernel/pci-hotplug.c |  2 --
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..64361311bc8e 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1106,6 +1106,37 @@ static int eeh_init(void)
 
 core_initcall_sync(eeh_init);
 
+static int eeh_device_notifier(struct notifier_block *nb,
+  unsigned long action, void *data)
+{
+   struct device *dev = data;
+
+   switch (action) {
+   /*
+* Note: It's not possible to perform EEH device addition (i.e.
+* {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+* the device's resources, which have not yet been set up.
+*/
+   case BUS_NOTIFY_DEL_DEVICE:
+   eeh_remove_device(to_pci_dev(dev));
+   break;
+   default:
+   break;
+   }
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+   .notifier_call = eeh_device_notifier,
+};
+
+static __init int eeh_set_bus_notifier(void)
+{
+   bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+   return 0;
+}
+arch_initcall(eeh_set_bus_notifier);
+
 /**
  * eeh_add_device_early - Enable EEH for the indicated device node
  * @pdn: PCI device node for which to set up EEH
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f814983..28e9aa274f64 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev)
struct pci_controller *phb = pci_bus_to_host(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
 
-   eeh_remove_device(dev);
-
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
 
-- 
2.22.0.216.g00a2a96fc9



[PATCH v4 0/2] powerpc/eeh: Release EEH device state synchronously

2020-04-27 Thread Sam Bobroff
Hi everyone,

Here are some fixes and cleanups that have come from other work but that I
think stand on their own.

Only one patch ("Release EEH device state synchronously", suggested by Oliver
O'Halloran) is a significant change: it moves the cleanup of some EEH device
data out of the (possibly asynchronous) device release handler and into the
(synchronously called) bus notifier. This is useful for future work as it makes
it easier to reason about the lifetimes of EEH structures.

Note that I've left a few WARN_ON_ONCEs in the code because I'm paranoid, but I
have not been able to hit them during testing.

Cheers,
Sam.

Notes for v4:
Stopped using rtas_error_rc() as it is too specific, intead just translate the
one code that is valid for this RTAS call. Therefore, the new patch to export
rtas_error_rc() is dropped.

Notes for v3:
I've tweaked the fix for pseries_eeh_configure_bridge() to return the correct
error code (even though it's not used) by calling an already present RTAS
function, rtas_error_rc(). However, I had to make another change to export that
function and while it does seem like the right thing to do, but I'm concerned
it's a bit out of scope for such a small fix.

Notes for v2:

I've dropped both cleanup patches (3/4, 4/4) because that type of cleanup
(replacing a call to eeh_rmv_from_parent_pe() with one to eeh_remove_device())
is incorrect: if called during recovery, it will cause edev->pe to remain set
when it would have been cleared previously. This would lead to stale
information in the edev. I think there should be a way to simplify the code
around EEH_PE_KEEP but I'll look at that separately.

Patch set changelog follows:

Patch set v4: 
Patch 1/2 (was 2/3): powerpc/eeh: fix pseries_eeh_configure_bridge()
- Just handle the error translation locally, as it's specific to the RTAS call,
  but log the unaltered code in case it's useful for debugging.
Patch 2/2 (was 3/3): powerpc/eeh: Release EEH device state synchronously
Dropped (was 1/3) powerpc/rtas: Export rtas_error_rc

Patch set v3: 
Patch 1/3 (new in this version): powerpc/rtas: Export rtas_error_rc
Patch 2/3 (was 1/2): powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 3/3 (was 2/2): powerpc/eeh: Release EEH device state synchronously

Patch set v2: 
Patch 1/2: powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 2/2: powerpc/eeh: Release EEH device state synchronously
- Added comment explaining why the add case can't be handled similarly to the 
remove case.
Dropped (was 4/4) powerpc/eeh: Clean up edev cleanup for VFs
Dropped (was 3/4) powerpc/eeh: Remove workaround from eeh_add_device_late()

Patch set v1:
Patch 1/4: powerpc/eeh: fix pseries_eeh_configure_bridge()
Patch 2/4: powerpc/eeh: Release EEH device state synchronously
Patch 3/4: powerpc/eeh: Remove workaround from eeh_add_device_late()
Patch 4/4: powerpc/eeh: Clean up edev cleanup for VFs

Sam Bobroff (2):
  powerpc/eeh: fix pseries_eeh_configure_bridge()
  powerpc/eeh: Release EEH device state synchronously

 arch/powerpc/kernel/eeh.c| 31 
 arch/powerpc/kernel/pci-hotplug.c|  2 --
 arch/powerpc/platforms/pseries/eeh_pseries.c |  8 -
 3 files changed, 38 insertions(+), 3 deletions(-)

-- 
2.22.0.216.g00a2a96fc9



Re: [PATCH v3 1/3] powerpc/rtas: Export rtas_error_rc

2020-04-27 Thread Michael Ellerman
Sam Bobroff  writes:
> Export rtas_error_rc() so that it can be used by other users of
> rtas_call() (which is already exported).
>
> Signed-off-by: Sam Bobroff 
> ---
> v3 * New in this version.
>
>  arch/powerpc/include/asm/rtas.h | 1 +
>  arch/powerpc/kernel/rtas.c  | 3 ++-
>  2 files changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> index 3c1887351c71..7c9e4d3635cf 100644
> --- a/arch/powerpc/include/asm/rtas.h
> +++ b/arch/powerpc/include/asm/rtas.h
> @@ -379,6 +379,7 @@ extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
>  
>  extern unsigned int rtas_busy_delay_time(int status);
>  extern unsigned int rtas_busy_delay(int status);
> +extern int rtas_error_rc(int rtas_rc);
>  
>  extern int early_init_dt_scan_rtas(unsigned long node,
>   const char *uname, int depth, void *data);
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index c5fa251b8950..238bf112d29a 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -518,7 +518,7 @@ unsigned int rtas_busy_delay(int status)
>  }
>  EXPORT_SYMBOL(rtas_busy_delay);
>  
> -static int rtas_error_rc(int rtas_rc)
> +int rtas_error_rc(int rtas_rc)
>  {
>   int rc;
>  
> @@ -546,6 +546,7 @@ static int rtas_error_rc(int rtas_rc)
>   }
>   return rc;
>  }
> +EXPORT_SYMBOL(rtas_error_rc);

Will it be used in a module somewhere?

AFAICS the only caller you add is built-in.

cheers


[Bug 104871] bcl+8 in arch/powerpc/kernel/vdso64/datapage.S causes branch prediction issues

2020-04-27 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=104871

Michael Ellerman (mich...@ellerman.id.au) changed:

   What|Removed |Added

 Status|RESOLVED|CLOSED

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 104871] bcl+8 in arch/powerpc/kernel/vdso64/datapage.S causes branch prediction issues

2020-04-27 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=104871

Michael Ellerman (mich...@ellerman.id.au) changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 CC||mich...@ellerman.id.au
 Resolution|--- |CODE_FIX

--- Comment #2 from Michael Ellerman (mich...@ellerman.id.au) ---
Fixed in:

c974809a26a1 ("powerpc/vdso: Avoid link stack corruption in __get_datapage()")

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c974809a26a13e40254dbe3cf46f49aa32acca11

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 199471] [Bisected][Regression] windfarm_pm* no longer gets automatically loaded when CONFIG_I2C_POWERMAC=y is set

2020-04-27 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=199471

Michael Ellerman (mich...@ellerman.id.au) changed:

   What|Removed |Added

 Status|VERIFIED|CLOSED

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 199471] [Bisected][Regression] windfarm_pm* no longer gets automatically loaded when CONFIG_I2C_POWERMAC=y is set

2020-04-27 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=199471

Michael Ellerman (mich...@ellerman.id.au) changed:

   What|Removed |Added

 Status|RESOLVED|VERIFIED

--- Comment #26 from Michael Ellerman (mich...@ellerman.id.au) ---
OK thanks all.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: [PATCH 1/5] powerpc/spufs: simplify spufs core dumping

2020-04-27 Thread Jeremy Kerr
Hi Al & Christoph,

> Again, this really needs fixing.  Preferably - as a separate commit
> preceding this series, so that it could be
> backported.  simple_read_from_buffer() is a blocking operation.

I'll put together a patch that fixes this.

Christoph: I'll do it in a way that matches your changes to the _read
functions, so hopefully those hunks would just drop from your change,
leaving only the _dump additions. Would that work?

Cheers,


Jeremy



Re: [PATCH v3 1/3] powerpc/rtas: Export rtas_error_rc

2020-04-27 Thread Sam Bobroff
On Fri, Apr 24, 2020 at 11:07:43AM -0500, Nathan Lynch wrote:
> Sam Bobroff  writes:
> > Export rtas_error_rc() so that it can be used by other users of
> > rtas_call() (which is already exported).
> 
> This will do the right thing for your ibm,configure-pe use case in patch
> 2, but the -900x => errno translations in rtas_error_rc() appear
> tailored for the indicator- and sensor-related calls that currently use
> it. From my reading of PAPR+, the meaning of a -900x RTAS status word
> depends on the call. For example, -9002 commonly means "not authorized",
> which we would typically translate to -EPERM, but rtas_error_rc() would
> translate it to -ENODEV.
> 
> Also the semantics of -9001 as a return value seem to vary a bit.
> 
> So I don't think rtas_error_rc() should be advertised as a generically
> useful facility in its current form.
> 
> (I have had some thoughts about how firmware/hypervisor call status can
> be translated to meaningful Linux error values without tedious switch
> statements, which I'm happy to expand on if anyone is interested, but I
> don't want to hijack your submission for that discussion.)

Ah, interesting.

I'll do another version as you suggest.

Cheers,
Sam.


signature.asc
Description: PGP signature


[PATCH v6 28/28] powerpc sstep: Add support for prefixed fixed-point arithmetic

2020-04-27 Thread Jordan Niethe
This adds emulation support for the following prefixed Fixed-Point
Arithmetic instructions:
  * Prefixed Add Immediate (paddi)

Reviewed-by: Balamuruhan S 
Signed-off-by: Jordan Niethe 
---
v3: Since we moved the prefixed loads/stores into the load/store switch
statement it no longer makes sense to have paddi in there, so move it
out.
---
 arch/powerpc/lib/sstep.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index d3ae0a36975f..ea419f65285b 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1337,6 +1337,26 @@ int analyse_instr(struct instruction_op *op, const 
struct pt_regs *regs,
 
switch (opcode) {
 #ifdef __powerpc64__
+   case 1:
+   prefix_r = word & (1ul << 20);
+   ra = (suffix >> 16) & 0x1f;
+   rd = (suffix >> 21) & 0x1f;
+   op->reg = rd;
+   op->val = regs->gpr[rd];
+   suffixopcode = suffix >> 26;
+   prefixtype = (word >> 24) & 0x3;
+   switch (prefixtype) {
+   case 2:
+   if (prefix_r && ra)
+   return 0;
+   switch (suffixopcode) {
+   case 14:/* paddi */
+   op->type = COMPUTE | PREFIXED;
+   op->val = mlsd_8lsd_ea(word, suffix, regs);
+   goto compute_done;
+   }
+   }
+   break;
case 2: /* tdi */
if (rd & trap_compare(regs->gpr[ra], (short) word))
goto trap;
-- 
2.17.1



[PATCH v6 27/28] powerpc sstep: Add support for prefixed load/stores

2020-04-27 Thread Jordan Niethe
This adds emulation support for the following prefixed integer
load/stores:
  * Prefixed Load Byte and Zero (plbz)
  * Prefixed Load Halfword and Zero (plhz)
  * Prefixed Load Halfword Algebraic (plha)
  * Prefixed Load Word and Zero (plwz)
  * Prefixed Load Word Algebraic (plwa)
  * Prefixed Load Doubleword (pld)
  * Prefixed Store Byte (pstb)
  * Prefixed Store Halfword (psth)
  * Prefixed Store Word (pstw)
  * Prefixed Store Doubleword (pstd)
  * Prefixed Load Quadword (plq)
  * Prefixed Store Quadword (pstq)

the follow prefixed floating-point load/stores:
  * Prefixed Load Floating-Point Single (plfs)
  * Prefixed Load Floating-Point Double (plfd)
  * Prefixed Store Floating-Point Single (pstfs)
  * Prefixed Store Floating-Point Double (pstfd)

and for the following prefixed VSX load/stores:
  * Prefixed Load VSX Scalar Doubleword (plxsd)
  * Prefixed Load VSX Scalar Single-Precision (plxssp)
  * Prefixed Load VSX Vector [0|1]  (plxv, plxv0, plxv1)
  * Prefixed Store VSX Scalar Doubleword (pstxsd)
  * Prefixed Store VSX Scalar Single-Precision (pstxssp)
  * Prefixed Store VSX Vector [0|1] (pstxv, pstxv0, pstxv1)

Reviewed-by: Balamuruhan S 
Signed-off-by: Jordan Niethe 
---
v2: - Combine all load/store patches
- Fix the name of Type 01 instructions
- Remove sign extension flag from pstd/pld
- Rename sufx -> suffix
v3: - Move prefixed loads and stores into the switch statement
v6: - Compile on ppc32
- Add back in + GETLENGTH(op->type)
---
 arch/powerpc/include/asm/sstep.h |   4 +
 arch/powerpc/lib/sstep.c | 165 ++-
 2 files changed, 167 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index c3ce903ac488..9b200a5f8794 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -90,11 +90,15 @@ enum instruction_type {
 #define VSX_LDLEFT 4   /* load VSX register from left */
 #define VSX_CHECK_VEC  8   /* check MSR_VEC not MSR_VSX for reg >= 32 */
 
+/* Prefixed flag, ORed in with type */
+#define PREFIXED   0x800
+
 /* Size field in type word */
 #define SIZE(n)((n) << 12)
 #define GETSIZE(w) ((w) >> 12)
 
 #define GETTYPE(t) ((t) & INSTR_TYPE_MASK)
+#define GETLENGTH(t)   (((t) & PREFIXED) ? 8 : 4)
 
 #define MKOP(t, f, s)  ((t) | (f) | SIZE(s))
 
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 81c4c44262a6..d3ae0a36975f 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -187,6 +187,44 @@ static nokprobe_inline unsigned long xform_ea(unsigned int 
instr,
return ea;
 }
 
+/*
+ * Calculate effective address for a MLS:D-form / 8LS:D-form
+ * prefixed instruction
+ */
+static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr,
+ unsigned int suffix,
+ const struct pt_regs *regs)
+{
+   int ra, prefix_r;
+   unsigned int  dd;
+   unsigned long ea, d0, d1, d;
+
+   prefix_r = instr & (1ul << 20);
+   ra = (suffix >> 16) & 0x1f;
+
+   d0 = instr & 0x3ffff;
+   d1 = suffix & 0xffff;
+   d = (d0 << 16) | d1;
+
+   /*
+* sign extend a 34 bit number
+*/
+   dd = (unsigned int)(d >> 2);
+   ea = (signed int)dd;
+   ea = (ea << 2) | (d & 0x3);
+
+   if (!prefix_r && ra)
+   ea += regs->gpr[ra];
+   else if (!prefix_r && !ra)
+   ; /* Leave ea as is */
+   else if (prefix_r && !ra)
+   ea += regs->nip;
+   else if (prefix_r && ra)
+   ; /* Invalid form. Should already be checked for by caller! */
+
+   return ea;
+}
+
 /*
  * Return the largest power of 2, not greater than sizeof(unsigned long),
  * such that x is a multiple of it.
@@ -1166,6 +1204,9 @@ int analyse_instr(struct instruction_op *op, const struct 
pt_regs *regs,
  struct ppc_inst instr)
 {
unsigned int opcode, ra, rb, rc, rd, spr, u;
+#ifdef __powerpc64__
+   unsigned int suffixopcode, prefixtype, prefix_r;
+#endif
unsigned long int imm;
unsigned long int val, val2;
unsigned int mb, me, sh;
@@ -2652,6 +2693,126 @@ int analyse_instr(struct instruction_op *op, const 
struct pt_regs *regs,
break;
}
break;
+   case 1: /* Prefixed instructions */
+   prefix_r = word & (1ul << 20);
+   ra = (suffix >> 16) & 0x1f;
+   op->update_reg = ra;
+   rd = (suffix >> 21) & 0x1f;
+   op->reg = rd;
+   op->val = regs->gpr[rd];
+
+   suffixopcode = suffix >> 26;
+   prefixtype = (word >> 24) & 0x3;
+   switch (prefixtype) {
+   case 0: /* Type 00  Eight-Byte Load/Store */
+   if (prefix_r && ra)
+   break;
+   

[PATCH v6 26/28] powerpc: Support prefixed instructions in alignment handler

2020-04-27 Thread Jordan Niethe
If a prefixed instruction results in an alignment exception, the
SRR1_PREFIXED bit is set. The handler attempts to emulate the
responsible instruction and then increment the NIP past it. Use
SRR1_PREFIXED to determine by how much the NIP should be incremented.

Prefixed instructions are not permitted to cross 64-byte boundaries. If
they do the alignment interrupt is invoked with SRR1 BOUNDARY bit set.
If this occurs send a SIGBUS to the offending process if in user mode.
If in kernel mode call bad_page_fault().

Signed-off-by: Jordan Niethe 
---
v2: - Move __get_user_instr() and __get_user_instr_inatomic() to this
commit (previously in "powerpc sstep: Prepare to support prefixed
instructions").
- Rename sufx to suffix
- Use a macro for calculating instruction length
v3: Move __get_user_{instr(), instr_inatomic()} up with the other
get_user definitions and remove nested if.
v4: Rolled into "Add prefixed instructions to instruction data type"
v5: Only one definition of inst_length()
---
 arch/powerpc/kernel/traps.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index a4764b039749..9b97d2e6055a 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -583,6 +583,8 @@ static inline int check_io_access(struct pt_regs *regs)
 #define REASON_ILLEGAL (ESR_PIL | ESR_PUO)
 #define REASON_PRIVILEGED  ESR_PPR
 #define REASON_TRAPESR_PTR
+#define REASON_PREFIXED0
+#define REASON_BOUNDARY0
 
 /* single-step stuff */
 #define single_stepping(regs)  (current->thread.debug.dbcr0 & DBCR0_IC)
@@ -597,12 +599,16 @@ static inline int check_io_access(struct pt_regs *regs)
 #define REASON_ILLEGAL SRR1_PROGILL
 #define REASON_PRIVILEGED  SRR1_PROGPRIV
 #define REASON_TRAPSRR1_PROGTRAP
+#define REASON_PREFIXEDSRR1_PREFIXED
+#define REASON_BOUNDARYSRR1_BOUNDARY
 
 #define single_stepping(regs)  ((regs)->msr & MSR_SE)
 #define clear_single_step(regs)((regs)->msr &= ~MSR_SE)
 #define clear_br_trace(regs)   ((regs)->msr &= ~MSR_BE)
 #endif
 
+#define inst_length(reason)(((reason) & REASON_PREFIXED) ? 8 : 4)
+
 #if defined(CONFIG_E500)
 int machine_check_e500mc(struct pt_regs *regs)
 {
@@ -1593,11 +1599,20 @@ void alignment_exception(struct pt_regs *regs)
 {
enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
+   unsigned long  reason;
 
/* We restore the interrupt state now */
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
 
+   reason = get_reason(regs);
+
+   if (reason & REASON_BOUNDARY) {
+   sig = SIGBUS;
+   code = BUS_ADRALN;
+   goto bad;
+   }
+
if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
goto bail;
 
@@ -1606,7 +1621,8 @@ void alignment_exception(struct pt_regs *regs)
fixed = fix_alignment(regs);
 
if (fixed == 1) {
-   regs->nip += 4; /* skip over emulated instruction */
+   /* skip over emulated instruction */
+   regs->nip += inst_length(reason);
emulate_single_step(regs);
goto bail;
}
@@ -1619,6 +1635,7 @@ void alignment_exception(struct pt_regs *regs)
sig = SIGBUS;
code = BUS_ADRALN;
}
+bad:
if (user_mode(regs))
_exception(sig, regs, code, regs->dar);
else
-- 
2.17.1



[PATCH v6 25/28] powerpc: Test prefixed instructions in feature fixups

2020-04-27 Thread Jordan Niethe
Expand the feature-fixups self-tests to includes tests for prefixed
instructions.

Signed-off-by: Jordan Niethe 
---
v6: New to series
---
 arch/powerpc/lib/feature-fixups-test.S | 68 +++
 arch/powerpc/lib/feature-fixups.c  | 74 ++
 2 files changed, 142 insertions(+)

diff --git a/arch/powerpc/lib/feature-fixups-test.S 
b/arch/powerpc/lib/feature-fixups-test.S
index b12168c2447a..6e2da9123a9b 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -791,3 +791,71 @@ globl(lwsync_fixup_test_expected_SYNC)
 1: or  1,1,1
sync
 
+globl(ftr_fixup_prefix1)
+   or  1,1,1
+   .long 1 << 26
+   .long 0x000
+   or  2,2,2
+globl(end_ftr_fixup_prefix1)
+
+globl(ftr_fixup_prefix1_orig)
+   or  1,1,1
+   .long 1 << 26
+   .long 0x000
+   or  2,2,2
+
+globl(ftr_fixup_prefix1_expected)
+   or  1,1,1
+   nop
+   nop
+   or  2,2,2
+
+globl(ftr_fixup_prefix2)
+   or  1,1,1
+   .long 1 << 26
+   .long 0x000
+   or  2,2,2
+globl(end_ftr_fixup_prefix2)
+
+globl(ftr_fixup_prefix2_orig)
+   or  1,1,1
+   .long 1 << 26
+   .long 0x000
+   or  2,2,2
+
+globl(ftr_fixup_prefix2_alt)
+   .long 0x700
+   .long 0x001
+
+globl(ftr_fixup_prefix2_expected)
+   or  1,1,1
+   .long 1 << 26
+   .long 0x001
+   or  2,2,2
+
+globl(ftr_fixup_prefix3)
+   or  1,1,1
+   .long 1 << 26
+   .long 0x000
+   or  2,2,2
+   or  3,3,3
+globl(end_ftr_fixup_prefix3)
+
+globl(ftr_fixup_prefix3_orig)
+   or  1,1,1
+   .long 1 << 26
+   .long 0x000
+   or  2,2,2
+   or  3,3,3
+
+globl(ftr_fixup_prefix3_alt)
+   .long 1 << 26
+   .long 0x001
+   nop
+
+globl(ftr_fixup_prefix3_expected)
+   or  1,1,1
+   .long 1 << 26
+   .long 0x001
+   nop
+   or  3,3,3
diff --git a/arch/powerpc/lib/feature-fixups.c 
b/arch/powerpc/lib/feature-fixups.c
index 243011f85287..6fc499b1d63e 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -687,6 +687,75 @@ static void test_lwsync_macros(void)
}
 }
 
+#ifdef __powerpc64__
+static void __init test_prefix_patching(void)
+{
+   extern unsigned int ftr_fixup_prefix1[];
+   extern unsigned int end_ftr_fixup_prefix1[];
+   extern unsigned int ftr_fixup_prefix1_orig[];
+   extern unsigned int ftr_fixup_prefix1_expected[];
+   int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - 
ftr_fixup_prefix1);
+
+   fixup.value = fixup.mask = 8;
+   fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1);
+   fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3);
+   fixup.alt_start_off = fixup.alt_end_off = 0;
+
+   /* Sanity check */
+   check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0);
+
+   patch_feature_section(0, &fixup);
+   check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0);
+   check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0);
+}
+
+static void __init test_prefix_alt_patching(void)
+{
+   extern unsigned int ftr_fixup_prefix2[];
+   extern unsigned int end_ftr_fixup_prefix2[];
+   extern unsigned int ftr_fixup_prefix2_orig[];
+   extern unsigned int ftr_fixup_prefix2_expected[];
+   extern unsigned int ftr_fixup_prefix2_alt[];
+   int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - 
ftr_fixup_prefix2);
+
+   fixup.value = fixup.mask = 8;
+   fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1);
+   fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3);
+   fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt);
+   fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2);
+   /* Sanity check */
+   check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0);
+
+   patch_feature_section(0, &fixup);
+   check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0);
+   patch_feature_section(0, &fixup);
+   check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0);
+}
+
+static void __init test_prefix_word_alt_patching(void)
+{
+   extern unsigned int ftr_fixup_prefix3[];
+   extern unsigned int end_ftr_fixup_prefix3[];
+   extern unsigned int ftr_fixup_prefix3_orig[];
+   extern unsigned int ftr_fixup_prefix3_expected[];
+   extern unsigned int ftr_fixup_prefix3_alt[];
+   int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - 
ftr_fixup_prefix3);
+
+   fixup.value = fixup.mask = 8;
+   fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1);
+   fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4);
+   fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt);
+   fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3);
+   /* 

[PATCH v6 24/28] powerpc: Test prefixed code patching

2020-04-27 Thread Jordan Niethe
Expand the code-patching self-tests to includes tests for patching
prefixed instructions.

Signed-off-by: Jordan Niethe 
---
v6: New to series
---
 arch/powerpc/lib/Makefile |  2 +-
 arch/powerpc/lib/code-patching.c  | 21 +
 arch/powerpc/lib/test_code-patching.S | 19 +++
 3 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/lib/test_code-patching.S

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 546591848219..5e994cda8e40 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
 endif
 
-obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o
+obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o 
test_code-patching.o
 
 ifndef CONFIG_KASAN
 obj-y  +=  string.o memcmp_$(BITS).o
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index dd2f982bb29e..ad5754c5f007 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -700,6 +700,24 @@ static void __init test_translate_branch(void)
vfree(buf);
 }
 
+#ifdef __powerpc64__
+static void __init test_prefixed_patching(void)
+{
+   extern unsigned int code_patching_test1[];
+   extern unsigned int code_patching_test1_expected[];
+   extern unsigned int end_code_patching_test1[];
+
+   __patch_instruction((struct ppc_inst *)code_patching_test1,
+   ppc_inst_prefix(1 << 26, 0x00000000),
+   (struct ppc_inst *)code_patching_test1);
+
+   check(!memcmp(code_patching_test1,
+ code_patching_test1_expected,
+ sizeof(unsigned int) *
+ (end_code_patching_test1 - code_patching_test1)));
+}
+#endif
+
 static int __init test_code_patching(void)
 {
printk(KERN_DEBUG "Running code patching self-tests ...\n");
@@ -708,6 +726,9 @@ static int __init test_code_patching(void)
test_branch_bform();
test_create_function_call();
test_translate_branch();
+#ifdef __powerpc64__
+   test_prefixed_patching();
+#endif
 
return 0;
 }
diff --git a/arch/powerpc/lib/test_code-patching.S 
b/arch/powerpc/lib/test_code-patching.S
new file mode 100644
index ..91aab208a804
--- /dev/null
+++ b/arch/powerpc/lib/test_code-patching.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 IBM Corporation
+ */
+
+   .text
+
+#define globl(x)   \
+   .globl x;   \
+x:
+
+globl(code_patching_test1)
+   nop
+   nop
+globl(end_code_patching_test1)
+
+globl(code_patching_test1_expected)
+   .long 1 << 26
+   .long 0x000
-- 
2.17.1



[PATCH v6 23/28] powerpc: Add prefixed instructions to instruction data type

2020-04-27 Thread Jordan Niethe
For powerpc64, redefine the ppc_inst type so both word and prefixed
instructions can be represented. On powerpc32 the type will remain the
same.  Update places which had assumed instructions to be 4 bytes long.

Signed-off-by: Jordan Niethe 
---
v4: New to series
v5:  - Distinguish normal instructions from prefixed instructions with a
   0xff marker for the suffix.
 - __patch_instruction() using std for prefixed instructions
v6:  - Return false instead of 0 in ppc_inst_prefixed()
 - Fix up types for ppc32 so it compiles
 - remove ppc_inst_write()
 - __patching_instruction(): move flush out of condition
---
 arch/powerpc/include/asm/inst.h  | 68 +---
 arch/powerpc/include/asm/kprobes.h   |  2 +-
 arch/powerpc/include/asm/uaccess.h   | 32 -
 arch/powerpc/include/asm/uprobes.h   |  2 +-
 arch/powerpc/kernel/optprobes.c  | 42 +
 arch/powerpc/kernel/optprobes_head.S |  3 ++
 arch/powerpc/lib/code-patching.c | 13 --
 arch/powerpc/lib/feature-fixups.c|  5 +-
 arch/powerpc/lib/inst.c  | 40 
 arch/powerpc/lib/sstep.c |  4 +-
 arch/powerpc/xmon/xmon.c |  4 +-
 arch/powerpc/xmon/xmon_bpts.S|  2 +
 12 files changed, 180 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 2f3c9d5bcf7c..1e743635c214 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -8,23 +8,72 @@
 
 struct ppc_inst {
u32 val;
+#ifdef __powerpc64__
+   u32 suffix;
+#endif /* __powerpc64__ */
 } __packed;
 
-#define ppc_inst(x) ((struct ppc_inst){ .val = x })
-
 static inline u32 ppc_inst_val(struct ppc_inst x)
 {
return x.val;
 }
 
-static inline int ppc_inst_len(struct ppc_inst x)
+static inline int ppc_inst_primary_opcode(struct ppc_inst x)
 {
-   return sizeof(struct ppc_inst);
+   return ppc_inst_val(x) >> 26;
 }
 
-static inline int ppc_inst_primary_opcode(struct ppc_inst x)
+#ifdef __powerpc64__
+#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff })
+
+#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) })
+
+static inline u32 ppc_inst_suffix(struct ppc_inst x)
 {
-   return ppc_inst_val(x) >> 26;
+   return x.suffix;
+}
+
+static inline bool ppc_inst_prefixed(struct ppc_inst x)
+{
+   return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff;
+}
+
+static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
+{
+   return ppc_inst_prefix(swab32(ppc_inst_val(x)),
+  swab32(ppc_inst_suffix(x)));
+}
+
+static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr)
+{
+   u32 val, suffix;
+
+   val = *(u32 *)ptr;
+   if ((val >> 26) == 1) {
+   suffix = *((u32 *)ptr + 1);
+   return ppc_inst_prefix(val, suffix);
+   } else {
+   return ppc_inst(val);
+   }
+}
+
+static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
+{
+   return *(u64 *)&x == *(u64 *)&y;
+}
+
+#else
+
+#define ppc_inst(x) ((struct ppc_inst){ .val = x })
+
+static inline bool ppc_inst_prefixed(struct ppc_inst x)
+{
+   return false;
+}
+
+static inline u32 ppc_inst_suffix(struct ppc_inst x)
+{
+   return 0;
 }
 
 static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
@@ -42,6 +91,13 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct 
ppc_inst y)
return ppc_inst_val(x) == ppc_inst_val(y);
 }
 
+#endif /* __powerpc64__ */
+
+static inline int ppc_inst_len(struct ppc_inst x)
+{
+   return (ppc_inst_prefixed(x)) ? 8  : 4;
+}
+
 int probe_user_read_inst(struct ppc_inst *inst,
 struct ppc_inst *nip);
 int probe_kernel_read_inst(struct ppc_inst *inst,
diff --git a/arch/powerpc/include/asm/kprobes.h 
b/arch/powerpc/include/asm/kprobes.h
index 66b3f2983b22..4fc0e15e23a5 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -43,7 +43,7 @@ extern kprobe_opcode_t optprobe_template_ret[];
 extern kprobe_opcode_t optprobe_template_end[];
 
 /* Fixed instruction size for powerpc */
-#define MAX_INSN_SIZE  1
+#define MAX_INSN_SIZE  2
 #define MAX_OPTIMIZED_LENGTH   sizeof(kprobe_opcode_t) /* 4 bytes */
 #define MAX_OPTINSN_SIZE   (optprobe_template_end - 
optprobe_template_entry)
 #define RELATIVEJUMP_SIZE  sizeof(kprobe_opcode_t) /* 4 bytes */
diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h
index c0a35e4586a5..12e52aa179b6 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -105,11 +105,41 @@ static inline int __access_ok(unsigned long addr, 
unsigned long size,
 #define __put_user_inatomic(x, ptr) \
__put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
+#ifdef __powerpc64__
+#define __get_user_instr(x, ptr) 

[PATCH v6 22/28] powerpc: Define new SRR1 bits for a future ISA version

2020-04-27 Thread Jordan Niethe
Add the BOUNDARY SRR1 bit definition for when the cause of an alignment
exception is a prefixed instruction that crosses a 64-byte boundary.
Add the PREFIXED SRR1 bit definition for exceptions caused by prefixed
instructions.

Bit 35 of SRR1 is called SRR1_ISI_N_OR_G. This name comes from it being
used to indicate that an ISI was due to the access being no-exec or
guarded. A future ISA version adds another purpose. It is also set if
there is an access in a cache-inhibited location for prefixed
instruction.  Rename from SRR1_ISI_N_OR_G to SRR1_ISI_N_G_OR_CIP.

Signed-off-by: Jordan Niethe 
---
v2: Combined all the commits concerning SRR1 bits.
---
 arch/powerpc/include/asm/reg.h  | 4 +++-
 arch/powerpc/kvm/book3s_hv_nested.c | 2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c7758c2ccc5f..173f33df4fab 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -762,7 +762,7 @@
 #endif
 
 #define   SRR1_ISI_NOPT		0x40000000 /* ISI: Not found in hash */
-#define   SRR1_ISI_N_OR_G	0x10000000 /* ISI: Access is no-exec or G */
+#define   SRR1_ISI_N_G_OR_CIP	0x10000000 /* ISI: Access is no-exec or G or CI 
for a prefixed instruction */
 #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection 
fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
 #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 
*/
@@ -789,6 +789,8 @@
 #define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent 
addr */
 
 #define   SRR1_MCE_MCP		0x00080000 /* Machine check signal caused 
interrupt */
+#define   SRR1_BOUNDARY		0x10000000 /* Prefixed instruction 
crosses 64-byte boundary */
+#define   SRR1_PREFIXED		0x20000000 /* Exception caused by 
prefixed instruction */
 
 #define SPRN_HSRR0 0x13A   /* Save/Restore Register 0 */
 #define SPRN_HSRR1 0x13B   /* Save/Restore Register 1 */
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index dc97e5be76f6..6ab685227574 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1169,7 +1169,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu 
*vcpu,
} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
/* Can we execute? */
if (!gpte_p->may_execute) {
-   flags |= SRR1_ISI_N_OR_G;
+   flags |= SRR1_ISI_N_G_OR_CIP;
goto forward_to_l1;
}
} else {
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 220305454c23..b53a9f1c1a46 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -1260,7 +1260,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned 
long addr,
status &= ~DSISR_NOHPTE;/* DSISR_NOHPTE == SRR1_ISI_NOPT */
if (!data) {
if (gr & (HPTE_R_N | HPTE_R_G))
-   return status | SRR1_ISI_N_OR_G;
+   return status | SRR1_ISI_N_G_OR_CIP;
if (!hpte_read_permission(pp, slb_v & key))
return status | SRR1_ISI_PROT;
} else if (status & DSISR_ISSTORE) {
-- 
2.17.1



[PATCH v6 21/28] powerpc: Enable Prefixed Instructions

2020-04-27 Thread Jordan Niethe
From: Alistair Popple 

Prefixed instructions have their own FSCR bit which needs to be enabled via
a CPU feature. The kernel will save the FSCR for problem state but it
needs to be enabled initially.

If prefixed instructions are made unavailable by the [H]FSCR, attempting
to use them will cause a facility unavailable exception. Add "PREFIX" to
the facility_strings[].

Currently there are no prefixed instructions that are actually emulated
by emulate_instruction() within facility_unavailable_exception().
However, when caused by a prefixed instruction the SRR1 PREFIXED bit is
set. Prepare for dealing with emulated prefixed instructions by checking
for this bit.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Alistair Popple 
Signed-off-by: Jordan Niethe 
---
v4:
- Squash "Check for prefixed instructions in
  facility_unavailable_exception()" here
- Remove dt parts for now
---
 arch/powerpc/include/asm/reg.h | 3 +++
 arch/powerpc/kernel/traps.c| 1 +
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1aa46dff0957..c7758c2ccc5f 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -397,6 +397,7 @@
 #define SPRN_RWMR  0x375   /* Region-Weighting Mode Register */
 
 /* HFSCR and FSCR bit numbers are the same */
+#define FSCR_PREFIX_LG 13  /* Enable Prefix Instructions */
 #define FSCR_SCV_LG12  /* Enable System Call Vectored */
 #define FSCR_MSGP_LG   10  /* Enable MSGP */
 #define FSCR_TAR_LG8   /* Enable Target Address Register */
@@ -408,11 +409,13 @@
 #define FSCR_VECVSX_LG 1   /* Enable VMX/VSX  */
 #define FSCR_FP_LG 0   /* Enable Floating Point */
 #define SPRN_FSCR  0x099   /* Facility Status & Control Register */
+#define   FSCR_PREFIX  __MASK(FSCR_PREFIX_LG)
 #define   FSCR_SCV __MASK(FSCR_SCV_LG)
 #define   FSCR_TAR __MASK(FSCR_TAR_LG)
 #define   FSCR_EBB __MASK(FSCR_EBB_LG)
 #define   FSCR_DSCR__MASK(FSCR_DSCR_LG)
 #define SPRN_HFSCR 0xbe/* HV=1 Facility Status & Control Register */
+#define   HFSCR_PREFIX __MASK(FSCR_PREFIX_LG)
 #define   HFSCR_MSGP   __MASK(FSCR_MSGP_LG)
 #define   HFSCR_TAR__MASK(FSCR_TAR_LG)
 #define   HFSCR_EBB__MASK(FSCR_EBB_LG)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 82a3438300fd..a4764b039749 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1720,6 +1720,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
[FSCR_TAR_LG] = "TAR",
[FSCR_MSGP_LG] = "MSGP",
[FSCR_SCV_LG] = "SCV",
+   [FSCR_PREFIX_LG] = "PREFIX",
};
char *facility = "unknown";
u64 value;
-- 
2.17.1



[PATCH v6 20/28] powerpc: Make test_translate_branch() independent of instruction length

2020-04-27 Thread Jordan Niethe
test_translate_branch() uses two pointers to instructions within a
buffer, p and q, to test patch_branch(). The pointer arithmetic done on
them assumes a size of 4. This will not work if the instruction length
changes. Instead do the arithmetic relative to the void * to the buffer.

Signed-off-by: Jordan Niethe 
---
v4: New to series
---
 arch/powerpc/lib/code-patching.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 85ad61aa0304..53b0d5cbc86a 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -570,7 +570,7 @@ static void __init test_branch_bform(void)
 static void __init test_translate_branch(void)
 {
unsigned long addr;
-   struct ppc_inst *p, *q;
+   void *p, *q;
struct ppc_inst instr;
void *buf;
 
@@ -584,7 +584,7 @@ static void __init test_translate_branch(void)
addr = (unsigned long)p;
patch_branch(p, addr, 0);
check(instr_is_branch_to_addr(p, addr));
-   q = p + 1;
+   q = p + 4;
translate_branch(, q, p);
patch_instruction(q, instr);
check(instr_is_branch_to_addr(q, addr));
@@ -640,7 +640,7 @@ static void __init test_translate_branch(void)
create_cond_branch(, p, addr, 0);
patch_instruction(p, instr);
check(instr_is_branch_to_addr(p, addr));
-   q = p + 1;
+   q = buf + 4;
translate_branch(, q, p);
patch_instruction(q, instr);
check(instr_is_branch_to_addr(q, addr));
-- 
2.17.1



[PATCH v6 19/28] powerpc/xmon: Move insertion of breakpoint for xol'ing

2020-04-27 Thread Jordan Niethe
When a new breakpoint is created, the second instruction of that
breakpoint is patched with a trap instruction. This assumes the length
of the instruction is always the same. In preparation for prefixed
instructions, remove this assumption. Insert the trap instruction at the
same time the first instruction is inserted.

Signed-off-by: Jordan Niethe 
---
 arch/powerpc/xmon/xmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 716967f12063..ce2a0150d43c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -877,7 +877,6 @@ static struct bpt *new_breakpoint(unsigned long a)
if (!bp->enabled && atomic_read(>ref_count) == 0) {
bp->address = a;
bp->instr = (void *)(bpt_table + ((bp - bpts) * 
BPT_WORDS));
-   patch_instruction(bp->instr + 1, ppc_inst(bpinstr));
return bp;
}
}
@@ -909,6 +908,7 @@ static void insert_bpts(void)
continue;
}
patch_instruction(bp->instr, instr);
+   patch_instruction((void *)bp->instr + ppc_inst_len(instr), 
ppc_inst(bpinstr));
if (bp->enabled & BP_CIABR)
continue;
if (patch_instruction((struct ppc_inst *)bp->address,
-- 
2.17.1



[PATCH v6 18/28] powerpc/xmon: Use a function for reading instructions

2020-04-27 Thread Jordan Niethe
Currently in xmon, mread() is used for reading instructions. In
preparation for prefixed instructions, create and use a new function,
mread_instr(), especially for reading instructions.

Signed-off-by: Jordan Niethe 
---
v5: New to series, separated from "Add prefixed instructions to
instruction data type"
v6: mread_instr(): correctly return error status
---
 arch/powerpc/xmon/xmon.c | 28 
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 65abae7a0103..716967f12063 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -121,6 +121,7 @@ static unsigned bpinstr = 0x7fe8;   /* trap */
 static int cmds(struct pt_regs *);
 static int mread(unsigned long, void *, int);
 static int mwrite(unsigned long, void *, int);
+static int mread_instr(unsigned long, struct ppc_inst *);
 static int handle_fault(struct pt_regs *);
 static void byterev(unsigned char *, int);
 static void memex(void);
@@ -895,7 +896,7 @@ static void insert_bpts(void)
for (i = 0; i < NBPTS; ++i, ++bp) {
if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0)
continue;
-   if (mread(bp->address, , 4) != 4) {
+   if (!mread_instr(bp->address, )) {
printf("Couldn't read instruction at %lx, "
   "disabling breakpoint there\n", bp->address);
bp->enabled = 0;
@@ -945,7 +946,7 @@ static void remove_bpts(void)
for (i = 0; i < NBPTS; ++i, ++bp) {
if ((bp->enabled & (BP_TRAP|BP_CIABR)) != BP_TRAP)
continue;
-   if (mread(bp->address, , 4) == 4
+   if (mread_instr(bp->address, )
&& ppc_inst_equal(instr, ppc_inst(bpinstr))
&& patch_instruction(
(struct ppc_inst *)bp->address, 
ppc_inst_read(bp->instr)) != 0)
@@ -1161,7 +1162,7 @@ static int do_step(struct pt_regs *regs)
force_enable_xmon();
/* check we are in 64-bit kernel mode, translation enabled */
if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) {
-   if (mread(regs->nip, , 4) == 4) {
+   if (mread_instr(regs->nip, )) {
stepped = emulate_step(regs, instr);
if (stepped < 0) {
printf("Couldn't single-step %s instruction\n",
@@ -1328,7 +1329,7 @@ static long check_bp_loc(unsigned long addr)
printf("Breakpoints may only be placed at kernel addresses\n");
return 0;
}
-   if (!mread(addr, , sizeof(instr))) {
+   if (!mread_instr(addr, )) {
printf("Can't read instruction at address %lx\n", addr);
return 0;
}
@@ -2121,6 +2122,25 @@ mwrite(unsigned long adrs, void *buf, int size)
return n;
 }
 
+static int
+mread_instr(unsigned long adrs, struct ppc_inst *instr)
+{
+   volatile int n;
+
+   n = 0;
+   if (setjmp(bus_error_jmp) == 0) {
+   catch_memory_errors = 1;
+   sync();
+   *instr = ppc_inst_read((struct ppc_inst *)adrs);
+   sync();
+   /* wait a little while to see if we get a machine check */
+   __delay(200);
+   n = ppc_inst_len(*instr);
+   }
+   catch_memory_errors = 0;
+   return n;
+}
+
 static int fault_type;
 static int fault_except;
 static char *fault_chars[] = { "--", "**", "##" };
-- 
2.17.1



[PATCH v6 17/28] powerpc: Introduce a function for reporting instruction length

2020-04-27 Thread Jordan Niethe
Currently all instructions have the same length, but in preparation for
prefixed instructions introduce a function for returning instruction
length.

Signed-off-by: Jordan Niethe 
---
v6: - feature-fixups.c: do_final_fixups(): use here
- ppc_inst_len(): change return type from bool to int
- uprobes: Use ppc_inst_read() before calling ppc_inst_len()
---
 arch/powerpc/include/asm/inst.h   |  5 +
 arch/powerpc/kernel/kprobes.c |  6 --
 arch/powerpc/kernel/uprobes.c |  2 +-
 arch/powerpc/lib/feature-fixups.c | 14 +++---
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 0d581b332c20..2f3c9d5bcf7c 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -17,6 +17,11 @@ static inline u32 ppc_inst_val(struct ppc_inst x)
return x.val;
 }
 
+static inline int ppc_inst_len(struct ppc_inst x)
+{
+   return sizeof(struct ppc_inst);
+}
+
 static inline int ppc_inst_primary_opcode(struct ppc_inst x)
 {
return ppc_inst_val(x) >> 26;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index ff53e5ef7e40..8d17cfdcdc54 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -474,14 +474,16 @@ NOKPROBE_SYMBOL(trampoline_probe_handler);
  */
 int kprobe_post_handler(struct pt_regs *regs)
 {
+   int len;
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
if (!cur || user_mode(regs))
return 0;
 
+   len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn));
/* make sure we got here for instruction we have a kprobe on */
-   if (((unsigned long)cur->ainsn.insn + 4) != regs->nip)
+   if (((unsigned long)cur->ainsn.insn + len) != regs->nip)
return 0;
 
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
@@ -490,7 +492,7 @@ int kprobe_post_handler(struct pt_regs *regs)
}
 
/* Adjust nip to after the single-stepped instruction */
-   regs->nip = (unsigned long)cur->addr + 4;
+   regs->nip = (unsigned long)cur->addr + len;
regs->msr |= kcb->kprobe_saved_msr;
 
/*Restore back the original saved kprobes variables and continue. */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 6893d40a48c5..83e883e1a42d 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -112,7 +112,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, 
struct pt_regs *regs)
 * support doesn't exist and have to fix-up the next instruction
 * to be executed.
 */
-   regs->nip = utask->vaddr + MAX_UINSN_BYTES;
+   regs->nip = utask->vaddr + ppc_inst_len(ppc_inst_read(>insn));
 
user_disable_single_step(current);
return 0;
diff --git a/arch/powerpc/lib/feature-fixups.c 
b/arch/powerpc/lib/feature-fixups.c
index 13ec3264a565..f4845e740338 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -390,20 +390,20 @@ void do_lwsync_fixups(unsigned long value, void 
*fixup_start, void *fixup_end)
 static void do_final_fixups(void)
 {
 #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
-   struct ppc_inst *src, *dest;
-   unsigned long length;
+   struct ppc_inst inst, *src, *dest, *end;
 
if (PHYSICAL_START == 0)
return;
 
src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START);
dest = (struct ppc_inst *)KERNELBASE;
-   length = (__end_interrupts - _stext) / sizeof(struct ppc_inst);
+   end = (void *)src + (__end_interrupts - _stext);
 
-   while (length--) {
-   raw_patch_instruction(dest, ppc_inst_read(src));
-   src++;
-   dest++;
+   while (src < end) {
+   inst = ppc_inst_read(src);
+   raw_patch_instruction(dest, inst);
+   src = (void *)src + ppc_inst_len(inst);
+   dest = (void *)dest + ppc_inst_len(inst);
}
 #endif
 }
-- 
2.17.1



[PATCH v6 16/28] powerpc: Define and use __get_user_instr{, inatomic}()

2020-04-27 Thread Jordan Niethe
Define specific __get_user_instr() and __get_user_instr_inatomic()
macros for reading instructions from user space.

Signed-off-by: Jordan Niethe 
---
 arch/powerpc/include/asm/uaccess.h  | 5 +
 arch/powerpc/kernel/align.c | 2 +-
 arch/powerpc/kernel/hw_breakpoint.c | 2 +-
 arch/powerpc/kernel/vecemu.c| 2 +-
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h
index 2f500debae21..c0a35e4586a5 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -105,6 +105,11 @@ static inline int __access_ok(unsigned long addr, unsigned 
long size,
 #define __put_user_inatomic(x, ptr) \
__put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
+#define __get_user_instr(x, ptr) \
+   __get_user_nocheck((x).val, (u32 *)(ptr), sizeof(u32), true)
+
+#define __get_user_instr_inatomic(x, ptr) \
+   __get_user_nosleep((x).val, (u32 *)(ptr), sizeof(u32))
 extern long __put_user_bad(void);
 
 /*
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 9e66e6c62354..b8f56052c6fe 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -304,7 +304,7 @@ int fix_alignment(struct pt_regs *regs)
 */
CHECK_FULL_REGS(regs);
 
-   if (unlikely(__get_user(instr.val, (unsigned int __user *)regs->nip)))
+   if (unlikely(__get_user_instr(instr, (void __user *)regs->nip)))
return -EFAULT;
if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
/* We don't handle PPC little-endian any more... */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 542f65ccf68b..cebab14e2788 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -249,7 +249,7 @@ static bool stepping_handler(struct pt_regs *regs, struct 
perf_event *bp,
struct instruction_op op;
unsigned long addr = info->address;
 
-   if (__get_user_inatomic(instr.val, (unsigned int *)regs->nip))
+   if (__get_user_instr_inatomic(instr, (void __user *)regs->nip))
goto fail;
 
ret = analyse_instr(, regs, instr);
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index bb262707fb5c..adcdba6d534e 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -266,7 +266,7 @@ int emulate_altivec(struct pt_regs *regs)
unsigned int va, vb, vc, vd;
vector128 *vrs;
 
-   if (get_user(instr.val, (unsigned int __user *) regs->nip))
+   if (__get_user_instr(instr, (void __user *) regs->nip))
return -EFAULT;
 
word = ppc_inst_val(instr);
-- 
2.17.1



[PATCH v6 15/28] powerpc/kprobes: Use patch_instruction()

2020-04-27 Thread Jordan Niethe
Instead of using memcpy() and flush_icache_range() use
patch_instruction() which not only accomplishes both of these steps but
will also make it easier to add support for prefixed instructions.

Signed-off-by: Jordan Niethe 
---
v6: New to series.
---
 arch/powerpc/kernel/kprobes.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 17ad844e24f5..ff53e5ef7e40 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -125,11 +125,8 @@ int arch_prepare_kprobe(struct kprobe *p)
}
 
if (!ret) {
-   memcpy(p->ainsn.insn, p->addr,
-   MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+   patch_instruction((struct ppc_inst *)p->ainsn.insn, insn);
p->opcode = ppc_inst_val(insn);
-   flush_icache_range((unsigned long)p->ainsn.insn,
-   (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
}
 
p->ainsn.boostable = 0;
-- 
2.17.1



[PATCH v6 14/28] powerpc: Add a probe_kernel_read_inst() function

2020-04-27 Thread Jordan Niethe
Introduce a probe_kernel_read_inst() function to use in cases where
probe_kernel_read() is used for getting an instruction. This will be
more useful for prefixed instructions.

Signed-off-by: Jordan Niethe 
---
v6: - This was previously just in ftrace.c
---
 arch/powerpc/include/asm/inst.h|  2 ++
 arch/powerpc/kernel/trace/ftrace.c | 23 +--
 arch/powerpc/lib/inst.c| 11 +++
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 3e9a58420151..0d581b332c20 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -39,5 +39,7 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct 
ppc_inst y)
 
 int probe_user_read_inst(struct ppc_inst *inst,
 struct ppc_inst *nip);
+int probe_kernel_read_inst(struct ppc_inst *inst,
+  struct ppc_inst *src);
 
 #endif /* _ASM_INST_H */
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 63228e0e8cfc..a6064e1977ca 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -68,7 +68,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, 
struct ppc_inst new)
 */
 
/* read the text we want to modify */
-   if (probe_kernel_read(, (void *)ip, MCOUNT_INSN_SIZE))
+   if (probe_kernel_read_inst(, (void *)ip))
return -EFAULT;
 
/* Make sure it is what we expect it to be */
@@ -130,7 +130,7 @@ __ftrace_make_nop(struct module *mod,
struct ppc_inst op, pop;
 
/* read where this goes */
-   if (probe_kernel_read(, (void *)ip, sizeof(int))) {
+   if (probe_kernel_read_inst(, (void *)ip)) {
pr_err("Fetching opcode failed.\n");
return -EFAULT;
}
@@ -164,7 +164,7 @@ __ftrace_make_nop(struct module *mod,
/* When using -mkernel_profile there is no load to jump over */
pop = ppc_inst(PPC_INST_NOP);
 
-   if (probe_kernel_read(, (void *)(ip - 4), 4)) {
+   if (probe_kernel_read_inst(, (void *)(ip - 4))) {
pr_err("Fetching instruction at %lx failed.\n", ip - 4);
return -EFAULT;
}
@@ -196,7 +196,7 @@ __ftrace_make_nop(struct module *mod,
 * Check what is in the next instruction. We can see ld r2,40(r1), but
 * on first pass after boot we will see mflr r0.
 */
-   if (probe_kernel_read(, (void *)(ip+4), MCOUNT_INSN_SIZE)) {
+   if (probe_kernel_read_inst(, (void *)(ip+4))) {
pr_err("Fetching op failed.\n");
return -EFAULT;
}
@@ -348,7 +348,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
return -1;
 
/* New trampoline -- read where this goes */
-   if (probe_kernel_read(, (void *)tramp, sizeof(int))) {
+   if (probe_kernel_read_inst(, (void *)tramp)) {
pr_debug("Fetching opcode failed.\n");
return -1;
}
@@ -398,7 +398,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, 
unsigned long addr)
struct ppc_inst op;
 
/* Read where this goes */
-   if (probe_kernel_read(, (void *)ip, sizeof(int))) {
+   if (probe_kernel_read_inst(, (void *)ip)) {
pr_err("Fetching opcode failed.\n");
return -EFAULT;
}
@@ -524,7 +524,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long 
addr)
struct module *mod = rec->arch.mod;
 
/* read where this goes */
-   if (probe_kernel_read(op, ip, sizeof(op)))
+   if (probe_kernel_read_inst(op, ip))
+   return -EFAULT;
+
+   if (probe_kernel_read_inst(op + 1, ip + 4))
return -EFAULT;
 
if (!expected_nop_sequence(ip, op[0], op[1])) {
@@ -587,7 +590,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long 
addr)
unsigned long ip = rec->ip;
 
/* read where this goes */
-   if (probe_kernel_read(, (void *)ip, MCOUNT_INSN_SIZE))
+   if (probe_kernel_read_inst(, (void *)ip))
return -EFAULT;
 
/* It should be pointing to a nop */
@@ -643,7 +646,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace 
*rec, unsigned long addr)
}
 
/* Make sure we have a nop */
-   if (probe_kernel_read(, ip, sizeof(op))) {
+   if (probe_kernel_read_inst(, ip)) {
pr_err("Unable to read ftrace location %p\n", ip);
return -EFAULT;
}
@@ -721,7 +724,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long 
old_addr,
}
 
/* read where this goes */
-   if (probe_kernel_read(, (void *)ip, sizeof(int))) {
+   if (probe_kernel_read_inst(, (void *)ip)) {
pr_err("Fetching opcode failed.\n");
return -EFAULT;
}
diff --git a/arch/powerpc/lib/inst.c 

[PATCH v6 13/28] powerpc: Add a probe_user_read_inst() function

2020-04-27 Thread Jordan Niethe
Introduce a probe_user_read_inst() function to use in cases where
probe_user_read() is used for getting an instruction. This will be more
useful for prefixed instructions.

Signed-off-by: Jordan Niethe 
---
v6: - New to series
---
 arch/powerpc/include/asm/inst.h |  3 +++
 arch/powerpc/lib/Makefile   |  2 +-
 arch/powerpc/lib/inst.c | 18 ++
 arch/powerpc/mm/fault.c |  2 +-
 4 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/lib/inst.c

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 552e953bf04f..3e9a58420151 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -37,4 +37,7 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct 
ppc_inst y)
return ppc_inst_val(x) == ppc_inst_val(y);
 }
 
+int probe_user_read_inst(struct ppc_inst *inst,
+struct ppc_inst *nip);
+
 #endif /* _ASM_INST_H */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index b8de3be10eb4..546591848219 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
 endif
 
-obj-y += alloc.o code-patching.o feature-fixups.o pmem.o
+obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o
 
 ifndef CONFIG_KASAN
 obj-y  +=  string.o memcmp_$(BITS).o
diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c
new file mode 100644
index ..eaf786afad2b
--- /dev/null
+++ b/arch/powerpc/lib/inst.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Copyright 2020, IBM Corporation.
+ */
+
+#include 
+#include 
+
+int probe_user_read_inst(struct ppc_inst *inst,
+struct ppc_inst *nip)
+{
+   unsigned int val;
+   int err;
+
+   err = probe_user_read(, nip, sizeof(val));
+   *inst = ppc_inst(val);
+   return err;
+}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7a68a3b32615..4983197d4a8e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -281,7 +281,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, 
unsigned long address,
access_ok(nip, sizeof(*nip))) {
struct ppc_inst inst;
 
-   if (!probe_user_read(, nip, sizeof(inst)))
+   if (!probe_user_read_inst(, (struct ppc_inst 
__user *)nip))
return !store_updates_sp(inst);
*must_retry = true;
}
-- 
2.17.1



[PATCH v6 12/28] powerpc: Use a function for reading instructions

2020-04-27 Thread Jordan Niethe
Prefixed instructions will mean there are instructions of different
length. As a result dereferencing a pointer to an instruction will not
necessarily give the desired result. Introduce a function for reading
instructions from memory into the instruction data type.

Signed-off-by: Jordan Niethe 
---
v4: New to series
v5: - Rename read_inst() -> probe_kernel_read_inst()
- No longer modify uprobe probe type in this patch
v6: - feature-fixups.c: do_final_fixups(): Use here
- arch_prepare_kprobe(): patch_instruction(): no longer part of this
  patch
- Move probe_kernel_read_inst() out of this patch
- Use in uprobes
---
 arch/powerpc/include/asm/inst.h|  5 +
 arch/powerpc/kernel/kprobes.c  |  6 +++---
 arch/powerpc/kernel/mce_power.c|  2 +-
 arch/powerpc/kernel/optprobes.c|  4 ++--
 arch/powerpc/kernel/trace/ftrace.c |  4 ++--
 arch/powerpc/kernel/uprobes.c  |  2 +-
 arch/powerpc/lib/code-patching.c   | 23 +++
 arch/powerpc/lib/feature-fixups.c  |  4 ++--
 arch/powerpc/xmon/xmon.c   |  6 +++---
 9 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 19d8bb7a1c2b..552e953bf04f 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -27,6 +27,11 @@ static inline struct ppc_inst ppc_inst_swab(struct ppc_inst 
x)
return ppc_inst(swab32(ppc_inst_val(x)));
 }
 
+static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr)
+{
+   return *ptr;
+}
+
 static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
 {
return ppc_inst_val(x) == ppc_inst_val(y);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9ed996cb0589..17ad844e24f5 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -106,7 +106,7 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, 
unsigned int offset)
 int arch_prepare_kprobe(struct kprobe *p)
 {
int ret = 0;
-   struct ppc_inst insn = *(struct ppc_inst *)p->addr;
+   struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr);
 
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
@@ -127,7 +127,7 @@ int arch_prepare_kprobe(struct kprobe *p)
if (!ret) {
memcpy(p->ainsn.insn, p->addr,
MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-   p->opcode = *p->addr;
+   p->opcode = ppc_inst_val(insn);
flush_icache_range((unsigned long)p->ainsn.insn,
(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
}
@@ -217,7 +217,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
 {
int ret;
-   struct ppc_inst insn = *(struct ppc_inst *)p->ainsn.insn;
+   struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
 
/* regs->nip is also adjusted if emulate_step returns 1 */
ret = emulate_step(regs, insn);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 7118b46a6543..859b602fa270 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -374,7 +374,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, 
uint64_t *addr,
pfn = addr_to_pfn(regs, regs->nip);
if (pfn != ULONG_MAX) {
instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
-   instr = *(struct ppc_inst *)(instr_addr);
+   instr = ppc_inst_read((struct ppc_inst *)instr_addr);
if (!analyse_instr(, , instr)) {
pfn = addr_to_pfn(regs, op.ea);
*addr = op.ea;
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index dfeb317a58ad..d704f9598f48 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -100,8 +100,8 @@ static unsigned long can_optimize(struct kprobe *p)
 * Ensure that the instruction is not a conditional branch,
 * and that can be emulated.
 */
-   if (!is_conditional_branch(*(struct ppc_inst *)p->ainsn.insn) &&
-   analyse_instr(, , *(struct ppc_inst 
*)p->ainsn.insn) == 1) {
+   if (!is_conditional_branch(ppc_inst_read((struct ppc_inst 
*)p->ainsn.insn)) &&
+   analyse_instr(, , ppc_inst_read((struct 
ppc_inst *)p->ainsn.insn)) == 1) {
emulate_update_regs(, );
nip = regs.nip;
}
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 2c70d1854b5c..63228e0e8cfc 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -846,7 +846,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
struct ppc_inst old, new;
 

[PATCH v6 11/28] powerpc: Use a datatype for instructions

2020-04-27 Thread Jordan Niethe
Currently unsigned ints are used to represent instructions on powerpc.
This has worked well as instructions have always been 4 byte words.
However, a future ISA version will introduce some changes to
instructions that mean this scheme will no longer work as well. This
change is Prefixed Instructions. A prefixed instruction is made up of a
word prefix followed by a word suffix to make an 8 byte double word
instruction. No matter the endianness of the system the prefix always
comes first. Prefixed instructions are only planned for powerpc64.

Introduce a ppc_inst type to represent both prefixed and word
instructions on powerpc64 while keeping it possible to exclusively have
word instructions on powerpc32.

Signed-off-by: Jordan Niethe 
---
v4: New to series
v5: Add to epapr_paravirt.c, kgdb.c
v6: - setup_32.c: machine_init(): Use type
- feature-fixups.c: do_final_fixups(): Use type
- optprobes.c: arch_prepare_optimized_kprobe(): change a void * to
  struct ppc_inst *
- fault.c: store_updates_sp(): Use type
- Change ppc_inst_equal() implementation from memcpy()
---
 arch/powerpc/include/asm/code-patching.h | 32 +-
 arch/powerpc/include/asm/inst.h  | 18 --
 arch/powerpc/include/asm/sstep.h |  5 +-
 arch/powerpc/include/asm/uprobes.h   |  5 +-
 arch/powerpc/kernel/align.c  |  4 +-
 arch/powerpc/kernel/epapr_paravirt.c |  6 +-
 arch/powerpc/kernel/hw_breakpoint.c  |  4 +-
 arch/powerpc/kernel/jump_label.c |  2 +-
 arch/powerpc/kernel/kgdb.c   |  4 +-
 arch/powerpc/kernel/kprobes.c|  8 +--
 arch/powerpc/kernel/mce_power.c  |  5 +-
 arch/powerpc/kernel/optprobes.c  | 40 ++--
 arch/powerpc/kernel/setup_32.c   |  4 +-
 arch/powerpc/kernel/trace/ftrace.c   | 81 
 arch/powerpc/kernel/vecemu.c |  5 +-
 arch/powerpc/lib/code-patching.c | 71 +++--
 arch/powerpc/lib/feature-fixups.c| 58 -
 arch/powerpc/lib/sstep.c |  4 +-
 arch/powerpc/lib/test_emulate_step.c |  9 +--
 arch/powerpc/mm/fault.c  |  4 +-
 arch/powerpc/perf/core-book3s.c  |  4 +-
 arch/powerpc/xmon/xmon.c | 22 +++
 arch/powerpc/xmon/xmon_bpts.h|  6 +-
 23 files changed, 206 insertions(+), 195 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 48e021957ee5..eacc9102c251 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -23,33 +23,33 @@
 #define BRANCH_ABSOLUTE0x2
 
 bool is_offset_in_branch_range(long offset);
-int create_branch(unsigned int *instr, const unsigned int *addr,
+int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
  unsigned long target, int flags);
-int create_cond_branch(unsigned int *instr, const unsigned int *addr,
+int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
   unsigned long target, int flags);
-int patch_branch(unsigned int *addr, unsigned long target, int flags);
-int patch_instruction(unsigned int *addr, unsigned int instr);
-int raw_patch_instruction(unsigned int *addr, unsigned int instr);
+int patch_branch(struct ppc_inst *addr, unsigned long target, int flags);
+int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr);
+int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr);
 
 static inline unsigned long patch_site_addr(s32 *site)
 {
return (unsigned long)site + *site;
 }
 
-static inline int patch_instruction_site(s32 *site, unsigned int instr)
+static inline int patch_instruction_site(s32 *site, struct ppc_inst instr)
 {
-   return patch_instruction((unsigned int *)patch_site_addr(site), instr);
+   return patch_instruction((struct ppc_inst *)patch_site_addr(site), 
instr);
 }
 
 static inline int patch_branch_site(s32 *site, unsigned long target, int flags)
 {
-   return patch_branch((unsigned int *)patch_site_addr(site), target, 
flags);
+   return patch_branch((struct ppc_inst *)patch_site_addr(site), target, 
flags);
 }
 
 static inline int modify_instruction(unsigned int *addr, unsigned int clr,
 unsigned int set)
 {
-   return patch_instruction(addr, ppc_inst((*addr & ~clr) | set));
+   return patch_instruction((struct ppc_inst *)addr, ppc_inst((*addr & 
~clr) | set));
 }
 
 static inline int modify_instruction_site(s32 *site, unsigned int clr, 
unsigned int set)
@@ -57,13 +57,13 @@ static inline int modify_instruction_site(s32 *site, 
unsigned int clr, unsigned
return modify_instruction((unsigned int *)patch_site_addr(site), clr, 
set);
 }
 
-int instr_is_relative_branch(unsigned int instr);
-int instr_is_relative_link_branch(unsigned int instr);
-int instr_is_branch_to_addr(const unsigned int *instr, unsigned long 

[PATCH v6 10/28] powerpc: Introduce functions for instruction equality

2020-04-27 Thread Jordan Niethe
In preparation for an instruction data type that can not be directly
used with the '==' operator use functions for checking equality.

Reviewed-by: Balamuruhan S 
Signed-off-by: Jordan Niethe 
---
v5: Remove ppc_inst_null()
---
 arch/powerpc/include/asm/inst.h|  5 +
 arch/powerpc/kernel/trace/ftrace.c | 15 ---
 arch/powerpc/lib/code-patching.c   | 12 ++--
 arch/powerpc/xmon/xmon.c   |  4 ++--
 4 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 23fd57a86b03..0c5dc539160a 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -23,4 +23,9 @@ static inline u32 ppc_inst_swab(u32 x)
return ppc_inst(swab32(ppc_inst_val(x)));
 }
 
+static inline bool ppc_inst_equal(u32 x, u32 y)
+{
+   return x == y;
+}
+
 #endif /* _ASM_INST_H */
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 335b10008035..e2dcb9d92c39 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -72,7 +72,7 @@ ftrace_modify_code(unsigned long ip, unsigned int old, 
unsigned int new)
return -EFAULT;
 
/* Make sure it is what we expect it to be */
-   if (replaced != old) {
+   if (!ppc_inst_equal(replaced, old)) {
pr_err("%p: replaced (%#x) != old (%#x)",
(void *)ip, ppc_inst_val(replaced), ppc_inst_val(old));
return -EINVAL;
@@ -170,7 +170,8 @@ __ftrace_make_nop(struct module *mod,
}
 
/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
-   if (op != ppc_inst(PPC_INST_MFLR) && op != ppc_inst(PPC_INST_STD_LR)) {
+   if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) &&
+   !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
pr_err("Unexpected instruction %08x around bl _mcount\n", 
ppc_inst_val(op));
return -EINVAL;
}
@@ -200,7 +201,7 @@ __ftrace_make_nop(struct module *mod,
return -EFAULT;
}
 
-   if (op != ppc_inst(PPC_INST_LD_TOC)) {
+   if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_LD_TOC))) {
pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, 
ppc_inst_val(op));
return -EINVAL;
}
@@ -497,7 +498,7 @@ expected_nop_sequence(void *ip, unsigned int op0, unsigned 
int op1)
 * The load offset is different depending on the ABI. For simplicity
 * just mask it out when doing the compare.
 */
-   if ((op0 != ppc_inst(0x48000008)) || (ppc_inst_val(op1) & 0xffff0000) 
!= 0xe8410000)
+   if (!ppc_inst_equal(op0, ppc_inst(0x48000008)) || (ppc_inst_val(op1) 
& 0xffff0000) != 0xe8410000)
return 0;
return 1;
 }
@@ -506,7 +507,7 @@ static int
 expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
 {
/* look for patched "NOP" on ppc64 with -mprofile-kernel */
-   if (op0 != ppc_inst(PPC_INST_NOP))
+   if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP)))
return 0;
return 1;
 }
@@ -589,7 +590,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long 
addr)
return -EFAULT;
 
/* It should be pointing to a nop */
-   if (op != ppc_inst(PPC_INST_NOP)) {
+   if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_NOP))) {
pr_err("Expected NOP but have %x\n", op);
return -EINVAL;
}
@@ -646,7 +647,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace 
*rec, unsigned long addr)
return -EFAULT;
}
 
-   if (op != ppc_inst(PPC_INST_NOP)) {
+   if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
pr_err("Unexpected call sequence at %p: %x\n", ip, 
ppc_inst_val(op));
return -EINVAL;
}
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index f5c6dcbac44b..d298bb16936e 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -479,7 +479,7 @@ static void __init test_branch_iform(void)
/* Check flags are masked correctly */
err = create_branch(&instr, &instr, addr, 0xFFFFFFFC);
check(instr_is_branch_to_addr(&instr, addr));
-   check(instr == ppc_inst(0x48000000));
+   check(ppc_inst_equal(instr, ppc_inst(0x48000000)));
 }
 
 static void __init test_create_function_call(void)
@@ -564,7 +564,7 @@ static void __init test_branch_bform(void)
/* Check flags are masked correctly */
err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
check(instr_is_branch_to_addr(&instr, addr));
-   check(instr == ppc_inst(0x43FF0000));
+   check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));
 }
 
 static void __init test_translate_branch(void)
@@ -598,7 +598,7 @@ static void __init test_translate_branch(void)
patch_instruction(q, instr);
check(instr_is_branch_to_addr(p, addr));

[PATCH v6 09/28] powerpc: Use a function for byte swapping instructions

2020-04-27 Thread Jordan Niethe
Use a function for byte swapping instructions in preparation of a more
complicated instruction type.

Reviewed-by: Balamuruhan S 
Signed-off-by: Jordan Niethe 
---
 arch/powerpc/include/asm/inst.h | 5 +
 arch/powerpc/kernel/align.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 442a95f20de7..23fd57a86b03 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -18,4 +18,9 @@ static inline int ppc_inst_primary_opcode(u32 x)
return ppc_inst_val(x) >> 26;
 }
 
+static inline u32 ppc_inst_swab(u32 x)
+{
+   return ppc_inst(swab32(ppc_inst_val(x)));
+}
+
 #endif /* _ASM_INST_H */
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 47dbba81a227..a63216da8cf1 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -310,7 +310,7 @@ int fix_alignment(struct pt_regs *regs)
/* We don't handle PPC little-endian any more... */
if (cpu_has_feature(CPU_FTR_PPC_LE))
return -EIO;
-   instr = swab32(instr);
+   instr = ppc_inst_swab(instr);
}
 
 #ifdef CONFIG_SPE
-- 
2.17.1



[PATCH v6 08/28] powerpc: Use a function for getting the instruction op code

2020-04-27 Thread Jordan Niethe
In preparation for using a data type for instructions that can not be
directly used with the '>>' operator use a function for getting the op
code of an instruction.

Signed-off-by: Jordan Niethe 
---
v4: New to series
v6: - Rename ppc_inst_primary() to ppc_inst_primary_opcode()
- Use in vecemu.c, fault.c, sstep.c
- Move this patch after the ppc_inst_val() patch
---
 arch/powerpc/include/asm/inst.h  | 5 +
 arch/powerpc/kernel/align.c  | 2 +-
 arch/powerpc/kernel/vecemu.c | 3 ++-
 arch/powerpc/lib/code-patching.c | 4 ++--
 arch/powerpc/lib/sstep.c | 2 +-
 arch/powerpc/mm/fault.c  | 3 ++-
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 8a9e73bfbd27..442a95f20de7 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -13,4 +13,9 @@ static inline u32 ppc_inst_val(u32 x)
return x;
 }
 
+static inline int ppc_inst_primary_opcode(u32 x)
+{
+   return ppc_inst_val(x) >> 26;
+}
+
 #endif /* _ASM_INST_H */
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 44921001f84a..47dbba81a227 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -314,7 +314,7 @@ int fix_alignment(struct pt_regs *regs)
}
 
 #ifdef CONFIG_SPE
-   if ((ppc_inst_val(instr) >> 26) == 0x4) {
+   if (ppc_inst_primary_opcode(instr) == 0x4) {
int reg = (ppc_inst_val(instr) >> 21) & 0x1f;
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index 1f5e3b4c8ae4..a544590b90e5 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Functions in vector.S */
 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
@@ -268,7 +269,7 @@ int emulate_altivec(struct pt_regs *regs)
return -EFAULT;
 
word = ppc_inst_val(instr);
-   if ((word >> 26) != 4)
+   if (ppc_inst_primary_opcode(instr) != 4)
return -EINVAL; /* not an altivec instruction */
vd = (word >> 21) & 0x1f;
va = (word >> 16) & 0x1f;
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index baa849b1a1f9..f5c6dcbac44b 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -231,7 +231,7 @@ bool is_offset_in_branch_range(long offset)
  */
 bool is_conditional_branch(unsigned int instr)
 {
-   unsigned int opcode = instr >> 26;
+   unsigned int opcode = ppc_inst_primary_opcode(instr);
 
if (opcode == 16)   /* bc, bca, bcl, bcla */
return true;
@@ -289,7 +289,7 @@ int create_cond_branch(unsigned int *instr, const unsigned 
int *addr,
 
 static unsigned int branch_opcode(unsigned int instr)
 {
-   return (instr >> 26) & 0x3F;
+   return ppc_inst_primary_opcode(instr) & 0x3F;
 }
 
 static int instr_is_branch_iform(unsigned int instr)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 337be1c4d23c..9ea77dc9256f 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1175,7 +1175,7 @@ int analyse_instr(struct instruction_op *op, const struct 
pt_regs *regs,
word = ppc_inst_val(instr);
op->type = COMPUTE;
 
-   opcode = instr >> 26;
+   opcode = ppc_inst_primary_opcode(instr);
switch (opcode) {
case 16:/* bc */
op->type = BRANCH;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7c9f3f686044..2789e1dbd605 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Check whether the instruction inst is a store using
@@ -52,7 +53,7 @@ static bool store_updates_sp(unsigned int inst)
if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1)
return false;
/* check major opcode */
-   switch (inst >> 26) {
+   switch (ppc_inst_primary_opcode(inst)) {
case OP_STWU:
case OP_STBU:
case OP_STHU:
-- 
2.17.1



[PATCH v6 07/28] powerpc: Use an accessor for instructions

2020-04-27 Thread Jordan Niethe
In preparation for introducing a more complicated instruction type to
accomodate prefixed instructions use an accessor for getting an
instruction as a u32.

Signed-off-by: Jordan Niethe 
---
v4: New to series
v5: Remove references to 'word' instructions
v6: - test_emulate_step.c: execute_compute_instr(): Introduce
  ppc_inst_val() here instead of in a later patch
- ftrace.c: __ftrace_make_call(): Introduce adding ppc_inst_val() in
  this patch
- fault.c: store_updates_sp(): Start using ppc_inst_val()
- Move this patch before the ppc_inst_primary_opcode() patch
---
 arch/powerpc/include/asm/inst.h  |   5 +
 arch/powerpc/include/asm/sstep.h |   6 +-
 arch/powerpc/kernel/align.c  |   6 +-
 arch/powerpc/kernel/kprobes.c|   2 +-
 arch/powerpc/kernel/trace/ftrace.c   |  24 +--
 arch/powerpc/kernel/vecemu.c |  16 +-
 arch/powerpc/lib/code-patching.c |  18 +-
 arch/powerpc/lib/sstep.c | 268 ++-
 arch/powerpc/lib/test_emulate_step.c |   8 +-
 arch/powerpc/mm/fault.c  |   6 +-
 arch/powerpc/xmon/xmon.c |   4 +-
 11 files changed, 186 insertions(+), 177 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 5298ba33b6e5..8a9e73bfbd27 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -8,4 +8,9 @@
 
 #define ppc_inst(x) (x)
 
+static inline u32 ppc_inst_val(u32 x)
+{
+   return x;
+}
+
 #endif /* _ASM_INST_H */
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 769f055509c9..26d729562fe2 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -15,9 +15,9 @@ struct pt_regs;
  * Note that IS_MTMSRD returns true for both an mtmsr (32-bit)
  * and an mtmsrd (64-bit).
  */
-#define IS_MTMSRD(instr)   (((instr) & 0xfc0007be) == 0x7c000124)
-#define IS_RFID(instr) (((instr) & 0xfc0007fe) == 0x4c000024)
-#define IS_RFI(instr)  (((instr) & 0xfc0007fe) == 0x4c000064)
+#define IS_MTMSRD(instr)   ((ppc_inst_val(instr) & 0xfc0007be) == 
0x7c000124)
+#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 
0x4c000024)
+#define IS_RFI(instr)  ((ppc_inst_val(instr) & 0xfc0007fe) == 
0x4c000064)
 
 enum instruction_type {
COMPUTE,/* arith/logical/CR op, etc. */
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 86e9bf62f18c..44921001f84a 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -314,8 +314,8 @@ int fix_alignment(struct pt_regs *regs)
}
 
 #ifdef CONFIG_SPE
-   if ((instr >> 26) == 0x4) {
-   int reg = (instr >> 21) & 0x1f;
+   if ((ppc_inst_val(instr) >> 26) == 0x4) {
+   int reg = (ppc_inst_val(instr) >> 21) & 0x1f;
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
@@ -332,7 +332,7 @@ int fix_alignment(struct pt_regs *regs)
 * when pasting to a co-processor. Furthermore, paste_last is the
 * synchronisation point for preceding copy/paste sequences.
 */
-   if ((instr & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
+   if ((ppc_inst_val(instr) & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
return -EIO;
 
r = analyse_instr(, regs, instr);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index a1a3686f41c6..8420b1944164 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -234,7 +234,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs 
*regs)
 * So, we should never get here... but, its still
 * good to catch them, just in case...
 */
-   printk("Can't step on instruction %x\n", insn);
+   printk("Can't step on instruction %x\n", ppc_inst_val(insn));
BUG();
} else {
/*
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 0318e1ed6248..335b10008035 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -74,7 +74,7 @@ ftrace_modify_code(unsigned long ip, unsigned int old, 
unsigned int new)
/* Make sure it is what we expect it to be */
if (replaced != old) {
pr_err("%p: replaced (%#x) != old (%#x)",
-   (void *)ip, replaced, old);
+   (void *)ip, ppc_inst_val(replaced), ppc_inst_val(old));
return -EINVAL;
}
 
@@ -99,19 +99,19 @@ static int test_24bit_addr(unsigned long ip, unsigned long 
addr)
 
 static int is_bl_op(unsigned int op)
 {
-   return (op & 0xfc000003) == 0x48000001;
+   return (ppc_inst_val(op) & 0xfc000003) == 0x48000001;
 }
 
 static int is_b_op(unsigned int op)
 {
-   return (op & 0xfc03) == 0x4800;
+   return 

[PATCH v6 06/28] powerpc: Use a macro for creating instructions from u32s

2020-04-27 Thread Jordan Niethe
In preparation for instructions having a more complex data type start
using a macro, ppc_inst(), for making an instruction out of a u32.  A
macro is used so that instructions can be used as initializer elements.
Currently this does nothing, but it will allow for creating a data type
that can represent prefixed instructions.

Signed-off-by: Jordan Niethe 
---
v4: New to series
v5: - Rename PPC_INST() -> ppc_inst().
- Use on epapr_paravirt.c, kgdb.c
v6: - Use in setup_32.c
- epapr_paravirt.c: early_init_dt_scan_epapr(): move the use of
  ppc_inst() earlier.
---
 arch/powerpc/include/asm/code-patching.h |  3 +-
 arch/powerpc/include/asm/inst.h  | 11 +
 arch/powerpc/kernel/align.c  |  1 +
 arch/powerpc/kernel/epapr_paravirt.c |  3 +-
 arch/powerpc/kernel/hw_breakpoint.c  |  3 +-
 arch/powerpc/kernel/jump_label.c |  3 +-
 arch/powerpc/kernel/kgdb.c   |  5 ++-
 arch/powerpc/kernel/kprobes.c|  5 ++-
 arch/powerpc/kernel/module_64.c  |  3 +-
 arch/powerpc/kernel/optprobes.c  | 31 ++---
 arch/powerpc/kernel/security.c   |  9 ++--
 arch/powerpc/kernel/setup_32.c   |  2 +-
 arch/powerpc/kernel/trace/ftrace.c   | 25 ++-
 arch/powerpc/kernel/uprobes.c|  1 +
 arch/powerpc/kvm/emulate_loadstore.c |  2 +-
 arch/powerpc/lib/code-patching.c | 57 
 arch/powerpc/lib/feature-fixups.c| 39 
 arch/powerpc/lib/test_emulate_step.c | 39 
 arch/powerpc/xmon/xmon.c |  7 +--
 19 files changed, 138 insertions(+), 111 deletions(-)
 create mode 100644 arch/powerpc/include/asm/inst.h

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 351dda7215b6..48e021957ee5 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Flags for create_branch:
  * "b"   == create_branch(addr, target, 0);
@@ -48,7 +49,7 @@ static inline int patch_branch_site(s32 *site, unsigned long 
target, int flags)
 static inline int modify_instruction(unsigned int *addr, unsigned int clr,
 unsigned int set)
 {
-   return patch_instruction(addr, (*addr & ~clr) | set);
+   return patch_instruction(addr, ppc_inst((*addr & ~clr) | set));
 }
 
 static inline int modify_instruction_site(s32 *site, unsigned int clr, 
unsigned int set)
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
new file mode 100644
index ..5298ba33b6e5
--- /dev/null
+++ b/arch/powerpc/include/asm/inst.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_INST_H
+#define _ASM_INST_H
+
+/*
+ * Instruction data type for POWER
+ */
+
+#define ppc_inst(x) (x)
+
+#endif /* _ASM_INST_H */
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 92045ed64976..86e9bf62f18c 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct aligninfo {
unsigned char len;
diff --git a/arch/powerpc/kernel/epapr_paravirt.c 
b/arch/powerpc/kernel/epapr_paravirt.c
index 9d32158ce36f..e8eb72a65572 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
 extern void epapr_ev_idle(void);
@@ -36,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
 
for (i = 0; i < (len / 4); i++) {
-   u32 inst = be32_to_cpu(insts[i]);
+   u32 inst = ppc_inst(be32_to_cpu(insts[i]));
patch_instruction(epapr_hypercall_start + i, inst);
 #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
patch_instruction(epapr_ev_idle_start + i, inst);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 2462cd7c565c..79f51f182a83 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /*
@@ -243,7 +244,7 @@ dar_range_overlaps(unsigned long dar, int size, struct 
arch_hw_breakpoint *info)
 static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
 struct arch_hw_breakpoint *info)
 {
-   unsigned int instr = 0;
+   unsigned int instr = ppc_inst(0);
int ret, type, size;
struct instruction_op op;
unsigned long addr = info->address;
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index ca37702bde97..daa4afce7ec8 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -6,6 +6,7 @@
 

[PATCH v6 05/28] powerpc: Change calling convention for create_branch() et. al.

2020-04-27 Thread Jordan Niethe
create_branch(), create_cond_branch() and translate_branch() return the
instruction that they create, or return 0 to signal an error. Separate
these concerns in preparation for an instruction type that is not just
an unsigned int.  Fill the created instruction to a pointer passed as
the first parameter to the function and use a non-zero return value to
signify an error.

Signed-off-by: Jordan Niethe 
---
v5: New to series
v6: - setup_32.c: machine_init(): change insn to unsigned int
- Fix typo in commit message
- __ftrace_make_call(): test for err not !err
---
 arch/powerpc/include/asm/code-patching.h |  12 +-
 arch/powerpc/kernel/optprobes.c  |  24 ++--
 arch/powerpc/kernel/setup_32.c   |   4 +-
 arch/powerpc/kernel/trace/ftrace.c   |  24 ++--
 arch/powerpc/lib/code-patching.c | 134 +--
 arch/powerpc/lib/feature-fixups.c|   5 +-
 6 files changed, 119 insertions(+), 84 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 898b54262881..351dda7215b6 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,10 +22,10 @@
 #define BRANCH_ABSOLUTE0x2
 
 bool is_offset_in_branch_range(long offset);
-unsigned int create_branch(const unsigned int *addr,
-  unsigned long target, int flags);
-unsigned int create_cond_branch(const unsigned int *addr,
-   unsigned long target, int flags);
+int create_branch(unsigned int *instr, const unsigned int *addr,
+ unsigned long target, int flags);
+int create_cond_branch(unsigned int *instr, const unsigned int *addr,
+  unsigned long target, int flags);
 int patch_branch(unsigned int *addr, unsigned long target, int flags);
 int patch_instruction(unsigned int *addr, unsigned int instr);
 int raw_patch_instruction(unsigned int *addr, unsigned int instr);
@@ -60,8 +60,8 @@ int instr_is_relative_branch(unsigned int instr);
 int instr_is_relative_link_branch(unsigned int instr);
 int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
 unsigned long branch_target(const unsigned int *instr);
-unsigned int translate_branch(const unsigned int *dest,
- const unsigned int *src);
+int translate_branch(unsigned int *instr, const unsigned int *dest,
+const unsigned int *src);
 extern bool is_conditional_branch(unsigned int instr);
 #ifdef CONFIG_PPC_BOOK3E_64
 void __patch_exception(int exc, unsigned long addr);
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 024f7aad1952..445b3dad82dc 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -251,15 +251,17 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe 
*op, struct kprobe *p)
goto error;
}
 
-   branch_op_callback = create_branch((unsigned int *)buff + 
TMPL_CALL_HDLR_IDX,
-   (unsigned long)op_callback_addr,
-   BRANCH_SET_LINK);
+   rc = create_branch(&branch_op_callback,
+  (unsigned int *)buff + TMPL_CALL_HDLR_IDX,
+  (unsigned long)op_callback_addr,
+  BRANCH_SET_LINK);
 
-   branch_emulate_step = create_branch((unsigned int *)buff + 
TMPL_EMULATE_IDX,
-   (unsigned long)emulate_step_addr,
-   BRANCH_SET_LINK);
+   rc |= create_branch(&branch_emulate_step,
+   (unsigned int *)buff + TMPL_EMULATE_IDX,
+   (unsigned long)emulate_step_addr,
+   BRANCH_SET_LINK);
 
-   if (!branch_op_callback || !branch_emulate_step)
+   if (rc)
goto error;
 
patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
@@ -305,6 +307,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
 
 void arch_optimize_kprobes(struct list_head *oplist)
 {
+   unsigned int instr;
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
 
@@ -315,9 +318,10 @@ void arch_optimize_kprobes(struct list_head *oplist)
 */
memcpy(op->optinsn.copied_insn, op->kp.addr,
   RELATIVEJUMP_SIZE);
-   patch_instruction(op->kp.addr,
-   create_branch((unsigned int *)op->kp.addr,
- (unsigned long)op->optinsn.insn, 0));
+   create_branch(&instr,
+ (unsigned int *)op->kp.addr,
+ (unsigned long)op->optinsn.insn, 0);
+   patch_instruction(op->kp.addr, instr);
list_del_init(>list);
}
 }
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 

[PATCH v6 04/28] powerpc/xmon: Use bitwise calculations in_breakpoint_table()

2020-04-27 Thread Jordan Niethe
A modulo operation is used for calculating the current offset from a
breakpoint within the breakpoint table. As instruction lengths are
always a power of 2, this can be replaced with a bitwise 'and'. The
current check for word alignment can be replaced with checking that the
lower 2 bits are not set.

Suggested-by: Christophe Leroy 
Signed-off-by: Jordan Niethe 
---
v6: New to series
---
 arch/powerpc/xmon/xmon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index f7ce3ea8694c..e922cde99db2 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -856,8 +856,8 @@ static struct bpt *in_breakpoint_table(unsigned long nip, 
unsigned long *offp)
off = nip - (unsigned long) bpt_table;
if (off >= sizeof(bpt_table))
return NULL;
-   *offp = off % BPT_SIZE;
-   if (*offp != 0 && *offp != 4)
+   *offp = off & (BPT_SIZE - 1);
+   if (off & 3)
return NULL;
return bpts + (off / BPT_SIZE);
 }
-- 
2.17.1



[PATCH v6 03/28] powerpc/xmon: Move breakpoints to text section

2020-04-27 Thread Jordan Niethe
The instructions for xmon's breakpoint are stored bpt_table[] which is in
the data section. This is problematic as the data section may be marked
as no execute. Move bpt_table[] to the text section.

Signed-off-by: Jordan Niethe 
---
v6: - New to series. Was part of the previous patch.
- Make BPT_SIZE available in assembly
---
 arch/powerpc/kernel/asm-offsets.c |  8 
 arch/powerpc/xmon/Makefile|  2 +-
 arch/powerpc/xmon/xmon.c  |  6 +-
 arch/powerpc/xmon/xmon_bpts.S |  9 +
 arch/powerpc/xmon/xmon_bpts.h | 14 ++
 5 files changed, 33 insertions(+), 6 deletions(-)
 create mode 100644 arch/powerpc/xmon/xmon_bpts.S
 create mode 100644 arch/powerpc/xmon/xmon_bpts.h

diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index c25e562f1cd9..2401f415f423 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -70,6 +70,10 @@
 #include 
 #endif
 
+#ifdef CONFIG_XMON
+#include "../xmon/xmon_bpts.h"
+#endif
+
 #define STACK_PT_REGS_OFFSET(sym, val) \
DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))
 
@@ -783,5 +787,9 @@ int main(void)
DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
 #endif
 
+#ifdef CONFIG_XMON
+   DEFINE(BPT_SIZE, BPT_SIZE);
+#endif
+
return 0;
 }
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index c3842dbeb1b7..515a13ea6f28 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -21,7 +21,7 @@ endif
 
 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 
-obj-y  += xmon.o nonstdio.o spr_access.o
+obj-y  += xmon.o nonstdio.o spr_access.o xmon_bpts.o
 
 ifdef CONFIG_XMON_DISASSEMBLY
 obj-y  += ppc-dis.o ppc-opc.o
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index a064392df1b8..f7ce3ea8694c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -62,6 +62,7 @@
 
 #include "nonstdio.h"
 #include "dis-asm.h"
+#include "xmon_bpts.h"
 
 #ifdef CONFIG_SMP
 static cpumask_t cpus_in_xmon = CPU_MASK_NONE;
@@ -108,7 +109,6 @@ struct bpt {
 #define BP_TRAP2
 #define BP_DABR4
 
-#define NBPTS  256
 static struct bpt bpts[NBPTS];
 static struct bpt dabr;
 static struct bpt *iabr;
@@ -116,10 +116,6 @@ static unsigned bpinstr = 0x7fe00008;  /* trap */
 
 #define BP_NUM(bp) ((bp) - bpts + 1)
 
-#define BPT_SIZE   (sizeof(unsigned int) * 2)
-#define BPT_WORDS  (BPT_SIZE / sizeof(unsigned int))
-static unsigned int bpt_table[NBPTS * BPT_WORDS];
-
 /* Prototypes */
 static int cmds(struct pt_regs *);
 static int mread(unsigned long, void *, int);
diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S
new file mode 100644
index ..f3ad0ab50854
--- /dev/null
+++ b/arch/powerpc/xmon/xmon_bpts.S
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include 
+#include 
+#include 
+#include "xmon_bpts.h"
+
+.global bpt_table
+bpt_table:
+   .space NBPTS * BPT_SIZE
diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h
new file mode 100644
index ..b7e94375db86
--- /dev/null
+++ b/arch/powerpc/xmon/xmon_bpts.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef XMON_BPTS_H
+#define XMON_BPTS_H
+
+#define NBPTS  256
+#ifndef __ASSEMBLY__
+#define BPT_SIZE   (sizeof(unsigned int) * 2)
+#define BPT_WORDS  (BPT_SIZE / sizeof(unsigned int))
+
+extern unsigned int bpt_table[NBPTS * BPT_WORDS];
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* XMON_BPTS_H */
-- 
2.17.1



[PATCH v6 02/28] powerpc/xmon: Move breakpoint instructions to own array

2020-04-27 Thread Jordan Niethe
To execute an instruction out of line after a breakpoint, the NIP is set
to the address of struct bpt::instr. Here a copy of the instruction that
was replaced with a breakpoint is kept, along with a trap so normal flow
can be resumed after XOLing. The struct bpt's are located within the
data section. This is problematic as the data section may be marked as
no execute.

Instead of each struct bpt holding the instructions to be XOL'd, make a
new array, bpt_table[], with enough space to hold instructions for the
number of supported breakpoints. A later patch will move this to the
text section.
Make struct bpt::instr a pointer to the instructions in bpt_table[]
associated with that breakpoint. This association is a simple mapping:
bpts[n] -> bpt_table[n * words per breakpoint]. Currently we only need
the copied instruction followed by a trap, so 2 words per breakpoint.

Signed-off-by: Jordan Niethe 
---
v4: New to series
v5: - Do not use __section(), use a .space directive in .S file
- Simplify in_breakpoint_table() calculation
- Define BPT_SIZE
v6: - Seperate moving to text section
---
 arch/powerpc/xmon/xmon.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 02e3bd62cab4..a064392df1b8 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -97,7 +97,7 @@ static long *xmon_fault_jmp[NR_CPUS];
 /* Breakpoint stuff */
 struct bpt {
unsigned long   address;
-   unsigned intinstr[2];
+   unsigned int*instr;
atomic_tref_count;
int enabled;
unsigned long   pad;
@@ -116,6 +116,10 @@ static unsigned bpinstr = 0x7fe00008;  /* trap */
 
 #define BP_NUM(bp) ((bp) - bpts + 1)
 
+#define BPT_SIZE   (sizeof(unsigned int) * 2)
+#define BPT_WORDS  (BPT_SIZE / sizeof(unsigned int))
+static unsigned int bpt_table[NBPTS * BPT_WORDS];
+
 /* Prototypes */
 static int cmds(struct pt_regs *);
 static int mread(unsigned long, void *, int);
@@ -853,15 +857,13 @@ static struct bpt *in_breakpoint_table(unsigned long nip, 
unsigned long *offp)
 {
unsigned long off;
 
-   off = nip - (unsigned long) bpts;
-   if (off >= sizeof(bpts))
+   off = nip - (unsigned long) bpt_table;
+   if (off >= sizeof(bpt_table))
return NULL;
-   off %= sizeof(struct bpt);
-   if (off != offsetof(struct bpt, instr[0])
-   && off != offsetof(struct bpt, instr[1]))
+   *offp = off % BPT_SIZE;
+   if (*offp != 0 && *offp != 4)
return NULL;
-   *offp = off - offsetof(struct bpt, instr[0]);
-   return (struct bpt *) (nip - off);
+   return bpts + (off / BPT_SIZE);
 }
 
 static struct bpt *new_breakpoint(unsigned long a)
@@ -876,7 +878,8 @@ static struct bpt *new_breakpoint(unsigned long a)
for (bp = bpts; bp < &bpts[NBPTS]; ++bp) {
if (!bp->enabled && atomic_read(&bp->ref_count) == 0) {
bp->address = a;
-   patch_instruction(&bp->instr[1], bpinstr);
+   bp->instr = bpt_table + ((bp - bpts) * BPT_WORDS);
+   patch_instruction(bp->instr + 1, bpinstr);
return bp;
}
}
-- 
2.17.1



[PATCH v6 01/28] powerpc/xmon: Remove store_inst() for patch_instruction()

2020-04-27 Thread Jordan Niethe
For modifying instructions in xmon, patch_instruction() can serve the
same role that store_inst() is performing with the advantage of not
being specific to xmon. In some places patch_instruction() is already
being using followed by store_inst(). In these cases just remove the
store_inst(). Otherwise replace store_inst() with patch_instruction().

Reviewed-by: Nicholas Piggin 
Signed-off-by: Jordan Niethe 
---
v4: Read into a local variable
---
 arch/powerpc/xmon/xmon.c | 18 +-
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e8c84d265602..02e3bd62cab4 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -325,11 +325,6 @@ static inline void sync(void)
asm volatile("sync; isync");
 }
 
-static inline void store_inst(void *p)
-{
-   asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p));
-}
-
 static inline void cflush(void *p)
 {
asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p));
@@ -881,8 +876,7 @@ static struct bpt *new_breakpoint(unsigned long a)
	for (bp = bpts; bp < &bpts[NBPTS]; ++bp) {
		if (!bp->enabled && atomic_read(&bp->ref_count) == 0) {
bp->address = a;
-   bp->instr[1] = bpinstr;
-   store_inst(>instr[1]);
-   patch_instruction(&bp->instr[1], bpinstr);
return bp;
}
}
@@ -894,25 +888,26 @@ static struct bpt *new_breakpoint(unsigned long a)
 static void insert_bpts(void)
 {
int i;
+   unsigned int instr;
struct bpt *bp;
 
bp = bpts;
for (i = 0; i < NBPTS; ++i, ++bp) {
if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0)
continue;
-   if (mread(bp->address, &bp->instr[0], 4) != 4) {
+   if (mread(bp->address, &instr, 4) != 4) {
printf("Couldn't read instruction at %lx, "
   "disabling breakpoint there\n", bp->address);
bp->enabled = 0;
continue;
}
-   if (IS_MTMSRD(bp->instr[0]) || IS_RFID(bp->instr[0])) {
+   if (IS_MTMSRD(instr) || IS_RFID(instr)) {
printf("Breakpoint at %lx is on an mtmsrd or rfid "
   "instruction, disabling it\n", bp->address);
bp->enabled = 0;
continue;
}
-   store_inst(&bp->instr[0]);
+   patch_instruction(bp->instr, instr);
if (bp->enabled & BP_CIABR)
continue;
if (patch_instruction((unsigned int *)bp->address,
@@ -922,7 +917,6 @@ static void insert_bpts(void)
bp->enabled &= ~BP_TRAP;
continue;
}
-   store_inst((void *)bp->address);
}
 }
 
@@ -957,8 +951,6 @@ static void remove_bpts(void)
(unsigned int *)bp->address, bp->instr[0]) != 0)
printf("Couldn't remove breakpoint at %lx\n",
   bp->address);
-   else
-   store_inst((void *)bp->address);
}
 }
 
-- 
2.17.1



[PATCH v6 00/28] Initial Prefixed Instruction support

2020-04-27 Thread Jordan Niethe
A future revision of the ISA will introduce prefixed instructions. A
prefixed instruction is composed of a 4-byte prefix followed by a
4-byte suffix.

All prefixes have the major opcode 1. A prefix will never be a valid
word instruction. A suffix may be an existing word instruction or a
new instruction.

This series enables prefixed instructions and extends the instruction
emulation to support them. Then the places where prefixed instructions
might need to be emulated are updated.

v6 is based on feedback from Balamuruhan Suriyakumar, Alistair Popple,
Christophe Leroy and Segher Boessenkool.
The major changes:
- Use the instruction type in more places that had been missed before
- Fix issues with ppc32
- Introduce new self tests for code patching and feature fixups

v5 is based on feedback from Nick Piggins, Michael Ellerman, Balamuruhan
Suriyakumar and Alistair Popple.
The major changes:
- The ppc instruction type is now a struct
- Series now just based on next
- ppc_inst_masked() dropped
- Space for xmon breakpoints allocated in an assembly file
- "Add prefixed instructions to instruction data type" patch seperated in
  to smaller patches
- Calling convention for create_branch() is changed
- Some places which had not been updated to use the data type are now 
updated

v4 is based on feedback from Nick Piggins, Christophe Leroy and Daniel Axtens.
The major changes:
- Move xmon breakpoints from data section to text section
- Introduce a data type for instructions on powerpc

v3 is based on feedback from Christophe Leroy. The major changes:
- Completely replacing store_inst() with patch_instruction() in
  xmon
- Improve implementation of mread_instr() to not use mread().
- Base the series on top of
  https://patchwork.ozlabs.org/patch/1232619/ as this will effect
  kprobes.
- Some renaming and simplification of conditionals.

v2 incorporates feedback from Daniel Axtens and and Balamuruhan
S. The major changes are:
- Squashing together all commits about SRR1 bits
- Squashing all commits for supporting prefixed load stores
- Changing abbreviated references to sufx/prfx -> suffix/prefix
- Introducing macros for returning the length of an instruction
- Removing sign extension flag from pstd/pld in sstep.c
- Dropping patch  "powerpc/fault: Use analyse_instr() to check for
  store with updates to sp" from the series, it did not really fit
  with prefixed enablement in the first place and as reported by Greg
  Kurz did not work correctly.

Alistair Popple (1):
  powerpc: Enable Prefixed Instructions

Jordan Niethe (27):
  powerpc/xmon: Remove store_inst() for patch_instruction()
  powerpc/xmon: Move breakpoint instructions to own array
  powerpc/xmon: Move breakpoints to text section
  powerpc/xmon: Use bitwise calculations in_breakpoint_table()
  powerpc: Change calling convention for create_branch() et. al.
  powerpc: Use a macro for creating instructions from u32s
  powerpc: Use an accessor for instructions
  powerpc: Use a function for getting the instruction op code
  powerpc: Use a function for byte swapping instructions
  powerpc: Introduce functions for instruction equality
  powerpc: Use a datatype for instructions
  powerpc: Use a function for reading instructions
  powerpc: Add a probe_user_read_inst() function
  powerpc: Add a probe_kernel_read_inst() function
  powerpc/kprobes: Use patch_instruction()
  powerpc: Define and use __get_user_instr{,inatomic}()
  powerpc: Introduce a function for reporting instruction length
  powerpc/xmon: Use a function for reading instructions
  powerpc/xmon: Move insertion of breakpoint for xol'ing
  powerpc: Make test_translate_branch() independent of instruction
length
  powerpc: Define new SRR1 bits for a future ISA version
  powerpc: Add prefixed instructions to instruction data type
  powerpc: Test prefixed code patching
  powerpc: Test prefixed instructions in feature fixups
  powerpc: Support prefixed instructions in alignment handler
  powerpc sstep: Add support for prefixed load/stores
  powerpc sstep: Add support for prefixed fixed-point arithmetic

 arch/powerpc/include/asm/code-patching.h |  37 +-
 arch/powerpc/include/asm/inst.h  | 106 ++
 arch/powerpc/include/asm/kprobes.h   |   2 +-
 arch/powerpc/include/asm/reg.h   |   7 +-
 arch/powerpc/include/asm/sstep.h |  15 +-
 arch/powerpc/include/asm/uaccess.h   |  35 ++
 arch/powerpc/include/asm/uprobes.h   |   7 +-
 arch/powerpc/kernel/align.c  |  13 +-
 arch/powerpc/kernel/asm-offsets.c|   8 +
 arch/powerpc/kernel/epapr_paravirt.c |   7 +-
 arch/powerpc/kernel/hw_breakpoint.c  |   5 +-
 arch/powerpc/kernel/jump_label.c |   5 +-
 arch/powerpc/kernel/kgdb.c   |   9 +-
 arch/powerpc/kernel/kprobes.c|  24 +-
 arch/powerpc/kernel/mce_power.c  |   5 +-
 arch/powerpc/kernel/module_64.c   

[PATCH 1/2] powerpc: Discard .rela* sections if CONFIG_RELOCATABLE is undefined

2020-04-27 Thread H.J. Lu
arch/powerpc/kernel/vmlinux.lds.S has

DISCARDS
/DISCARD/ : {
*(*.EMB.apuinfo)
*(.glink .iplt .plt .rela* .comment)
*(.gnu.version*)
*(.gnu.attributes)
*(.eh_frame)
}

Since .rela* sections are needed when CONFIG_RELOCATABLE is defined,
change to discard .rela* sections if CONFIG_RELOCATABLE is undefined.

Signed-off-by: H.J. Lu 
Acked-by: Michael Ellerman  (powerpc)
---
 arch/powerpc/kernel/vmlinux.lds.S | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/vmlinux.lds.S 
b/arch/powerpc/kernel/vmlinux.lds.S
index 31a0f201fb6f..4ba07734a210 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -366,9 +366,12 @@ SECTIONS
DISCARDS
/DISCARD/ : {
*(*.EMB.apuinfo)
-   *(.glink .iplt .plt .rela* .comment)
+   *(.glink .iplt .plt .comment)
*(.gnu.version*)
*(.gnu.attributes)
*(.eh_frame)
+#ifndef CONFIG_RELOCATABLE
+   *(.rela*)
+#endif
}
 }
-- 
2.25.4



[PATCH 2/2] Discard .note.gnu.property sections in generic NOTES

2020-04-27 Thread H.J. Lu
With the command-line option, -mx86-used-note=yes, the x86 assembler
in binutils 2.32 and above generates a program property note in a note
section, .note.gnu.property, to encode used x86 ISAs and features.  But
kernel linker script only contains a single NOTE segment:

PHDRS {
 text PT_LOAD FLAGS(5);
 data PT_LOAD FLAGS(6);
 percpu PT_LOAD FLAGS(6);
 init PT_LOAD FLAGS(7);
 note PT_NOTE FLAGS(0);
}
SECTIONS
{
...
 .notes : AT(ADDR(.notes) - 0x8000) { __start_notes = .; KEEP(*(.not
e.*)) __stop_notes = .; } :text :note
...
}

The NOTE segment generated by kernel linker script is aligned to 4 bytes.
But .note.gnu.property section must be aligned to 8 bytes on x86-64 and
we get

[hjl@gnu-skx-1 linux]$ readelf -n vmlinux

Displaying notes found in: .notes
  OwnerData size Description
  Xen  0x0006 Unknown note type: (0x0006)
   description data: 6c 69 6e 75 78 00
  Xen  0x0004 Unknown note type: (0x0007)
   description data: 32 2e 36 00
  xen-3.0  0x0005 Unknown note type: (0x006e6558)
   description data: 08 00 00 00 03
readelf: Warning: note with invalid namesz and/or descsz found at offset 0x50
readelf: Warning:  type: 0x, namesize: 0x006e6558, descsize:
0x8000, alignment: 8
[hjl@gnu-skx-1 linux]$

Since note.gnu.property section in kernel image is never used, this patch
discards .note.gnu.property sections in kernel linker script by adding

/DISCARD/ : {
  *(.note.gnu.property)
}

before kernel NOTE segment in generic NOTES.

Signed-off-by: H.J. Lu 
Reviewed-by: Kees Cook 
---
 include/asm-generic/vmlinux.lds.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h 
b/include/asm-generic/vmlinux.lds.h
index 71e387a5fe90..95cd678428f4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -833,7 +833,14 @@
 #define TRACEDATA
 #endif
 
+/*
+ * Discard .note.gnu.property sections which are unused and have
+ * different alignment requirement from kernel note sections.
+ */
 #define NOTES  \
+   /DISCARD/ : {   \
+   *(.note.gnu.property)   \
+   }   \
.notes : AT(ADDR(.notes) - LOAD_OFFSET) {   \
__start_notes = .;  \
KEEP(*(.note.*))\
-- 
2.25.4



Re: [PATCH] ibmvfc: don't send implicit logouts prior to NPIV login

2020-04-27 Thread Martin K. Petersen


Tyrel,

> Commit ed830385a2b1 ("scsi: ibmvfc: Avoid loss of all paths during SVC
> node reboot") introduced a regression where when the client resets or
> re-enables its CRQ with the hypervisor there is a chance that if the
> server side doesn't issue its INIT handshake quick enough the client
> can issue an Implicit Logout prior to doing an NPIV Login. The server
> treats this scenario as a protocol violation and closes the CRQ on its
> end forcing the client through a reset that gets the client host state
> and next host action out of agreement leading to a BUG assert.

Applied to 5.7/scsi-fixes, thanks!

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH] ibmvscsi: fix WARN_ON during event pool release

2020-04-27 Thread Martin K. Petersen
On Mon, 27 Apr 2020 15:49:53 -0700, Tyrel Datwyler wrote:

> While removing an ibmvscsi client adapter a WARN_ON like the following
> is seen in the kernel log:

Applied to 5.7/scsi-fixes, thanks!

[1/1] scsi: ibmvscsi: fix WARN_ON during event pool release
  https://git.kernel.org/mkp/scsi/c/cff6a5746645

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH] powerpc: Discard .rela* sections if CONFIG_RELOCATABLE is undefined

2020-04-27 Thread Michael Ellerman
"H.J. Lu"  writes:

> arch/powerpc/kernel/vmlinux.lds.S has
>
> DISCARDS
> /DISCARD/ : {
> *(*.EMB.apuinfo)
> *(.glink .iplt .plt .rela* .comment)
> *(.gnu.version*)
> *(.gnu.attributes)
> *(.eh_frame)
> }
>
> Since .rela* sections are needed when CONFIG_RELOCATABLE is defined,
> change to discard .rela* sections if CONFIG_RELOCATABLE is undefined.
>
> Signed-off-by: H.J. Lu 
> Acked-by: Michael Ellerman  (powerpc)
> ---
>  arch/powerpc/kernel/vmlinux.lds.S | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)

Please insert this patch into your series prior to the patch that caused
the build break.

cheers

> diff --git a/arch/powerpc/kernel/vmlinux.lds.S 
> b/arch/powerpc/kernel/vmlinux.lds.S
> index 31a0f201fb6f..4ba07734a210 100644
> --- a/arch/powerpc/kernel/vmlinux.lds.S
> +++ b/arch/powerpc/kernel/vmlinux.lds.S
> @@ -366,9 +366,12 @@ SECTIONS
>   DISCARDS
>   /DISCARD/ : {
>   *(*.EMB.apuinfo)
> - *(.glink .iplt .plt .rela* .comment)
> + *(.glink .iplt .plt .comment)
>   *(.gnu.version*)
>   *(.gnu.attributes)
>   *(.eh_frame)
> +#ifndef CONFIG_RELOCATABLE
> + *(.rela*)
> +#endif
>   }
>  }
> -- 
> 2.25.4


Re: [RFC 1/3] powernv/cpuidle : Support for pre-entry and post exit of stop state in firmware

2020-04-27 Thread Nicholas Piggin
Thanks for picking this up and pushing it along. I do plan to come back 
and take another look at it all, but what we do need to do first is get 
a coherent approach to this proposed new calling convention and OS ops.

It's fine to work on this in the meantime, but to start merging things
my idea is:

- OPAL must leave r13-r15 untouched for the OS.
- OS ops are made available only for a "v4" OS that uses the new
  calling convention, including kernel stack.
- OS ops baseline (all OSes must provide) will be console / printk 
  facility, trap handling and crash/symbol decoding on behalf of OPAL,
  and runtime virtual memory.

Other OS ops features can be added in the versioned structure, including 
this.

I'm trying to get back to cleaning these things up and start getting 
them merged now. Any comments or review on those would be helpful.

Thanks,
Nick



Re: [PATCH v2,RESEND] misc: new driver sram_uapi for user level SRAM access

2020-04-27 Thread Scott Wood
On Mon, 2020-04-27 at 09:13 -0500, Rob Herring wrote:
> On Sun, Apr 19, 2020 at 10:06 PM Wang Wenhu  wrote:
> > 
> > A generic User-Kernel interface that allows a misc device created
> > by it to support file-operations of ioctl and mmap to access SRAM
> > memory from user level. Different kinds of SRAM alloction and free
> > APIs could be registered by specific SRAM hardware level driver to
> > the available list and then be chosen by users to allocate and map
> > SRAM memory from user level.
> > 
> > It is extremely helpful for the user space applications that require
> > high performance memory accesses, such as embedded networking devices
> > that would process data in user space, and PowerPC e500 is a case.
> > 
> > Signed-off-by: Wang Wenhu 
> > Cc: Greg Kroah-Hartman 
> > Cc: Arnd Bergmann 
> > Cc: Christophe Leroy 
> > Cc: Scott Wood 
> > Cc: Michael Ellerman 
> > Cc: Randy Dunlap 
> > Cc: linuxppc-dev@lists.ozlabs.org
> > ---
> > Changes since v1: addressed comments from Arnd
> >  * Changed the ioctl cmd definitions using _IO micros
> >  * Export interfaces for HW-SRAM drivers to register apis to available
> > list
> >  * Modified allocation alignment to PAGE_SIZE
> >  * Use phys_addr_t as type of SRAM resource size and offset
> >  * Support compat_ioctl
> >  * Misc device name:sram
> > 
> > Note: From this on, the SRAM_UAPI driver is independent to any hardware
> > drivers, so I would only commit the patch itself as v2, while the v1 of
> > it was wrapped together with patches for Freescale L2-Cache-SRAM device.
> > Then after, I'd create patches for Freescale L2-Cache-SRAM device as
> > another series.
> 
> There's work to add SRAM support to dma-buf heaps[1]. Take a look and
> see if that works for you.
> 
> Rob
> 
> [1] https://lore.kernel.org/lkml/20200424222740.16259-1-...@ti.com/
> 

The dma heap API itself (what makes it specific to DMA, rather than any
special-purpose allocator?) seems like it could be what we're looking for. 
The issue with drivers/misc/sram.c is that it seems like its main purpose is
to get sram description from the device tree, but this sram isn't static (it's
a reconfiguration of L2 cache into SRAM mode) and thus can't be described by
mmio-sram.

-Scott




[PATCH] ibmvscsi: fix WARN_ON during event pool release

2020-04-27 Thread Tyrel Datwyler
While removing an ibmvscsi client adapter a WARN_ON like the following
is seen in the kernel log:

drmgr: drmgr: -r -c slot -s U9080.M9S.783AEC8-V11-C11 -w 5 -d 1
WARNING: CPU: 9 PID: 24062 at ../kernel/dma/mapping.c:311 
dma_free_attrs+0x78/0x110
Supported: No, Unreleased kernel
CPU: 9 PID: 24062 Comm: drmgr Kdump: loaded Tainted: G   X 
5.3.18-12-default
NIP:  c01fa758 LR: c01fa744 CTR: c01fa6e0
REGS: c002173375d0 TRAP: 0700   Tainted: G   X 
(5.3.18-12-default)
MSR:  80029033   CR: 28088282  XER: 2000
CFAR: c01fbf0c IRQMASK: 1
GPR00: c01fa744 c00217337860 c161ab00 
GPR04:  c11e1225 1801 
GPR08:  0001 0001 c008190f4fa8
GPR12: c01fa6e0 c7fc2a00  
GPR16:    
GPR20:    
GPR24: 00011420e310   1801
GPR28: c159de50 c11e1225 6600 c11e5c994848
NIP [c01fa758] dma_free_attrs+0x78/0x110
LR [c01fa744] dma_free_attrs+0x64/0x110
Call Trace:
[c00217337860] [00011420e310] 0x11420e310 (unreliable)
[c002173378b0] [c008190f0280] release_event_pool+0xd8/0x120 [ibmvscsi]
[c00217337930] [c008190f3f74] ibmvscsi_remove+0x6c/0x160 [ibmvscsi]
[c00217337960] [c00f3cac] vio_bus_remove+0x5c/0x100
[c002173379a0] [c087a0a4] device_release_driver_internal+0x154/0x280
[c002173379e0] [c08777cc] bus_remove_device+0x11c/0x220
[c00217337a60] [c0870fc4] device_del+0x1c4/0x470
[c00217337b10] [c08712a0] device_unregister+0x30/0xa0
[c00217337b80] [c00f39ec] vio_unregister_device+0x2c/0x60
[c00217337bb0] [c0081a1d0964] dlpar_remove_slot+0x14c/0x250 
[rpadlpar_io]
[c00217337c50] [c0081a1d0bcc] remove_slot_store+0xa4/0x110 [rpadlpar_io]
[c00217337cd0] [c0c091a0] kobj_attr_store+0x30/0x50
[c00217337cf0] [c057c934] sysfs_kf_write+0x64/0x90
[c00217337d10] [c057be10] kernfs_fop_write+0x1b0/0x290
[c00217337d60] [c0488c4c] __vfs_write+0x3c/0x70
[c00217337d80] [c048c648] vfs_write+0xd8/0x260
[c00217337dd0] [c048ca8c] ksys_write+0xdc/0x130
[c00217337e20] [c000b488] system_call+0x5c/0x70
Instruction dump:
7c840074 f8010010 f821ffb1 20840040 eb830218 7c8407b4 48002019 6000
2fa3 409e003c 892d0988 792907e0 <0b09> 2fbd 419e0028 2fbc
---[ end trace 5955b3c0cc079942 ]---
rpadlpar_io: slot U9080.M9S.783AEC8-V11-C11 removed

This is tripped as a result of irqs being disabled during the call to
dma_free_coherent() by release_event_pool(). At this point in the code
path we have quiesced the adapter and it's overly paranoid anyway to
be holding the host lock.

Signed-off-by: Tyrel Datwyler 
---
 drivers/scsi/ibmvscsi/ibmvscsi.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 7f66a77..126b242 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -2326,10 +2326,7 @@ static int ibmvscsi_remove(struct vio_dev *vdev)
scsi_remove_host(hostdata->host);
 
purge_requests(hostdata, DID_ERROR);
-
-   spin_lock_irqsave(hostdata->host->host_lock, flags);
	release_event_pool(&hostdata->pool, hostdata);
-   spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
	ibmvscsi_release_crq_queue(&hostdata->queue, hostdata,
max_events);
-- 
1.8.3.1



Re: [PATCH 2/7] signal: factor copy_siginfo_to_external32 from copy_siginfo_to_user32

2020-04-27 Thread Andrew Morton
On Sun, 26 Apr 2020 09:40:39 +0200 Christoph Hellwig  wrote:

> On Sat, Apr 25, 2020 at 09:47:24PM -0700, Andrew Morton wrote:
> > I looked at fixing it but surely this sort of thing:
> > 
> > 
> > int copy_siginfo_to_user32(struct compat_siginfo __user *to,
> >const struct kernel_siginfo *from)
> > #if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
> > {
> > return __copy_siginfo_to_user32(to, from, in_x32_syscall());
> > }
> > int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
> >  const struct kernel_siginfo *from, bool x32_ABI)
> > #endif
> > {
> > ...
> > 
> > 
> > is too ugly to live?
> 
> I fixed it up in my earlier versions, but Eric insisted to keep it,
> which is why I switched to his version given that he is the defacto
> signal.c maintainer.
> 
> Here is what I would have preferred:
> 
> https://www.spinics.net/lists/kernel/msg3473847.html
> https://www.spinics.net/lists/kernel/msg3473840.html
> https://www.spinics.net/lists/kernel/msg3473843.html

OK, but that doesn't necessitate the above monstrosity?  How about

static int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
 const struct kernel_siginfo *from, bool x32_ABI)
{
struct compat_siginfo new;
copy_siginfo_to_external32(, from);
...
}

int copy_siginfo_to_user32(struct compat_siginfo __user *to,
   const struct kernel_siginfo *from)
{
#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
return __copy_siginfo_to_user32(to, from, in_x32_syscall());
#else
return __copy_siginfo_to_user32(to, from, 0);
#endif
}

Or something like that - I didn't try very hard.  We know how to do
this stuff, and surely this thing isn't how!


Re: [PATCH v2 1/2] PCI/AER: Allow Native AER Host Bridges to use AER

2020-04-27 Thread Bjorn Helgaas
On Mon, Apr 27, 2020 at 04:11:07PM +, Derrick, Jonathan wrote:
> On Fri, 2020-04-24 at 18:30 -0500, Bjorn Helgaas wrote:
> > I'm glad you raised this because I think the way we handle
> > FIRMWARE_FIRST is really screwed up.
> > 
> > On Mon, Apr 20, 2020 at 03:37:09PM -0600, Jon Derrick wrote:
> > > Some platforms have a mix of ports whose capabilities can be negotiated
> > > by _OSC, and some ports which are not described by ACPI and instead
> > > managed by Native drivers. The existing Firmware-First HEST model can
> > > incorrectly tag these Native, Non-ACPI ports as Firmware-First managed
> > > ports by advertising the HEST Global Flag and matching the type and
> > > class of the port (aer_hest_parse).
> > > 
> > > If the port requests Native AER through the Host Bridge's capability
> > > settings, the AER driver should honor those settings and allow the port
> > > to bind. This patch changes the definition of Firmware-First to exclude
> > > ports whose Host Bridges request Native AER.
> > > 
> > > Signed-off-by: Jon Derrick 
> > > ---
> > >  drivers/pci/pcie/aer.c | 3 +++
> > >  1 file changed, 3 insertions(+)
> > > 
> > > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > > index f4274d3..30fbd1f 100644
> > > --- a/drivers/pci/pcie/aer.c
> > > +++ b/drivers/pci/pcie/aer.c
> > > @@ -314,6 +314,9 @@ int pcie_aer_get_firmware_first(struct pci_dev *dev)
> > >   if (pcie_ports_native)
> > >   return 0;
> > >  
> > > + if (pci_find_host_bridge(dev->bus)->native_aer)
> > > + return 0;
> > 
> > I hope we don't have to complicate pcie_aer_get_firmware_first() by
> > adding this "native_aer" check here.  I'm not sure what we actually
> > *should* do based on FIRMWARE_FIRST, but I don't think the current
> > uses really make sense.
> > 
> > I think Linux makes too many assumptions based on the FIRMWARE_FIRST
> > bit.  The ACPI spec really only says (ACPI v6.3, sec 18.3.2.4):
> > 
> >   If set, FIRMWARE_FIRST indicates to the OSPM that system firmware
> >   will handle errors from this source first.
> > 
> >   If FIRMWARE_FIRST is set in the flags field, the Enabled field [of
> >   the HEST AER structure] is ignored by the OSPM.
> > 
> > I do not see anything there about who owns the AER Capability, but
> > Linux assumes that if FIRMWARE_FIRST is set, firmware must own the AER
> > Capability.  I think that's reading too much into the spec.
> > 
> > We already have _OSC, which *does* explicitly talk about who owns the
> > AER Capability, and I think we should rely on that.  If firmware
> > doesn't want the OS to touch the AER Capability, it should decline to
> > give ownership to the OS via _OSC.
> > 
> > >   if (!dev->__aer_firmware_first_valid)
> > >   aer_set_firmware_first(dev);
> > >   return dev->__aer_firmware_first;
> 
> Just a little bit of reading and my interpretation, as it seems like
> some of this is just layers upon layers of possibly conflicting yet
> intentionally vague descriptions.
> 
> _OSC seems to describe that OSPM can handle AER (6.2.11.3):
> PCI Express Advanced Error Reporting (AER) control
>The OS sets this bit to 1 to request control over PCI Express AER.
>If the OS successfully receives control of this feature, it must
>handle error reporting through the AER Capability as described in
>the PCI Express Base Specification.
> 
> 
> For AER and DPC the ACPI root port enumeration will properly set
> native_aer/dpc based on _OSC:
> 
> struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
> ...
>   if (!(root->osc_control_set & OSC_PCI_EXPRESS_AER_CONTROL))
>   host_bridge->native_aer = 0;
>   if (!(root->osc_control_set & OSC_PCI_EXPRESS_PME_CONTROL))
>   host_bridge->native_pme = 0;
>   if (!(root->osc_control_set & OSC_PCI_EXPRESS_LTR_CONTROL))
>   host_bridge->native_ltr = 0;
>   if (!(root->osc_control_set & OSC_PCI_EXPRESS_DPC_CONTROL))
>   host_bridge->native_dpc = 0;
> 
> As DPC was defined in an ECN [1], I would imagine AER will need to
> cover DPC for legacy platforms prior to the ECN.
> 
> 
> 
> The complication is that HEST also seems to describe how ports (and
> other devices) are managed either individually or globally:
> 
> Table 18-387  PCI Express Root Port AER Structure
> ...
> Flags:
>[0] - FIRMWARE_FIRST: If set, this bit indicates to the OSPM that
>system firmware will handle errors from this source
>[1] - GLOBAL: If set, indicates that the settings contained in this
>structure apply globally to all PCI Express Devices. All other bits
>must be set to zero
> 
> 
> The _OSC definition seems to contradict/negate the above FIRMWARE_FIRST
> definition that says only firmware will handle errors. It's a bit
> different than the IA_32 MCE definition which allows for a GHES_ASSIST
> condition, which would cause Firmware 'First', however does allow the
> error to be received by OSPM AER via GHES:
> 
> Table 18-385  IA-32 

[PATCH] ibmvfc: don't send implicit logouts prior to NPIV login

2020-04-27 Thread Tyrel Datwyler
From: Brian King 

Commit ed830385a2b1 ("scsi: ibmvfc: Avoid loss of all paths during
SVC node reboot") introduced a regression where when the client
resets or re-enables its CRQ with the hypervisor there is a chance
that if the server side doesn't issue its INIT handshake quick
enough the client can issue an Implicit Logout prior to doing an
NPIV Login. The server treats this scenario as a protocol violation
and closes the CRQ on its end forcing the client through a reset
that gets the client host state and next host action out of
agreement leading to a BUG assert.

ibmvfc 3003: Partner initialization complete
ibmvfc 3002: Partner initialization complete
ibmvfc 3002: Host partner adapter deregistered or failed (rc=2)
ibmvfc 3002: Partner initialized
[ cut here ]
kernel BUG at ../drivers/scsi/ibmvscsi/ibmvfc.c:4489!
Oops: Exception in kernel mode, sig: 5 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
Supported: No, Unreleased kernel
CPU: 16 PID: 1290 Comm: ibmvfc_0 Tainted: G   OE  X   5.3.18-12-default
NIP:  c0080d84a2b4 LR: c0080d84a040 CTR: c0080d84a2a0
REGS: ccb57a00 TRAP: 0700   Tainted: G   OE  X
(5.3.18-12-default)
MSR:  8282b033   CR: 24000848  XER: 
0001
CFAR: c0080d84a070 IRQMASK: 1
GPR00: c0080d84a040 ccb57c90 c0080d858e00 
GPR04:    00a0
GPR08: c0080d84a074 0001 0014 c0080d84d7d0
GPR12:  c0001ea28200 c016cd98 
GPR16: c0080d84b7b8   c0542c706d68
GPR20: 0005 c0542c706d88 5deadbeef100 5deadbeef122
GPR24: 000c 000b c0080d852180 0001
GPR28:  c0542c706da0 c0542c706860 c0542c706828
NIP [c0080d84a2b4] ibmvfc_work+0x3ac/0xc90 [ibmvfc]
LR [c0080d84a040] ibmvfc_work+0x138/0xc90 [ibmvfc]

This scenario can be prevented by rejecting any attempt to send an
Implicit Logout if the client adapter is not logged in yet.

Fixes: Commit ed830385a2b1 ("scsi: ibmvfc: Avoid loss of all paths during SVC 
node reboot")
Signed-off-by: Brian King 
Signed-off-by: Tyrel Datwyler 
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 7da9e060b270..2b1326d6dd1f 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3640,6 +3640,11 @@ static void ibmvfc_tgt_implicit_logout_and_del(struct 
ibmvfc_target *tgt)
struct ibmvfc_host *vhost = tgt->vhost;
struct ibmvfc_event *evt;
 
+if (!vhost->logged_in) {
+ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+return;
+}
+
if (vhost->discovery_threads >= disc_threads)
return;
 
-- 
2.16.4



[PATCH v3 23/29] docs: filesystems: convert spufs/spufs.txt to ReST

2020-04-27 Thread Mauro Carvalho Chehab
This file is at groff output format. Manually convert it to
ReST format, trying to preserve a similar output after parsed.

Signed-off-by: Mauro Carvalho Chehab 
---
 Documentation/filesystems/spufs/index.rst |  1 +
 .../spufs/{spufs.txt => spufs.rst}| 59 +--
 MAINTAINERS   |  2 +-
 3 files changed, 30 insertions(+), 32 deletions(-)
 rename Documentation/filesystems/spufs/{spufs.txt => spufs.rst} (95%)

diff --git a/Documentation/filesystems/spufs/index.rst 
b/Documentation/filesystems/spufs/index.rst
index 39553c6ebefd..939cf59a7d9e 100644
--- a/Documentation/filesystems/spufs/index.rst
+++ b/Documentation/filesystems/spufs/index.rst
@@ -8,4 +8,5 @@ SPU Filesystem
 .. toctree::
:maxdepth: 1
 
+   spufs
spu_create
diff --git a/Documentation/filesystems/spufs/spufs.txt 
b/Documentation/filesystems/spufs/spufs.rst
similarity index 95%
rename from Documentation/filesystems/spufs/spufs.txt
rename to Documentation/filesystems/spufs/spufs.rst
index caf36aaae804..8a42859bb100 100644
--- a/Documentation/filesystems/spufs/spufs.txt
+++ b/Documentation/filesystems/spufs/spufs.rst
@@ -1,12 +1,18 @@
-SPUFS(2)   Linux Programmer's Manual  SPUFS(2)
+.. SPDX-License-Identifier: GPL-2.0
 
+=
+spufs
+=
 
+Name
+
 
-NAME
spufs - the SPU file system
 
 
-DESCRIPTION
+Description
+===
+
The SPU file system is used on PowerPC machines that implement the Cell
Broadband Engine Architecture in order to access Synergistic  Processor
Units (SPUs).
@@ -21,7 +27,9 @@ DESCRIPTION
ally add or remove files.
 
 
-MOUNT OPTIONS
+Mount Options
+=
+
uid=
   set the user owning the mount point, the default is 0 (root).
 
@@ -29,7 +37,9 @@ MOUNT OPTIONS
   set the group owning the mount point, the default is 0 (root).
 
 
-FILES
+Files
+=
+
The files in spufs mostly follow the standard behavior for regular sys-
tem  calls like read(2) or write(2), but often support only a subset of
the operations supported on regular file systems. This list details the
@@ -125,14 +135,12 @@ FILES
   space is available for writing.
 
 
-   /mbox_stat
-   /ibox_stat
-   /wbox_stat
+   /mbox_stat, /ibox_stat, /wbox_stat
Read-only files that contain the length of the current queue, i.e.  how
many  words  can  be  read  from  mbox or ibox or how many words can be
written to wbox without blocking.  The files can be read only in 4-byte
units  and  return  a  big-endian  binary integer number.  The possible
-   operations on an open *box_stat file are:
+   operations on an open ``*box_stat`` file are:
 
read(2)
   If a count smaller than four is requested, read returns  -1  and
@@ -143,12 +151,7 @@ FILES
   in EAGAIN.
 
 
-   /npc
-   /decr
-   /decr_status
-   /spu_tag_mask
-   /event_mask
-   /srr0
+   /npc, /decr, /decr_status, /spu_tag_mask, /event_mask, /srr0
Internal  registers  of  the SPU. The representation is an ASCII string
with the numeric value of the next instruction to  be  executed.  These
can  be  used in read/write mode for debugging, but normal operation of
@@ -157,17 +160,14 @@ FILES
 
The contents of these files are:
 
+   === ===
npc Next Program Counter
-
decrSPU Decrementer
-
decr_status Decrementer Status
-
spu_tag_maskMFC tag mask for SPU DMA
-
event_mask  Event mask for SPU interrupts
-
srr0Interrupt Return address register
+   === ===
 
 
The   possible   operations   on   an   open  npc,  decr,  decr_status,
@@ -206,8 +206,7 @@ FILES
   from the data buffer, updating the value of the fpcr register.
 
 
-   /signal1
-   /signal2
+   /signal1, /signal2
The two signal notification channels of an SPU.  These  are  read-write
files  that  operate  on  a 32 bit word.  Writing to one of these files
triggers an interrupt on the SPU.  The  value  written  to  the  signal
@@ -233,8 +232,7 @@ FILES
   file.
 
 
-   /signal1_type
-   /signal2_type
+   /signal1_type, /signal2_type
These two files change the behavior of the signal1 and signal2  notifi-
cation  files.  The  contain  a numerical ASCII string which is read as
either "1" or "0".  In mode 0 (overwrite), the  hardware  replaces  the
@@ -259,18 +257,17 @@ FILES
   the previous setting.
 
 
-EXAMPLES
+Examples
+
/etc/fstab entry
   none  /spu  spufs gid=spu   00
 
 
-AUTHORS
+Authors
+===
Arnd  Bergmann  ,  Mark  Nutter ,
Ulrich Weigand 
 
-SEE ALSO
+See Also
+
  

[PATCH v3 00/29] Convert files to ReST - part 2

2020-04-27 Thread Mauro Carvalho Chehab
This is the second part of a series I wrote sometime ago where I manually
convert lots of files to be properly parsed by Sphinx as ReST files.

As it touches on lot of stuff, this series is based on today's linux-next, 
at tag next-20190617.

The first version of this series had 57 patches. The first part with 28 patches
were already merged. Right now, there are still ~76  patches pending applying
(including this series), and that's because I opted to do ~1 patch per converted
 directory.

That sounds too much to be send on a single round. So, I'm opting to split
it on 3 parts for the conversion, plus a final patch adding orphaned books
to existing ones. 

Those patches should probably be good to be merged either by subsystem
maintainers or via the docs tree.

I opted to mark new files not included yet to the main index.rst (directly or
indirectly) with the :orphan: tag, in order to avoid adding warnings to the
build system. This should be removed after we find a "home" for all
the converted files within the new document tree arrangement, after I
submit the third part.

Both this series and  the other parts of this work are on my devel git tree,
at:


https://git.linuxtv.org/mchehab/experimental.git/log/?h=convert_rst_renames_v5.1

The final output in html (after all patches I currently have, including 
the upcoming series) can be seen at:

https://www.infradead.org/~mchehab/rst_conversion/

It contains all pending work from my side related to the conversion, plus
the patches I finished a first version today with contains the renaming 
patches and de-orphan changes.

---

Version 3:

- Rebased on the top of next-20200424
- configfs.rst conversion moved to the end of the series;
- avoided almost all markups at configfs.rst while still preserving
  a reasonable output and not generating build warnings.

Version 2:

- Removed patches merged via other trees;
- rebased on the top of today's linux-next (next-20190617);
- Fix a typo on one patch's description;
- Added received acks.

Mauro Carvalho Chehab (29):
  docs: filesystems: convert caching/object.txt to ReST
  docs: filesystems: convert caching/fscache.txt to ReST format
  docs: filesystems: caching/netfs-api.txt: convert it to ReST
  docs: filesystems: caching/operations.txt: convert it to ReST
  docs: filesystems: caching/cachefiles.txt: convert to ReST
  docs: filesystems: caching/backend-api.txt: convert it to ReST
  docs: filesystems: convert cifs/cifsroot.txt to ReST
  docs: filesystems: convert automount-support.txt to ReST
  docs: filesystems: convert coda.txt to ReST
  docs: filesystems: convert devpts.txt to ReST
  docs: filesystems: convert dnotify.txt to ReST
  docs: filesystems: convert fiemap.txt to ReST
  docs: filesystems: convert files.txt to ReST
  docs: filesystems: convert fuse-io.txt to ReST
  docs: filesystems: convert locks.txt to ReST
  docs: filesystems: convert mandatory-locking.txt to ReST
  docs: filesystems: convert mount_api.txt to ReST
  docs: filesystems: convert quota.txt to ReST
  docs: filesystems: convert seq_file.txt to ReST
  docs: filesystems: convert sharedsubtree.txt to ReST
  docs: filesystems: split spufs.txt into 3 separate files
  docs: filesystems: convert spufs/spu_create.txt to ReST
  docs: filesystems: convert spufs/spufs.txt to ReST
  docs: filesystems: convert spufs/spu_run.txt to ReST
  docs: filesystems: convert sysfs-pci.txt to ReST
  docs: filesystems: convert sysfs-tagging.txt to ReST
  docs: filesystems: convert xfs-delayed-logging-design.txt to ReST
  docs: filesystems: convert xfs-self-describing-metadata.txt to ReST
  docs: filesystems: convert configfs.txt to ReST

 Documentation/admin-guide/sysctl/kernel.rst   |2 +-
 ...ount-support.txt => automount-support.rst} |   23 +-
 .../{backend-api.txt => backend-api.rst}  |  165 +-
 .../{cachefiles.txt => cachefiles.rst}|  139 +-
 Documentation/filesystems/caching/fscache.rst |  565 ++
 Documentation/filesystems/caching/fscache.txt |  448 -
 Documentation/filesystems/caching/index.rst   |   14 +
 .../caching/{netfs-api.txt => netfs-api.rst}  |  172 +-
 .../caching/{object.txt => object.rst}|   43 +-
 .../{operations.txt => operations.rst}|   45 +-
 .../cifs/{cifsroot.txt => cifsroot.rst}   |   56 +-
 Documentation/filesystems/coda.rst| 1670 
 Documentation/filesystems/coda.txt| 1676 -
 .../{configfs/configfs.txt => configfs.rst}   |  131 +-
 Documentation/filesystems/devpts.rst  |   36 +
 Documentation/filesystems/devpts.txt  |   26 -
 .../filesystems/{dnotify.txt => dnotify.rst}  |   11 +-
 .../filesystems/{fiemap.txt => fiemap.rst}|  133 +-
 .../filesystems/{files.txt => files.rst}  |   15 +-
 .../filesystems/{fuse-io.txt => fuse-io.rst}  |6 +
 Documentation/filesystems/index.rst   |   23 +
 .../filesystems/{locks.txt => locks.rst}  |   14 +-
 ...tory-locking.txt => mandatory-locking.rst} |   25 

[PATCH] powerpc: Discard .rela* sections if CONFIG_RELOCATABLE is undefined

2020-04-27 Thread H.J. Lu
arch/powerpc/kernel/vmlinux.lds.S has

DISCARDS
/DISCARD/ : {
*(*.EMB.apuinfo)
*(.glink .iplt .plt .rela* .comment)
*(.gnu.version*)
*(.gnu.attributes)
*(.eh_frame)
}

Since .rela* sections are needed when CONFIG_RELOCATABLE is defined,
change to discard .rela* sections if CONFIG_RELOCATABLE is undefined.

Signed-off-by: H.J. Lu 
Acked-by: Michael Ellerman  (powerpc)
---
 arch/powerpc/kernel/vmlinux.lds.S | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/vmlinux.lds.S 
b/arch/powerpc/kernel/vmlinux.lds.S
index 31a0f201fb6f..4ba07734a210 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -366,9 +366,12 @@ SECTIONS
DISCARDS
/DISCARD/ : {
*(*.EMB.apuinfo)
-   *(.glink .iplt .plt .rela* .comment)
+   *(.glink .iplt .plt .comment)
*(.gnu.version*)
*(.gnu.attributes)
*(.eh_frame)
+#ifndef CONFIG_RELOCATABLE
+   *(.rela*)
+#endif
}
 }
-- 
2.25.4



Re: [PATCH 1/5] powerpc/spufs: simplify spufs core dumping

2020-04-27 Thread Al Viro
On Mon, Apr 27, 2020 at 10:06:21PM +0200, Christoph Hellwig wrote:

> @@ -1988,7 +1984,12 @@ static ssize_t spufs_mbox_info_read(struct file *file, 
> char __user *buf,
>   if (ret)
>   return ret;
>   spin_lock(>csa.register_lock);
> - ret = __spufs_mbox_info_read(ctx, buf, len, pos);
> + /* EOF if there's no entry in the mbox */
> + if (ctx->csa.prob.mb_stat_R & 0xff) {
> + ret = simple_read_from_buffer(buf, len, pos,
> + >csa.prob.pu_mb_R,
> + sizeof(ctx->csa.prob.pu_mb_R));
> + }
>   spin_unlock(>csa.register_lock);
>   spu_release_saved(ctx);

Again, this really needs fixing.  Preferably - as a separate commit preceding
this series, so that it could be backported.  simple_read_from_buffer() is
a blocking operation.  Yes, I understand that mainline has the same bug;
it really does need to be fixed and having to backport this series is not
a good idea, for obvious reasons.


Re: [PATCH v3 2/4] hugetlbfs: move hugepagesz= parsing to arch independent code

2020-04-27 Thread Mike Kravetz
On 4/27/20 1:18 PM, Andrew Morton wrote:
> On Mon, 27 Apr 2020 12:09:47 -0700 Mike Kravetz  
> wrote:
> 
>> Previously, a check for hugepages_supported was added before processing
>> hugetlb command line parameters.  On some architectures such as powerpc,
>> hugepages_supported() is not set to true until after command line
>> processing.  Therefore, no hugetlb command line parameters would be
>> accepted.
>>
>> Remove the additional checks for hugepages_supported.  In hugetlb_init,
>> print a warning if !hugepages_supported and command line parameters were
>> specified.
> 
> This applies to [4/4] instead of fixing [2/4].  I guess you'll
> straighten that out in v4?

Yes.

> btw, was
> http://lkml.kernel.org/r/CADYN=9Koefrq9H1Y82Q8nMNbeyN4tzhEfvDu5u=svfjfzcy...@mail.gmail.com
> addressed?

Yes, you pulled a patch into your tree to address this.
hugetlbfs-remove-hugetlb_add_hstate-warning-for-existing-hstate-fix.patch

I'll send out a v4 with both these issues addressed.  Would like to wait
until receiving confirmation from someone who can test on powerpc.
-- 
Mike Kravetz


Re: [PATCH v3 2/4] hugetlbfs: move hugepagesz= parsing to arch independent code

2020-04-27 Thread Andrew Morton
On Mon, 27 Apr 2020 12:09:47 -0700 Mike Kravetz  wrote:

> Previously, a check for hugepages_supported was added before processing
> hugetlb command line parameters.  On some architectures such as powerpc,
> hugepages_supported() is not set to true until after command line
> processing.  Therefore, no hugetlb command line parameters would be
> accepted.
> 
> Remove the additional checks for hugepages_supported.  In hugetlb_init,
> print a warning if !hugepages_supported and command line parameters were
> specified.

This applies to [4/4] instead of fixing [2/4].  I guess you'll
straighten that out in v4?

btw, was
http://lkml.kernel.org/r/CADYN=9Koefrq9H1Y82Q8nMNbeyN4tzhEfvDu5u=svfjfzcy...@mail.gmail.com
addressed?



[PATCH 5/5] binfmt_elf_fdpic: remove the set_fs(KERNEL_DS) in elf_fdpic_core_dump

2020-04-27 Thread Christoph Hellwig
There is no logic in elf_fdpic_core_dump itself or in the various arch
helpers called from it which use uaccess routines on kernel pointers
except for the file writes that are nicely encapsulated by using
__kernel_write in dump_emit.

Signed-off-by: Christoph Hellwig 
---
 fs/binfmt_elf_fdpic.c | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 240f35437..c62c17a5c34a9 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1549,7 +1549,6 @@ static int elf_fdpic_core_dump(struct coredump_params 
*cprm)
 {
 #defineNUM_NOTES   6
int has_dumped = 0;
-   mm_segment_t fs;
int segs;
int i;
struct vm_area_struct *vma;
@@ -1678,9 +1677,6 @@ static int elf_fdpic_core_dump(struct coredump_params 
*cprm)
  "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu);
 #endif
 
-   fs = get_fs();
-   set_fs(KERNEL_DS);
-
offset += sizeof(*elf); /* Elf header */
offset += segs * sizeof(struct elf_phdr);   /* Program headers */
 
@@ -1695,7 +1691,7 @@ static int elf_fdpic_core_dump(struct coredump_params 
*cprm)
 
phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
if (!phdr4note)
-   goto end_coredump;
+   goto cleanup;
 
fill_elf_note_phdr(phdr4note, sz, offset);
offset += sz;
@@ -1711,17 +1707,17 @@ static int elf_fdpic_core_dump(struct coredump_params 
*cprm)
if (e_phnum == PN_XNUM) {
shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
if (!shdr4extnum)
-   goto end_coredump;
+   goto cleanup;
fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
}
 
offset = dataoff;
 
if (!dump_emit(cprm, elf, sizeof(*elf)))
-   goto end_coredump;
+   goto cleanup;
 
if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
-   goto end_coredump;
+   goto cleanup;
 
/* write program headers for segments dump */
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
@@ -1745,16 +1741,16 @@ static int elf_fdpic_core_dump(struct coredump_params 
*cprm)
phdr.p_align = ELF_EXEC_PAGESIZE;
 
if (!dump_emit(cprm, , sizeof(phdr)))
-   goto end_coredump;
+   goto cleanup;
}
 
if (!elf_core_write_extra_phdrs(cprm, offset))
-   goto end_coredump;
+   goto cleanup;
 
/* write out the notes section */
for (i = 0; i < numnote; i++)
if (!writenote(notes + i, cprm))
-   goto end_coredump;
+   goto cleanup;
 
/* write out the thread status notes section */
list_for_each(t, _list) {
@@ -1763,21 +1759,21 @@ static int elf_fdpic_core_dump(struct coredump_params 
*cprm)
 
for (i = 0; i < tmp->num_notes; i++)
if (!writenote(>notes[i], cprm))
-   goto end_coredump;
+   goto cleanup;
}
 
if (!dump_skip(cprm, dataoff - cprm->pos))
-   goto end_coredump;
+   goto cleanup;
 
if (!elf_fdpic_dump_segments(cprm))
-   goto end_coredump;
+   goto cleanup;
 
if (!elf_core_write_extra_data(cprm))
-   goto end_coredump;
+   goto cleanup;
 
if (e_phnum == PN_XNUM) {
if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
-   goto end_coredump;
+   goto cleanup;
}
 
if (cprm->file->f_pos != offset) {
@@ -1787,9 +1783,6 @@ static int elf_fdpic_core_dump(struct coredump_params 
*cprm)
   cprm->file->f_pos, offset);
}
 
-end_coredump:
-   set_fs(fs);
-
 cleanup:
while (!list_empty(_list)) {
struct list_head *tmp = thread_list.next;
-- 
2.26.1



[PATCH 4/5] binfmt_elf: remove the set_fs(KERNEL_DS) in elf_core_dump

2020-04-27 Thread Christoph Hellwig
There is no logic in elf_core_dump itself or in the various arch helpers
called from it which use uaccess routines on kernel pointers except for
the file writes that are nicely encapsulated by using __kernel_write in
dump_emit.

Signed-off-by: Christoph Hellwig 
---
 fs/binfmt_elf.c | 40 +---
 1 file changed, 13 insertions(+), 27 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a1f57e20c3cf2..b29b84595b09f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1355,7 +1355,6 @@ static unsigned long vma_dump_size(struct vm_area_struct 
*vma,
vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
u32 __user *header = (u32 __user *) vma->vm_start;
u32 word;
-   mm_segment_t fs = get_fs();
/*
 * Doing it this way gets the constant folded by GCC.
 */
@@ -1368,14 +1367,8 @@ static unsigned long vma_dump_size(struct vm_area_struct 
*vma,
magic.elfmag[EI_MAG1] = ELFMAG1;
magic.elfmag[EI_MAG2] = ELFMAG2;
magic.elfmag[EI_MAG3] = ELFMAG3;
-   /*
-* Switch to the user "segment" for get_user(),
-* then put back what elf_core_dump() had in place.
-*/
-   set_fs(USER_DS);
if (unlikely(get_user(word, header)))
word = 0;
-   set_fs(fs);
if (word == magic.cmp)
return PAGE_SIZE;
}
@@ -2183,7 +2176,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct 
elf_shdr *shdr4extnum,
 static int elf_core_dump(struct coredump_params *cprm)
 {
int has_dumped = 0;
-   mm_segment_t fs;
int segs, i;
size_t vma_data_size = 0;
struct vm_area_struct *vma, *gate_vma;
@@ -2236,9 +2228,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 
has_dumped = 1;
 
-   fs = get_fs();
-   set_fs(KERNEL_DS);
-
offset += sizeof(elf);  /* Elf header */
offset += segs * sizeof(struct elf_phdr);   /* Program headers */
 
@@ -2250,7 +2239,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 
phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
if (!phdr4note)
-   goto end_coredump;
+   goto cleanup;
 
fill_elf_note_phdr(phdr4note, sz, offset);
offset += sz;
@@ -2265,7 +2254,7 @@ static int elf_core_dump(struct coredump_params *cprm)
vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
  GFP_KERNEL);
if (!vma_filesz)
-   goto end_coredump;
+   goto cleanup;
 
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma)) {
@@ -2283,17 +2272,17 @@ static int elf_core_dump(struct coredump_params *cprm)
if (e_phnum == PN_XNUM) {
shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
if (!shdr4extnum)
-   goto end_coredump;
+   goto cleanup;
fill_extnum_info(, shdr4extnum, e_shoff, segs);
}
 
offset = dataoff;
 
if (!dump_emit(cprm, , sizeof(elf)))
-   goto end_coredump;
+   goto cleanup;
 
if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
-   goto end_coredump;
+   goto cleanup;
 
/* Write program headers for segments dump */
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
@@ -2315,22 +2304,22 @@ static int elf_core_dump(struct coredump_params *cprm)
phdr.p_align = ELF_EXEC_PAGESIZE;
 
if (!dump_emit(cprm, , sizeof(phdr)))
-   goto end_coredump;
+   goto cleanup;
}
 
if (!elf_core_write_extra_phdrs(cprm, offset))
-   goto end_coredump;
+   goto cleanup;
 
/* write out the notes section */
if (!write_note_info(, cprm))
-   goto end_coredump;
+   goto cleanup;
 
if (elf_coredump_extra_notes_write(cprm))
-   goto end_coredump;
+   goto cleanup;
 
/* Align to page */
if (!dump_skip(cprm, dataoff - cprm->pos))
-   goto end_coredump;
+   goto cleanup;
 
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
vma = next_vma(vma, gate_vma)) {
@@ -2352,22 +2341,19 @@ static int elf_core_dump(struct coredump_params *cprm)
} else
stop = !dump_skip(cprm, PAGE_SIZE);
if (stop)
-   goto end_coredump;
+   goto cleanup;
}
}

[PATCH 3/5] binfmt_elf: remove the set_fs in fill_siginfo_note

2020-04-27 Thread Christoph Hellwig
From: "Eric W. Biederman" 

The code in binfmt_elf.c is different from the rest of the code that
processes siginfo, as it sends siginfo from a kernel buffer to a file
rather than from kernel memory to userspace buffers.  To remove its
use of set_fs the code needs some different siginfo helpers.

Add the helper copy_siginfo_to_external to copy from the kernel's
internal siginfo layout to a buffer in the siginfo layout that
userspace expects.

Modify fill_siginfo_note to use copy_siginfo_to_external instead of
set_fs and copy_siginfo_to_user.

Update compat_binfmt_elf.c to use the previously added
copy_siginfo_to_external32 to handle the compat case.

Signed-off-by: "Eric W. Biederman" 
Signed-off-by: Christoph Hellwig 
---
 fs/binfmt_elf.c| 5 +
 fs/compat_binfmt_elf.c | 2 +-
 include/linux/signal.h | 8 
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 13f25e241ac46..a1f57e20c3cf2 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1556,10 +1556,7 @@ static void fill_auxv_note(struct memelfnote *note, 
struct mm_struct *mm)
 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t 
*csigdata,
const kernel_siginfo_t *siginfo)
 {
-   mm_segment_t old_fs = get_fs();
-   set_fs(KERNEL_DS);
-   copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
-   set_fs(old_fs);
+   copy_siginfo_to_external(csigdata, siginfo);
fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
 }
 
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index aaad4ca1217ef..fa0e24e1b7267 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -39,7 +39,7 @@
  */
 #define user_long_tcompat_long_t
 #define user_siginfo_t compat_siginfo_t
-#define copy_siginfo_to_user   copy_siginfo_to_user32
+#define copy_siginfo_to_external   copy_siginfo_to_external32
 
 /*
  * The machine-dependent core note format types are defined in 
elfcore-compat.h,
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 05bacd2ab1350..6bb1a3f0258c2 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -24,6 +24,14 @@ static inline void clear_siginfo(kernel_siginfo_t *info)
 
 #define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct 
kernel_siginfo))
 
+static inline void copy_siginfo_to_external(siginfo_t *to,
+   const kernel_siginfo_t *from)
+{
+   memcpy(to, from, sizeof(*from));
+   memset(((char *)to) + sizeof(struct kernel_siginfo), 0,
+   SI_EXPANSION_SIZE);
+}
+
 int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from);
 int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from);
 
-- 
2.26.1



[PATCH 2/5] signal: factor copy_siginfo_to_external32 from copy_siginfo_to_user32

2020-04-27 Thread Christoph Hellwig
From: "Eric W. Biederman" 

To remove the use of set_fs in the coredump code there needs to be a
way to convert a kernel siginfo to a userspace compat siginfo.

Call that function copy_siginfo_to_compat and factor it out of
copy_siginfo_to_user32.

The existence of x32 complicates this code.  On x32 SIGCHLD uses 64bit
times for utime and stime.  As only SIGCHLD is affected and SIGCHLD
never causes a coredump I have avoided handling that case.

Signed-off-by: "Eric W. Biederman" 
Signed-off-by: Christoph Hellwig 
---
 include/linux/compat.h |   2 +
 kernel/signal.c| 113 ++---
 2 files changed, 62 insertions(+), 53 deletions(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0480ba4db5929..adbfe8f688d92 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -402,6 +402,8 @@ long compat_get_bitmap(unsigned long *mask, const 
compat_ulong_t __user *umask,
   unsigned long bitmap_size);
 long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
   unsigned long bitmap_size);
+void copy_siginfo_to_external32(struct compat_siginfo *to,
+   const struct kernel_siginfo *from);
 int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo 
__user *from);
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, const 
kernel_siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
diff --git a/kernel/signal.c b/kernel/signal.c
index 284fc1600063b..244c69c4261e0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3235,94 +3235,101 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const 
siginfo_t __user *from)
 }
 
 #ifdef CONFIG_COMPAT
-int copy_siginfo_to_user32(struct compat_siginfo __user *to,
-  const struct kernel_siginfo *from)
-#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
+void copy_siginfo_to_external32(struct compat_siginfo *to,
+   const struct kernel_siginfo *from)
 {
-   return __copy_siginfo_to_user32(to, from, in_x32_syscall());
-}
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
-const struct kernel_siginfo *from, bool x32_ABI)
-#endif
-{
-   struct compat_siginfo new;
-   memset(, 0, sizeof(new));
+   /*
+* This function does not work properly for SIGCHLD on x32,
+* but it does not need to as SIGCHLD never causes a coredump.
+*/
+   memset(to, 0, sizeof(*to));
 
-   new.si_signo = from->si_signo;
-   new.si_errno = from->si_errno;
-   new.si_code  = from->si_code;
+   to->si_signo = from->si_signo;
+   to->si_errno = from->si_errno;
+   to->si_code  = from->si_code;
switch(siginfo_layout(from->si_signo, from->si_code)) {
case SIL_KILL:
-   new.si_pid = from->si_pid;
-   new.si_uid = from->si_uid;
+   to->si_pid = from->si_pid;
+   to->si_uid = from->si_uid;
break;
case SIL_TIMER:
-   new.si_tid = from->si_tid;
-   new.si_overrun = from->si_overrun;
-   new.si_int = from->si_int;
+   to->si_tid = from->si_tid;
+   to->si_overrun = from->si_overrun;
+   to->si_int = from->si_int;
break;
case SIL_POLL:
-   new.si_band = from->si_band;
-   new.si_fd   = from->si_fd;
+   to->si_band = from->si_band;
+   to->si_fd   = from->si_fd;
break;
case SIL_FAULT:
-   new.si_addr = ptr_to_compat(from->si_addr);
+   to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-   new.si_trapno = from->si_trapno;
+   to->si_trapno = from->si_trapno;
 #endif
break;
case SIL_FAULT_MCEERR:
-   new.si_addr = ptr_to_compat(from->si_addr);
+   to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-   new.si_trapno = from->si_trapno;
+   to->si_trapno = from->si_trapno;
 #endif
-   new.si_addr_lsb = from->si_addr_lsb;
+   to->si_addr_lsb = from->si_addr_lsb;
break;
case SIL_FAULT_BNDERR:
-   new.si_addr = ptr_to_compat(from->si_addr);
+   to->si_addr = ptr_to_compat(from->si_addr);
 #ifdef __ARCH_SI_TRAPNO
-   new.si_trapno = from->si_trapno;
+   to->si_trapno = from->si_trapno;
 #endif
-   new.si_lower = ptr_to_compat(from->si_lower);
-   new.si_upper = ptr_to_compat(from->si_upper);
+   to->si_lower = ptr_to_compat(from->si_lower);
+   to->si_upper = ptr_to_compat(from->si_upper);
break;
case SIL_FAULT_PKUERR:
-   new.si_addr = ptr_to_compat(from->si_addr);
+   

[PATCH 1/5] powerpc/spufs: simplify spufs core dumping

2020-04-27 Thread Christoph Hellwig
Replace the coredump ->read method with a ->dump method that must call
dump_emit itself.  That way we avoid a buffer allocation and messing with
set_fs() to call into code that is intended to deal with user buffers.
For the ->get case we can now use a small on-stack buffer and avoid
memory allocations as well.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Arnd Bergmann 
Reviewed-by: Jeremy Kerr 
---
 arch/powerpc/platforms/cell/spufs/coredump.c |  87 ++
 arch/powerpc/platforms/cell/spufs/file.c | 273 ++-
 arch/powerpc/platforms/cell/spufs/spufs.h|   3 +-
 3 files changed, 170 insertions(+), 193 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c 
b/arch/powerpc/platforms/cell/spufs/coredump.c
index 8b3296b62f651..3b75e8f60609c 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -21,22 +21,6 @@
 
 #include "spufs.h"
 
-static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
-   size_t size, loff_t *off)
-{
-   u64 data;
-   int ret;
-
-   if (spufs_coredump_read[num].read)
-   return spufs_coredump_read[num].read(ctx, buffer, size, off);
-
-   data = spufs_coredump_read[num].get(ctx);
-   ret = snprintf(buffer, size, "0x%.16llx", data);
-   if (ret >= size)
-   return size;
-   return ++ret; /* count trailing NULL */
-}
-
 static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
 {
int i, sz, total = 0;
@@ -118,58 +102,43 @@ int spufs_coredump_extra_notes_size(void)
 static int spufs_arch_write_note(struct spu_context *ctx, int i,
  struct coredump_params *cprm, int dfd)
 {
-   loff_t pos = 0;
-   int sz, rc, total = 0;
-   const int bufsz = PAGE_SIZE;
-   char *name;
-   char fullname[80], *buf;
+   size_t sz = spufs_coredump_read[i].size;
+   char fullname[80];
struct elf_note en;
-   size_t skip;
-
-   buf = (void *)get_zeroed_page(GFP_KERNEL);
-   if (!buf)
-   return -ENOMEM;
+   size_t ret;
 
-   name = spufs_coredump_read[i].name;
-   sz = spufs_coredump_read[i].size;
-
-   sprintf(fullname, "SPU/%d/%s", dfd, name);
+   sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name);
en.n_namesz = strlen(fullname) + 1;
en.n_descsz = sz;
en.n_type = NT_SPU;
 
if (!dump_emit(cprm, , sizeof(en)))
-   goto Eio;
-
+   return -EIO;
if (!dump_emit(cprm, fullname, en.n_namesz))
-   goto Eio;
-
+   return -EIO;
if (!dump_align(cprm, 4))
-   goto Eio;
-
-   do {
-   rc = do_coredump_read(i, ctx, buf, bufsz, );
-   if (rc > 0) {
-   if (!dump_emit(cprm, buf, rc))
-   goto Eio;
-   total += rc;
-   }
-   } while (rc == bufsz && total < sz);
-
-   if (rc < 0)
-   goto out;
-
-   skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
-   if (!dump_skip(cprm, skip))
-   goto Eio;
-
-   rc = 0;
-out:
-   free_page((unsigned long)buf);
-   return rc;
-Eio:
-   free_page((unsigned long)buf);
-   return -EIO;
+   return -EIO;
+
+   if (spufs_coredump_read[i].dump) {
+   ret = spufs_coredump_read[i].dump(ctx, cprm);
+   if (ret < 0)
+   return ret;
+   } else {
+   char buf[32];
+
+   ret = snprintf(buf, sizeof(buf), "0x%.16llx",
+  spufs_coredump_read[i].get(ctx));
+   if (ret >= sizeof(buf))
+   return sizeof(buf);
+
+   /* count trailing the NULL: */
+   if (!dump_emit(cprm, buf, ret + 1))
+   return -EIO;
+   }
+
+   if (!dump_skip(cprm, roundup(cprm->pos - ret + sz, 4) - cprm->pos))
+   return -EIO;
+   return 0;
 }
 
 int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
diff --git a/arch/powerpc/platforms/cell/spufs/file.c 
b/arch/powerpc/platforms/cell/spufs/file.c
index c0f950a3f4e1f..0f8c3d692af0c 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -9,6 +9,7 @@
 
 #undef DEBUG
 
+#include 
 #include 
 #include 
 #include 
@@ -129,6 +130,14 @@ static ssize_t spufs_attr_write(struct file *file, const 
char __user *buf,
return ret;
 }
 
+static ssize_t spufs_dump_emit(struct coredump_params *cprm, void *buf,
+   size_t size)
+{
+   if (!dump_emit(cprm, buf, size))
+   return -EIO;
+   return size;
+}
+
 #define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
 static int __fops ## _open(struct inode *inode, struct file *file) \
 {   

remove set_fs calls from the coredump code v4

2020-04-27 Thread Christoph Hellwig
Hi all,

this series gets rid of playing with the address limit in the exec and
coredump code.  Most of this was fairly trivial, the biggest changes are
those to the spufs coredump code.

Changes since v3:
 - fix x86 compilation with x32 in the new version of the signal code
 - split the exec patches into a new series

Changes since v2:
 - don't cleanup the compat siginfo calling conventions, use the patch
   variant from Eric with slight coding style fixes instead.

Changes since v1:
 - properly spell NUL
 - properly handle the compat siginfo case in ELF coredumps



Re: [PATCH v3 2/4] hugetlbfs: move hugepagesz= parsing to arch independent code

2020-04-27 Thread Mike Kravetz
On 4/27/20 10:25 AM, Mike Kravetz wrote:
> On 4/26/20 10:04 PM, Sandipan Das wrote:
>> On 18/04/20 12:20 am, Mike Kravetz wrote:
>>> Now that architectures provide arch_hugetlb_valid_size(), parsing
>>> of "hugepagesz=" can be done in architecture independent code.
>>
>> This isn't working as expected on powerpc64.
>>
>>   [0.00] Kernel command line: 
>> root=UUID=dc7b49cf-95a2-4996-8e7d-7c64ddc7a6ff hugepagesz=16G hugepages=2 
>>   [0.00] HugeTLB: huge pages not supported, ignoring hugepagesz = 16G
>>   [0.00] HugeTLB: huge pages not supported, ignoring hugepages = 2
>>   [0.284177] HugeTLB registered 16.0 MiB page size, pre-allocated 0 pages
>>   [0.284182] HugeTLB registered 16.0 GiB page size, pre-allocated 0 pages
>>   [2.585062] hugepagesz=16G
>>   [2.585063] hugepages=2
>>
> 
> In the new arch independent version of hugepages_setup, I added the following
> code in patch 4 off this series:
> 
>> +if (!hugepages_supported()) {
>> +pr_warn("HugeTLB: huge pages not supported, ignoring hugepages 
>> = %s\n", s);
>> +return 0;
>> +}
>> +
> 
> The easy solution is to remove all the hugepages_supported() checks from
> command line parsing routines and rely on the later check in hugetlb_init().

Here is a patch to address the issue.  Sorry, as my series breaks all hugetlb
command line processing on powerpc.

Sandipan, can you test the following patch?

>From 480fe2847361e2a85aeec1fb39fe643bb7100a07 Mon Sep 17 00:00:00 2001
From: Mike Kravetz 
Date: Mon, 27 Apr 2020 11:37:30 -0700
Subject: [PATCH] hugetlbfs: fix changes to command line processing

Previously, a check for hugepages_supported was added before processing
hugetlb command line parameters.  On some architectures such as powerpc,
hugepages_supported() is not set to true until after command line
processing.  Therefore, no hugetlb command line parameters would be
accepted.

Remove the additional checks for hugepages_supported.  In hugetlb_init,
print a warning if !hugepages_supported and command line parameters were
specified.

Signed-off-by: Mike Kravetz 
---
 mm/hugetlb.c | 20 
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1075abdb5717..5548e8851b93 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3212,8 +3212,11 @@ static int __init hugetlb_init(void)
 {
int i;
 
-   if (!hugepages_supported())
+   if (!hugepages_supported()) {
+   if (hugetlb_max_hstate || default_hstate_max_huge_pages)
+   pr_warn("HugeTLB: huge pages not supported, ignoring 
associated command-line parameters\n");
return 0;
+   }
 
/*
 * Make sure HPAGE_SIZE (HUGETLB_PAGE_ORDER) hstate exists.  Some
@@ -3315,11 +3318,6 @@ static int __init hugepages_setup(char *s)
unsigned long *mhp;
static unsigned long *last_mhp;
 
-   if (!hugepages_supported()) {
-   pr_warn("HugeTLB: huge pages not supported, ignoring hugepages 
= %s\n", s);
-   return 0;
-   }
-
if (!parsed_valid_hugepagesz) {
pr_warn("HugeTLB: hugepages=%s does not follow a valid 
hugepagesz, ignoring\n", s);
parsed_valid_hugepagesz = true;
@@ -3372,11 +3370,6 @@ static int __init hugepagesz_setup(char *s)
struct hstate *h;
 
parsed_valid_hugepagesz = false;
-   if (!hugepages_supported()) {
-   pr_warn("HugeTLB: huge pages not supported, ignoring hugepagesz 
= %s\n", s);
-   return 0;
-   }
-
size = (unsigned long)memparse(s, NULL);
 
if (!arch_hugetlb_valid_size(size)) {
@@ -3424,11 +3417,6 @@ static int __init default_hugepagesz_setup(char *s)
unsigned long size;
 
parsed_valid_hugepagesz = false;
-   if (!hugepages_supported()) {
-   pr_warn("HugeTLB: huge pages not supported, ignoring 
default_hugepagesz = %s\n", s);
-   return 0;
-   }
-
if (parsed_default_hugepagesz) {
pr_err("HugeTLB: default_hugepagesz previously specified, 
ignoring %s\n", s);
return 0;
-- 
2.25.4



[PATCH] powerpc: Add interrupt mode information in /proc/cpuinfo

2020-04-27 Thread Cédric Le Goater
PowerNV and pSeries machines can run using the XIVE or XICS interrupt
mode. Report this information in /proc/cpuinfo :

timebase: 51200
platform: PowerNV
model   : 9006-22C
machine : PowerNV 9006-22C
firmware: OPAL
MMU : Radix
IRQ : XIVE

and use seq_puts() where we can.

Signed-off-by: Cédric Le Goater 
---
 arch/powerpc/platforms/powernv/setup.c | 12 
 arch/powerpc/platforms/pseries/setup.c |  8 ++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/setup.c 
b/arch/powerpc/platforms/powernv/setup.c
index 3bc188da82ba..39ef3394038d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -196,14 +196,18 @@ static void pnv_show_cpuinfo(struct seq_file *m)
model = of_get_property(root, "model", NULL);
seq_printf(m, "machine\t\t: PowerNV %s\n", model);
if (firmware_has_feature(FW_FEATURE_OPAL))
-   seq_printf(m, "firmware\t: OPAL\n");
+   seq_puts(m, "firmware\t: OPAL\n");
else
-   seq_printf(m, "firmware\t: BML\n");
+   seq_puts(m, "firmware\t: BML\n");
of_node_put(root);
if (radix_enabled())
-   seq_printf(m, "MMU\t\t: Radix\n");
+   seq_puts(m, "MMU\t\t: Radix\n");
else
-   seq_printf(m, "MMU\t\t: Hash\n");
+   seq_puts(m, "MMU\t\t: Hash\n");
+   if (xive_enabled())
+   seq_puts(m, "IRQ\t\t: XIVE\n");
+   else
+   seq_puts(m, "IRQ\t\t: XICS\n");
 }
 
 static void pnv_prepare_going_down(void)
diff --git a/arch/powerpc/platforms/pseries/setup.c 
b/arch/powerpc/platforms/pseries/setup.c
index 0c8421dd01ab..d248fca67797 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -95,9 +95,13 @@ static void pSeries_show_cpuinfo(struct seq_file *m)
seq_printf(m, "machine\t\t: CHRP %s\n", model);
of_node_put(root);
if (radix_enabled())
-   seq_printf(m, "MMU\t\t: Radix\n");
+   seq_puts(m, "MMU\t\t: Radix\n");
else
-   seq_printf(m, "MMU\t\t: Hash\n");
+   seq_puts(m, "MMU\t\t: Hash\n");
+   if (xive_enabled())
+   seq_puts(m, "IRQ\t\t: XIVE\n");
+   else
+   seq_puts(m, "IRQ\t\t: XICS\n");
 }
 
 /* Initialize firmware assisted non-maskable interrupts if
-- 
2.25.3



Re: [PATCH v4] pci: Make return value of pcie_capability_read*() consistent

2020-04-27 Thread Bjorn Helgaas
[+cc Thomas, Michael, linux-mips, linux-ppc, LKML
Background:

  - PCI config accessors (pci_read_config_word(), etc) return 0 or a
positive error (PCIBIOS_BAD_REGISTER_NUMBER, etc).

  - PCI Express capability accessors (pcie_capability_read_word(),
etc) return 0, a negative error (-EINVAL), or a positive error
(PCIBIOS_BAD_REGISTER_NUMBER, etc).

  - The PCI Express case is hard for callers to deal with.  The
original plan was to convert this case to either return 0 or
positive errors, just like pci_read_config_word().

  - I'm raising the possibility of instead getting rid of the positive
PCIBIOS_* error values completely and replacing them with -EINVAL,
-ENOENT, etc.

  - Very few callers check the return codes at all.  Most of the ones
that do either check for non-zero or use pcibios_err_to_errno() to
convert PCIBIOS_* to -EINVAL, etc.

I added MIPS and powerpc folks to CC: just as FYI because you're the
biggest users of PCIBIOS_*.  The intent is that this would be zero
functional change.
]

On Sun, Apr 26, 2020 at 11:51:30AM +0200, Saheed Bolarinwa wrote:
> On 4/25/20 12:30 AM, Bjorn Helgaas wrote:
> > On Fri, Apr 24, 2020 at 04:27:11PM +0200, Bolarinwa Olayemi Saheed wrote:
> > > pcie_capability_read*() could return 0, -EINVAL, or any of the
> > > PCIBIOS_* error codes (which are positive).
> > > This behaviour is now changed to return only PCIBIOS_* error
> > > codes on error.
> > > This is consistent with pci_read_config_*(). Callers can now have
> > > a consistent way for checking which error has occurred.
> > > 
> > > An audit of the callers of this function was made and no case was found
> > > where there is need for a change within the caller function or their
> > > dependencies down the hierarchy.
> > > Out of all caller functions discovered only 8 functions either persist the
> > > return value of pcie_capability_read*() or directly pass on the return
> > > value.
> > > 
> > > 1.) "./drivers/infiniband/hw/hfi1/pcie.c" :
> > > => pcie_speeds() line-306
> > > 
> > >   if (ret) {
> > >   dd_dev_err(dd, "Unable to read from PCI config\n");
> > >   return ret;
> > >   }
> > > 
> > > remarks: The variable "ret" is the captured return value.
> > >   This function passes on the return value. The return value was
> > >store only by hfi1_init_dd() line-15076 in
> > >   ./drivers/infiniband/hw/hfi1/chip.c and it behaves the same on 
> > > all
> > >errors. So this patch will not require a change in this function.
> > Thanks for the analysis, but I don't think it's quite complete.
> > Here's the call chain I see:
> > 
> >local_pci_probe
> >  pci_drv->probe(..)
> >init_one# hfi1_pci_driver.probe method
> >  hfi1_init_dd
> >pcie_speeds
> >  pcie_capability_read_dword
> 
> Thank you for pointing out the call chain. After checking it, I noticed that
> the
> 
> error is handled within the chain in two places without being passed on.
> 
> 1. init_one() in ./drivers/infiniband/hw/hfil1/init.c
> 
>  ret = hfi1_init_dd(dd);
>     if (ret)
>     goto clean_bail; /* error already printed */
> 
>...
>clean_bail:
>     hfi1_pcie_cleanup(pdev);  /*EXITS*/
> 
> 2. hfi1_init_dd() in ./drivers/infiniband/hw/hfil1/chip.c
> 
>     ret = pcie_speeds(dd);
>     if (ret)
>     goto bail_cleanup;
> 
>         ...
> 
>         bail_cleanup:
>          hfi1_pcie_ddcleanup(dd);  /*EXITS*/
> 
> > If pcie_capability_read_dword() returns any non-zero value, that value
> > propagates all the way up and is eventually returned by init_one().
> > init_one() is called by local_pci_probe(), which interprets:
> > 
> >< 0 as failure
> >  0 as success, and
> >> 0 as "success but warn"
> > 
> > So previously an error from pcie_capability_read_dword() could cause
> > either failure or "success but warn" for the probe method, and after
> > this patch those errors will always cause "success but warn".
> > 
> > The current behavior is definitely a bug: if
> > pci_bus_read_config_word() returns PCIBIOS_BAD_REGISTER_NUMBER, that
> > causes pcie_capability_read_dword() to also return
> > PCIBIOS_BAD_REGISTER_NUMBER, which will lead to the probe succeeding
> > with a warning, when it should fail.
> > 
> > I think the fix is to make pcie_speeds() call pcibios_err_to_errno():
> > 
> >ret = pcie_capability_read_dword(...);
> >if (ret) {
> >  dd_dev_err(...);
> >  return pcibios_err_to_errno(ret);
> >}
> 
> I agree that this fix is needed, so that PCIBIOS_* error code are
> not passed on but replaced
> 
> with one consistent with non-PCI error codes.
> 
> > That could be its own separate preparatory patch before this
> > adjustment to pcie_capability_read_dword().
> > 
> > I didn't look at the other cases below, so I don't know whether
> > they are similar hidden problems.
> 
> I will check again, please 

[PATCH net] ibmvnic: Fall back to 16 H_SEND_SUB_CRQ_INDIRECT entries with old FW

2020-04-27 Thread Juliet Kim
The maximum entries for H_SEND_SUB_CRQ_INDIRECT has increased on
some platforms from 16 to 128. If Live Partition Mobility is used
to migrate a running OS image from a newer source platform to an
older target platform, then H_SEND_SUB_CRQ_INDIRECT will fail with
H_PARAMETER if 128 entries are queued.

Fix this by falling back to 16 entries if H_PARAMETER is returned
from the hcall().

Signed-off-by: Juliet Kim 
---
 drivers/net/ethernet/ibm/ibmvnic.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c 
b/drivers/net/ethernet/ibm/ibmvnic.c
index 4bd33245bad6..b66c2f26a427 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1656,6 +1656,17 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, 
struct net_device *netdev)
lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
   (u64)tx_buff->indir_dma,
   (u64)num_entries);
+
+   /* Old firmware accepts max 16 num_entries */
+   if (lpar_rc == H_PARAMETER && num_entries > 16) {
+   tx_crq.v1.n_crq_elem = 16;
+   tx_buff->num_entries = 16;
+   lpar_rc = send_subcrq_indirect(adapter,
+  handle_array[queue_num],
+  (u64)tx_buff->indir_dma,
+  16);
+   }
+
dma_unmap_single(dev, tx_buff->indir_dma,
 sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
} else {
-- 
2.18.1



Re: [PATCH v3 2/4] hugetlbfs: move hugepagesz= parsing to arch independent code

2020-04-27 Thread Mike Kravetz
On 4/26/20 10:04 PM, Sandipan Das wrote:
> Hi Mike,
> 
> On 18/04/20 12:20 am, Mike Kravetz wrote:
>> Now that architectures provide arch_hugetlb_valid_size(), parsing
>> of "hugepagesz=" can be done in architecture independent code.
>> Create a single routine to handle hugepagesz= parsing and remove
>> all arch specific routines.  We can also remove the interface
>> hugetlb_bad_size() as this is no longer used outside arch independent
>> code.
>>
>> This also provides consistent behavior of hugetlbfs command line
>> options.  The hugepagesz= option should only be specified once for
>> a specific size, but some architectures allow multiple instances.
>> This appears to be more of an oversight when code was added by some
>> architectures to set up ALL huge pages sizes.
>>
>> [...]
>>
>> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
>> index de54d2a37830..2c3fa0a7787b 100644
>> --- a/arch/powerpc/mm/hugetlbpage.c
>> +++ b/arch/powerpc/mm/hugetlbpage.c
>> @@ -589,21 +589,6 @@ static int __init add_huge_page_size(unsigned long long 
>> size)
>>  return 0;
>>  }
>>  
>> -static int __init hugepage_setup_sz(char *str)
>> -{
>> -unsigned long long size;
>> -
>> -size = memparse(str, );
>> -
>> -if (add_huge_page_size(size) != 0) {
>> -hugetlb_bad_size();
>> -pr_err("Invalid huge page size specified(%llu)\n", size);
>> -}
>> -
>> -return 1;
>> -}
>> -__setup("hugepagesz=", hugepage_setup_sz);
>> -
>> [...]
> 
> This isn't working as expected on powerpc64.
> 
>   [0.00] Kernel command line: 
> root=UUID=dc7b49cf-95a2-4996-8e7d-7c64ddc7a6ff hugepagesz=16G hugepages=2 
>   [0.00] HugeTLB: huge pages not supported, ignoring hugepagesz = 16G
>   [0.00] HugeTLB: huge pages not supported, ignoring hugepages = 2
>   [0.284177] HugeTLB registered 16.0 MiB page size, pre-allocated 0 pages
>   [0.284182] HugeTLB registered 16.0 GiB page size, pre-allocated 0 pages
>   [2.585062] hugepagesz=16G
>   [2.585063] hugepages=2
> 
> The "huge pages not supported" messages are under a !hugepages_supported()
> condition which checks if HPAGE_SHIFT is non-zero. On powerpc64, HPAGE_SHIFT
> comes from the hpage_shift variable. At this point, it is still zero and yet
> to be set. Hence the check fails. The reason being hugetlbpage_init_default(),
which sets hpage_shift, is now called after hugepage_setup_sz().

Thanks for catching this Sandipan.

In the new arch independent version of hugepages_setup, I added the following
code in patch 4 of this series:

> +static int __init hugepages_setup(char *s)
>  {
>   unsigned long *mhp;
>   static unsigned long *last_mhp;
>  
> + if (!hugepages_supported()) {
> + pr_warn("HugeTLB: huge pages not supported, ignoring hugepages 
> = %s\n", s);
> + return 0;
> + }
> +
>   if (!parsed_valid_hugepagesz) {

In fact, I added it to the beginning of all the hugetlb command line parsing
routines.  My 'thought' was to warn early if hugetlb pages were not supported.
Previously, the first check for hugepages_supported() was in hugetlb_init()
which ran after hugetlbpage_init_default().

The easy solution is to remove all the hugepages_supported() checks from
command line parsing routines and rely on the later check in hugetlb_init().

Another reason for adding those early checks was to possibly prevent the
preallocation of gigantic pages at command line parsing time.   Gigantic
pages are allocated at command line parsing time as they need to be allocated
with the bootmem allocator.  My concern is that there could be some strange
configuration where !hugepages_supported(), yet we allocate gigantic pages
from bootmem that cannot be used or freed later.

powerpc is the only architecture which has its own alloc_bootmem_huge_page
routine.  So, it handles this potential issue.

I'll send out a fix shortly.
-- 
Mike Kravetz


Re: [PATCH 1/3] powerpc: Properly return error code from do_patch_instruction()

2020-04-27 Thread Naveen N. Rao

Christopher M. Riedl wrote:

On Fri Apr 24, 2020 at 9:15 AM, Steven Rostedt wrote:

On Thu, 23 Apr 2020 18:21:14 +0200
Christophe Leroy  wrote:


> Le 23/04/2020 à 17:09, Naveen N. Rao a écrit :
> > With STRICT_KERNEL_RWX, we are currently ignoring return value from
> > __patch_instruction() in do_patch_instruction(), resulting in the error
> > not being propagated back. Fix the same.  
> 
> Good patch.
> 
> Be aware that there is ongoing work which tend to wanting to replace 
> error reporting by BUG_ON() . See 
> https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=166003



Thanks for the reference. I still believe that WARN_ON() should be used
in
99% of the cases, including here. And only do a BUG_ON() when you know
there's no recovering from it.


In fact, there's still BUG_ON()s in my code that I need to convert to
WARN_ON() (it was written when BUG_ON() was still acceptable ;-)


Figured I'd chime in since I am working on that other series :) The
BUG_ON()s are _only_ in the init code to set things up to allow a
temporary mapping for patching a STRICT_RWX kernel later. There's no
ongoing work to "replace error reporting by BUG_ON()". If that initial
setup fails we cannot patch under STRICT_KERNEL_RWX at all which imo
warrants a BUG_ON(). I am still working on v2 of my RFC which does
return any __patch_instruction() error back to the caller of
patch_instruction() similar to this patch.


Ok, that's good to know. I will drop this patch from my series, since 
this can be done independently of the other changes.


- Naveen



Re: [PATCH 3/3] powerpc/kprobes: Check return value of patch_instruction()

2020-04-27 Thread Naveen N. Rao

Steven Rostedt wrote:

On Sat, 25 Apr 2020 10:11:56 +
Christophe Leroy  wrote:


Sure it'd be more explicit, but then more lines also. 3 lines for only 
one really useful.


With goto, I would look like:

diff --git a/arch/powerpc/kernel/optprobes.c 
b/arch/powerpc/kernel/optprobes.c

index 046485bb0a52..938208f824da 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -139,14 +139,14 @@ void arch_remove_optimized_kprobe(struct 
optimized_kprobe *op)

}
  }

-#define PATCH_INSN(addr, instr)
 \
+#define PATCH_INSN(addr, instr, label) 
 \


With the explicit label as a parameter, makes it more evident that it
will do something (like jump) with that label.


I think I will also rename the macro to PATCH_INSN_OR_GOTO() to make it 
super evident :)




I like this solution the best!


Thanks for the feedback.


- Naveen



Re: [PATCH 3/3] powerpc/kprobes: Check return value of patch_instruction()

2020-04-27 Thread Naveen N. Rao

Christophe Leroy wrote:



On 04/24/2020 06:26 PM, Naveen N. Rao wrote:

Steven Rostedt wrote:

On Thu, 23 Apr 2020 17:41:52 +0200
Christophe Leroy  wrote:
> diff --git a/arch/powerpc/kernel/optprobes.c 
b/arch/powerpc/kernel/optprobes.c

> index 024f7aad1952..046485bb0a52 100644
> --- a/arch/powerpc/kernel/optprobes.c
> +++ b/arch/powerpc/kernel/optprobes.c
> @@ -139,52 +139,67 @@ void arch_remove_optimized_kprobe(struct 
optimized_kprobe *op)

>   }
>   }
> > +#define PATCH_INSN(addr, instr) \
> +do { \
> +    int rc = patch_instruction((unsigned int *)(addr), 
instr); \

> +    if (rc) { \
> +    pr_err("%s:%d Error patching instruction at 0x%pK (%pS): 
%d\n", \

> +    __func__, __LINE__, \
> +    (void *)(addr), (void *)(addr), rc); \
> +    return rc; \
> +    } \
> +} while (0)
> +
I hate this kind of macro which hides the "return".

What about keeping the return action in the caller ?

Otherwise, what about implementing something based on the use of 
goto, on the same model as unsafe_put_user() for instance ?


Thanks for the review.

I noticed this as a warning from checkpatch.pl, but this looked compact 
and correct for use in the two following functions. You'll notice that I 
added it just before the two functions this is used in.


I suppose 'goto err' is usable too, but the ftrace code (patch 2) will 
end up with more changes. I'm also struggling to see how a 'goto' is 
less offensive. I think Steve's suggestion below would be the better way 
to go, to make things explicit.




Sure it'd be more explicit, but then more lines also. 3 lines for only 
one really useful.


With goto, I would look like:

diff --git a/arch/powerpc/kernel/optprobes.c 
b/arch/powerpc/kernel/optprobes.c

index 046485bb0a52..938208f824da 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -139,14 +139,14 @@ void arch_remove_optimized_kprobe(struct 
optimized_kprobe *op)

}
  }

-#define PATCH_INSN(addr, instr)
 \
+#define PATCH_INSN(addr, instr, label) 
 \
  do {   \
int rc = patch_instruction((unsigned int *)(addr), instr);   \
if (rc) {\
pr_err("%s:%d Error patching instruction at 0x%pK (%pS): %d\n", 
\
__func__, __LINE__,  \
(void *)(addr), (void *)(addr), rc); \
-   return rc;   \
+   goto label;  \
}\
  } while (0)


My earlier complaint was that this would still add a flow control 
statement, so didn't look to immediately address your original concern.  
However, I suppose introduction of an explicit label makes things a bit 
better.


In addition:


@@ -291,23 +297,8 @@ int arch_prepare_optimized_kprobe(struct 
optimized_kprobe *op, struct kprobe *p)

goto error;
}

-   rc = patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
-   if (rc) {
-   pr_err("%s:%d: Error patching instruction at 0x%pK: %d\n",
-   __func__, __LINE__,
-   (void *)(buff + TMPL_CALL_HDLR_IDX), rc);
-   rc = -EFAULT;
-   goto error;
-   }
-
-   rc = patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step);
-   if (rc) {
-   pr_err("%s:%d: Error patching instruction at 0x%pK: %d\n",
-   __func__, __LINE__,
-   (void *)(buff + TMPL_EMULATE_IDX), rc);
-   rc = -EFAULT;
-   goto error;
-   }
+   PATCH_INSN(buff + TMPL_CALL_HDLR_IDX, branch_op_callback, efault);
+   PATCH_INSN(buff + TMPL_EMULATE_IDX, branch_emulate_step, efault);


I like how this variant can cover additional uses of patch_instruction() 
here.


I will use this variant. Thanks for the suggestion!


- Naveen



RE: [PATCH] x86: Fix early boot crash on gcc-10, next try

2020-04-27 Thread David Laight
From: Borislav Petkov
> Sent: 25 April 2020 18:53
...
> IOW, something like this (ontop) which takes care of the xen case too.
> If it needs to be used by all arches, then I'll split the patch:
.
> - asm ("");
> + prevent_tail_call_optimization();
>  }

One obvious implementation would be a real function call.
Which the compiler would convert into a tail call.
Just to confuse matters :-)

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)


Re: [PATCH v2 1/2] PCI/AER: Allow Native AER Host Bridges to use AER

2020-04-27 Thread Derrick, Jonathan
Hi Bjorn,

On Fri, 2020-04-24 at 18:30 -0500, Bjorn Helgaas wrote:
> Hi Jon,
> 
> I'm glad you raised this because I think the way we handle
> FIRMWARE_FIRST is really screwed up.
> 
> On Mon, Apr 20, 2020 at 03:37:09PM -0600, Jon Derrick wrote:
> > Some platforms have a mix of ports whose capabilities can be negotiated
> > by _OSC, and some ports which are not described by ACPI and instead
> > managed by Native drivers. The existing Firmware-First HEST model can
> > incorrectly tag these Native, Non-ACPI ports as Firmware-First managed
> > ports by advertising the HEST Global Flag and matching the type and
> > class of the port (aer_hest_parse).
> > 
> > If the port requests Native AER through the Host Bridge's capability
> > settings, the AER driver should honor those settings and allow the port
> > to bind. This patch changes the definition of Firmware-First to exclude
> > ports whose Host Bridges request Native AER.
> > 
> > Signed-off-by: Jon Derrick 
> > ---
> >  drivers/pci/pcie/aer.c | 3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > index f4274d3..30fbd1f 100644
> > --- a/drivers/pci/pcie/aer.c
> > +++ b/drivers/pci/pcie/aer.c
> > @@ -314,6 +314,9 @@ int pcie_aer_get_firmware_first(struct pci_dev *dev)
> > if (pcie_ports_native)
> > return 0;
> >  
> > +   if (pci_find_host_bridge(dev->bus)->native_aer)
> > +   return 0;
> 
> I hope we don't have to complicate pcie_aer_get_firmware_first() by
> adding this "native_aer" check here.  I'm not sure what we actually
> *should* do based on FIRMWARE_FIRST, but I don't think the current
> uses really make sense.
> 
> I think Linux makes too many assumptions based on the FIRMWARE_FIRST
> bit.  The ACPI spec really only says (ACPI v6.3, sec 18.3.2.4):
> 
>   If set, FIRMWARE_FIRST indicates to the OSPM that system firmware
>   will handle errors from this source first.
> 
>   If FIRMWARE_FIRST is set in the flags field, the Enabled field [of
>   the HEST AER structure] is ignored by the OSPM.
> 
> I do not see anything there about who owns the AER Capability, but
> Linux assumes that if FIRMWARE_FIRST is set, firmware must own the AER
> Capability.  I think that's reading too much into the spec.
> 
> We already have _OSC, which *does* explicitly talk about who owns the
> AER Capability, and I think we should rely on that.  If firmware
> doesn't want the OS to touch the AER Capability, it should decline to
> give ownership to the OS via _OSC.
> 
> > if (!dev->__aer_firmware_first_valid)
> > aer_set_firmware_first(dev);
> > return dev->__aer_firmware_first;
> > -- 
> > 1.8.3.1
> > 

Just a little bit of reading and my interpretation, as it seems like
some of this is just layers upon layers of possibly conflicting yet
intentionally vague descriptions.

_OSC seems to describe that OSPM can handle AER (6.2.11.3):
PCI Express Advanced Error Reporting (AER) control
   The OS sets this bit to 1 to request control over PCI Express AER.
   If the OS successfully receives control of this feature, it must
   handle error reporting through the AER Capability as described in
   the PCI Express Base Specification.


For AER and DPC the ACPI root port enumeration will properly set
native_aer/dpc based on _OSC:

struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
...
if (!(root->osc_control_set & OSC_PCI_EXPRESS_AER_CONTROL))
host_bridge->native_aer = 0;
if (!(root->osc_control_set & OSC_PCI_EXPRESS_PME_CONTROL))
host_bridge->native_pme = 0;
if (!(root->osc_control_set & OSC_PCI_EXPRESS_LTR_CONTROL))
host_bridge->native_ltr = 0;
if (!(root->osc_control_set & OSC_PCI_EXPRESS_DPC_CONTROL))
host_bridge->native_dpc = 0;

As DPC was defined in an ECN [1], I would imagine AER will need to
cover DPC for legacy platforms prior to the ECN.



The complication is that HEST also seems to describe how ports (and
other devices) are managed either individually or globally:

Table 18-387  PCI Express Root Port AER Structure
...
Flags:
   [0] - FIRMWARE_FIRST: If set, this bit indicates to the OSPM that
   system firmware will handle errors from this source
   [1] - GLOBAL: If set, indicates that the settings contained in this
   structure apply globally to all PCI Express Devices. All other bits
   must be set to zero


The _OSC definition seems to contradict/negate the above FIRMWARE_FIRST
definition that says only firmware will handle errors. It's a bit
different than the IA_32 MCE definition which allows for a GHES_ASSIST
condition, which would cause Firmware 'First', however does allow the
error to be received by OSPM AER via GHES:

Table 18-385  IA-32 Architecture Corrected Machine Check Structure
   [0] - FIRMWARE_FIRST: If set, this bit indicates that system
   firmware will handle errors from this source first.
   [2] - GHES_ASSIST: If set, this bit indicates 

Re: [PATCH v2 2/2] PCI/DPC: Allow Native DPC Host Bridges to use DPC

2020-04-27 Thread Kuppuswamy, Sathyanarayanan




On 4/27/20 8:15 AM, Derrick, Jonathan wrote:

Hi Sathyanarayanan,

On Sat, 2020-04-25 at 13:46 -0700, Kuppuswamy, Sathyanarayanan wrote:


On 4/23/20 8:11 AM, Derrick, Jonathan wrote:

Hi Sathyanarayanan,

On Wed, 2020-04-22 at 15:50 -0700, Kuppuswamy, Sathyanarayanan wrote:

On 4/20/20 2:37 PM, Jon Derrick wrote:

The existing portdrv model prevents DPC services without either OS
control (_OSC) granted to AER services, a Host Bridge requesting Native
AER, or using one of the 'pcie_ports=' parameters of 'native' or
'dpc-native'.

The DPC port service driver itself will also fail to probe if the kernel
assumes the port is using Firmware-First AER. It's a reasonable
expectation that a port using Firmware-First AER will also be using
Firmware-First DPC, however if a Host Bridge requests Native DPC, the
DPC driver should allow it and not fail to bind due to AER capability
settings.

Host Bridges which request Native DPC port services will also likely
request Native AER, however it shouldn't be a requirement. This patch
allows ports on those Host Bridges to have DPC port services.

This will avoid the unlikely situation where the port is Firmware-First
AER and Native DPC, and a BIOS or switch firmware preconfiguration of
the DPC trigger could result in unhandled DPC events.

Signed-off-by: Jon Derrick 
---
drivers/pci/pcie/dpc.c  | 3 ++-
drivers/pci/pcie/portdrv_core.c | 3 ++-
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 7621704..3f3106f 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -284,7 +284,8 @@ static int dpc_probe(struct pcie_device *dev)
int status;
u16 ctl, cap;

-	if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native)

+   if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native &&
+   !pci_find_host_bridge(pdev->bus)->native_dpc)

Why do it in probe as well ? if host->native_dpc is not set then the
device DPC probe it self won't happen right ?


Portdrv only enables the interrupt and allows the probe to occur.


Please check the following snippet of code (from portdrv_core.c).

IIUC, pcie_device_init() will not be called if PCIE_PORT_SERVICE_DPC is
not set in capabilities. Your change in portdrv_core.c already
selectively enables the PCIE_PORT_SERVICE_DPC service based on
native_dpc value.


That's right. So pcie_device_init registers the port service driver
allowing the services enumeration to occur.


So IMO, adding native_dpc check in dpc_probe() is redundant.

int pcie_port_device_register(struct pci_dev *dev)
/* Allocate child services if any */
status = -ENODEV;
nr_service = 0;
for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
int service = 1 << i;
if (!(capabilities & service))
continue;
if (!pcie_device_init(dev, service, irqs[i]))
nr_service++;
}


This is the tricky part
There's still a check in dpc_probe for AER FFS or pcie_ports=dpc-
native:

if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native)
return -ENOTSUPP;

One option is to move that to get_port_device_capability and remove the
dpc_probe check

Yes, its better to group them together in get_port_device_capability().

But it should be done in a separate patch.



The probe itself will still fail if there's a mixed-mode _OSC
negotiated AER & DPC, due to pcie_aer_get_firmware_first returning 1
for AER and no check for DPC.

I don't know if such a platform will exist, but the kernel is already
wired for 'dpc-native' so it makes sense to extend it for this..

This transform might be more readable:
if (pcie_aer_get_firmware_first(pdev) &&
!(pcie_ports_dpc_native || hb->native_dpc))




return -ENOTSUPP;

	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,

diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 50a9522..f2139a1 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -256,7 +256,8 @@ static int get_port_device_capability(struct pci_dev *dev)
 */
if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) &&
pci_aer_available() &&
-   (pcie_ports_dpc_native || (services & PCIE_PORT_SERVICE_AER)))
+   (pcie_ports_dpc_native || host->native_dpc ||
+(services & PCIE_PORT_SERVICE_AER)))
services |= PCIE_PORT_SERVICE_DPC;

	if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||




Re: [PATCH v2 2/2] PCI/DPC: Allow Native DPC Host Bridges to use DPC

2020-04-27 Thread Derrick, Jonathan
Hi Sathyanarayanan,

On Sat, 2020-04-25 at 13:46 -0700, Kuppuswamy, Sathyanarayanan wrote:
> 
> On 4/23/20 8:11 AM, Derrick, Jonathan wrote:
> > Hi Sathyanarayanan,
> > 
> > On Wed, 2020-04-22 at 15:50 -0700, Kuppuswamy, Sathyanarayanan wrote:
> > > On 4/20/20 2:37 PM, Jon Derrick wrote:
> > > > The existing portdrv model prevents DPC services without either OS
> > > > control (_OSC) granted to AER services, a Host Bridge requesting Native
> > > > AER, or using one of the 'pcie_ports=' parameters of 'native' or
> > > > 'dpc-native'.
> > > > 
> > > > The DPC port service driver itself will also fail to probe if the kernel
> > > > assumes the port is using Firmware-First AER. It's a reasonable
> > > > expectation that a port using Firmware-First AER will also be using
> > > > Firmware-First DPC, however if a Host Bridge requests Native DPC, the
> > > > DPC driver should allow it and not fail to bind due to AER capability
> > > > settings.
> > > > 
> > > > Host Bridges which request Native DPC port services will also likely
> > > > request Native AER, however it shouldn't be a requirement. This patch
> > > > allows ports on those Host Bridges to have DPC port services.
> > > > 
> > > > This will avoid the unlikely situation where the port is Firmware-First
> > > > AER and Native DPC, and a BIOS or switch firmware preconfiguration of
> > > > the DPC trigger could result in unhandled DPC events.
> > > > 
> > > > Signed-off-by: Jon Derrick 
> > > > ---
> > > >drivers/pci/pcie/dpc.c  | 3 ++-
> > > >drivers/pci/pcie/portdrv_core.c | 3 ++-
> > > >2 files changed, 4 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> > > > index 7621704..3f3106f 100644
> > > > --- a/drivers/pci/pcie/dpc.c
> > > > +++ b/drivers/pci/pcie/dpc.c
> > > > @@ -284,7 +284,8 @@ static int dpc_probe(struct pcie_device *dev)
> > > > int status;
> > > > u16 ctl, cap;
> > > >
> > > > -   if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native)
> > > > +   if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native 
> > > > &&
> > > > +   !pci_find_host_bridge(pdev->bus)->native_dpc)
> > > Why do it in probe as well ? if host->native_dpc is not set then the
> > > device DPC probe it self won't happen right ?
> > 
> > Portdrv only enables the interrupt and allows the probe to occur.
> 
> Please check the following snippet of code (from portdrv_core.c).
> 
> IIUC, pcie_device_init() will not be called if PCIE_PORT_SERVICE_DPC is
> not set in capabilities. Your change in portdrv_core.c already
> selectively enables the PCIE_PORT_SERVICE_DPC service based on
> native_dpc value.
> 
That's right. So pcie_device_init registers the port service driver
allowing the services enumeration to occur.

> So IMO, adding native_dpc check in dpc_probe() is redundant.
> 
> int pcie_port_device_register(struct pci_dev *dev)
>   /* Allocate child services if any */
>   status = -ENODEV;
>   nr_service = 0;
>   for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) {
>   int service = 1 << i;
>   if (!(capabilities & service))
>   continue;
>   if (!pcie_device_init(dev, service, irqs[i]))
>   nr_service++;
>   }
> 
This is the tricky part
There's still a check in dpc_probe for AER FFS or pcie_ports=dpc-
native:

if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native)
return -ENOTSUPP;

One option is to move that to get_port_device_capability and remove the
dpc_probe check

> > The probe itself will still fail if there's a mixed-mode _OSC
> > negotiated AER & DPC, due to pcie_aer_get_firmware_first returning 1
> > for AER and no check for DPC.
> > 
> > I don't know if such a platform will exist, but the kernel is already
> > wired for 'dpc-native' so it makes sense to extend it for this..
> > 
> > This transform might be more readable:
> > if (pcie_aer_get_firmware_first(pdev) &&
> > !(pcie_ports_dpc_native || hb->native_dpc))
> > 
> > 
> > 
> > > > return -ENOTSUPP;
> > > >
> > > > status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
> > > > diff --git a/drivers/pci/pcie/portdrv_core.c 
> > > > b/drivers/pci/pcie/portdrv_core.c
> > > > index 50a9522..f2139a1 100644
> > > > --- a/drivers/pci/pcie/portdrv_core.c
> > > > +++ b/drivers/pci/pcie/portdrv_core.c
> > > > @@ -256,7 +256,8 @@ static int get_port_device_capability(struct 
> > > > pci_dev *dev)
> > > >  */
> > > > if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) &&
> > > > pci_aer_available() &&
> > > > -   (pcie_ports_dpc_native || (services & 
> > > > PCIE_PORT_SERVICE_AER)))
> > > > +   (pcie_ports_dpc_native || host->native_dpc ||
> > > > +(services & PCIE_PORT_SERVICE_AER)))
> > > > services |= PCIE_PORT_SERVICE_DPC;
> > > >
> > > > if 

Re: [PATCH v2 6/7] debugfs: switch to simplefs inode creation API

2020-04-27 Thread Paolo Bonzini
On 21/04/20 15:57, Emanuele Giuseppe Esposito wrote:
> - inode = debugfs_get_inode(dentry->d_sb);

You're not removing debugfs_get_inode so I think you're going to get a
warning (same in tracefs)?

You can wait a few more days for reviews and/or Acked-bys (especially
for patches 6 and 7) and then post v3.

Since the touch-everything patch (#2) has already been reviewed, and
it's mechanical and not introducing any semantic change, you can
probably reduce the To/Cc list to filesystem, debugfs and tracefs
maintainers.

Thanks,

Paolo



Re: [PATCH v2, RESEND] misc: new driver sram_uapi for user level SRAM access

2020-04-27 Thread Rob Herring
On Sun, Apr 19, 2020 at 10:06 PM Wang Wenhu  wrote:
>
> A generic User-Kernel interface that allows a misc device created
> by it to support file-operations of ioctl and mmap to access SRAM
> memory from user level. Different kinds of SRAM allocation and free
> APIs could be registered by specific SRAM hardware level driver to
> the available list and then be chosen by users to allocate and map
> SRAM memory from user level.
>
> It is extremely helpful for the user space applications that require
> high performance memory accesses, such as embedded networking devices
> that would process data in user space, and PowerPC e500 is a case.
>
> Signed-off-by: Wang Wenhu 
> Cc: Greg Kroah-Hartman 
> Cc: Arnd Bergmann 
> Cc: Christophe Leroy 
> Cc: Scott Wood 
> Cc: Michael Ellerman 
> Cc: Randy Dunlap 
> Cc: linuxppc-dev@lists.ozlabs.org
> ---
> Changes since v1: addressed comments from Arnd
>  * Changed the ioctl cmd definitions using _IO micros
>  * Export interfaces for HW-SRAM drivers to register apis to available list
>  * Modified allocation alignment to PAGE_SIZE
>  * Use phys_addr_t as type of SRAM resource size and offset
>  * Support compat_ioctl
>  * Misc device name:sram
>
> Note: From this on, the SRAM_UAPI driver is independent to any hardware
> drivers, so I would only commit the patch itself as v2, while the v1 of
> it was wrapped together with patches for Freescale L2-Cache-SRAM device.
> Then after, I'd create patches for Freescale L2-Cache-SRAM device as
> another series.

There's work to add SRAM support to dma-buf heaps[1]. Take a look and
see if that works for you.

Rob

[1] https://lore.kernel.org/lkml/20200424222740.16259-1-...@ti.com/


Re: [RFC 3/3] powernv/cpuidle : Introduce capability for firmware-enabled-stop

2020-04-27 Thread Gautham R Shenoy
On Sun, Apr 26, 2020 at 09:10:27PM -0500, Abhishek Goel wrote:
> This patch introduces the capability for firmware to handle the stop
> states instead. A bit is set based on the discovery of the feature
> and correspondingly also the responsibility to handle the stop states.
> 
> If Kernel does not know about stop version, it can fallback to opal for
> idle stop support if firmware-stop-supported property is present.
> 
> Earlier this patch was posted as part of this series :
> https://lkml.org/lkml/2020/3/4/589
> 
> Signed-off-by: Pratik Rajesh Sampat 
> Signed-off-by: Abhishek Goel 
> ---
> 
>  v1->v2: This patch is newly added in this series.
> 
>  arch/powerpc/include/asm/processor.h  |  1 +
>  arch/powerpc/kernel/dt_cpu_ftrs.c |  8 
>  arch/powerpc/platforms/powernv/idle.c | 27 ---
>  3 files changed, 25 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/processor.h 
> b/arch/powerpc/include/asm/processor.h
> index 66fa20476d0e..d5c6532b11ef 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -430,6 +430,7 @@ extern unsigned long cpuidle_disable;
>  enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
> 
>  #define STOP_ENABLE  0x0001
> +#define FIRMWARE_STOP_ENABLE 0x0010
> 
>  #define STOP_VERSION_P9   0x1
> 
> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c 
> b/arch/powerpc/kernel/dt_cpu_ftrs.c
> index db1a525e090d..ff4a87b79702 100644
> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
> @@ -298,6 +298,13 @@ static int __init feat_enable_idle_stop(struct 
> dt_cpu_feature *f)
>   return 1;
>  }
> 
> +static int __init feat_enable_firmware_stop(struct dt_cpu_feature *f)
> +{
> + stop_dep.cpuidle_prop |= FIRMWARE_STOP_ENABLE;
> +
> + return 1;
> +}
> +
>  static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
>  {
>   u64 lpcr;
> @@ -592,6 +599,7 @@ static struct dt_cpu_feature_match __initdata
>   {"idle-nap", feat_enable_idle_nap, 0},
>   {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
>   {"idle-stop", feat_enable_idle_stop, 0},
> + {"firmware-stop-supported", feat_enable_firmware_stop, 0},
>   {"machine-check-power8", feat_enable_mce_power8, 0},
>   {"performance-monitor-power8", feat_enable_pmu_power8, 0},
>   {"data-stream-control-register", feat_enable_dscr, CPU_FTR_DSCR},
> diff --git a/arch/powerpc/platforms/powernv/idle.c 
> b/arch/powerpc/platforms/powernv/idle.c
> index 538f0842ac3f..0de5de81902e 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -633,16 +633,6 @@ static unsigned long power9_idle_stop(unsigned long 
> psscr, bool mmu_on)
>   unsigned long mmcr0 = 0;
>   struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
>   bool sprs_saved = false;
> - int rc = 0;
> -
> - /*
> -  * Kernel takes decision whether to make OPAL call or not. This logic
> -  * will be combined with the logic for BE opal to take decision.
> -  */
> - if (firmware_stop_supported) {
> - rc = opal_cpu_idle(cpu_to_be64(__pa()), (uint64_t) psscr);
> - goto out;
> - }
> 
>   if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
>   /* EC=ESL=0 case */
> @@ -835,6 +825,19 @@ static unsigned long power9_idle_stop(unsigned long 
> psscr, bool mmu_on)
>   return srr1;
>  }
> 
> +static unsigned long power9_firmware_idle_stop(unsigned long psscr, bool 
> mmu_on)
> +{
> + unsigned long srr1;
> + int rc;
> +
> + rc = opal_cpu_idle(cpu_to_be64(__pa()), (uint64_t) psscr);
> +
> + if (mmu_on)
> + mtmsr(MSR_KERNEL);
> + return srr1;
> +
> +}
> +
>  #ifdef CONFIG_HOTPLUG_CPU
>  static unsigned long power9_offline_stop(unsigned long psscr)
>  {
> @@ -1394,9 +1397,11 @@ static int __init pnv_init_idle_states(void)
>   !(stop_dep.cpuidle_prop & STOP_ENABLE))
>   goto out;
> 
> - /* Check for supported version in kernel */
> + /* Check for supported version in kernel or fallback to kernel*/
>   if (stop_dep.stop_version & STOP_VERSION_P9) {
>   stop_dep.idle_stop = power9_idle_stop;
> + } else if (stop_dep.cpuidle_prop & FIRMWARE_STOP_ENABLE) {
> + stop_dep.idle_stop = power9_firmware_idle_stop;

Ok, so in this patch you first check if the "idle-stop" feature is
available. Only otherwise you fallback to the OPAL based cpuidle
driver.

This looks ok to me.


>   } else {
>   stop_dep.idle_stop = NULL;
>   goto out;
> -- 
> 2.17.1
> 

--
Thanks and Regards
gautham.


Re: [RFC 2/3] powernv/cpuidle : Interface for an idle-stop dependency structure

2020-04-27 Thread Gautham R Shenoy
On Sun, Apr 26, 2020 at 09:10:26PM -0500, Abhishek Goel wrote:
> This patch introduces the idea of having a dependency structure for
> idle-stop. The structure encapsulates the following:
> 1. Bitmask for version of idle-stop
> 2. Bitmask for properties like ENABLE/DISABLE
> 3. Function pointer which helps handle how the stop must be invoked
> 
> This patch lays a foundation for other idle-stop versions to be added
> and handled cleanly based on their specified requirements.
> Currently it handles the existing "idle-stop" version by setting the
> discovery bits and the function pointer.
> 
> Earlier this patch was posted as part of this series :
> https://lkml.org/lkml/2020/3/4/589


Please see the review comments to the earlier version:
https://lkml.org/lkml/2020/4/8/245

I still feel that we don't need cpuidle_prop and stop_version to be
separate fields.


> 
> Signed-off-by: Pratik Rajesh Sampat 
> Signed-off-by: Abhishek Goel 
> ---
> 
>  v1->v2: This patch is newly added in this series.
> 
>  arch/powerpc/include/asm/processor.h  | 17 +
>  arch/powerpc/kernel/dt_cpu_ftrs.c |  5 +
>  arch/powerpc/platforms/powernv/idle.c | 18 ++
>  drivers/cpuidle/cpuidle-powernv.c |  3 ++-
>  4 files changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/processor.h 
> b/arch/powerpc/include/asm/processor.h
> index eedcbfb9a6ff..66fa20476d0e 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -429,6 +429,23 @@ extern void power4_idle_nap(void);
>  extern unsigned long cpuidle_disable;
>  enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
> 
> +#define STOP_ENABLE  0x0001
> +
> +#define STOP_VERSION_P9   0x1
> +
> +/*
> + * Classify the dependencies of the stop states
> + * @idle_stop: function handler to handle the quirk stop version
> + * @cpuidle_prop: Signify support for stop states through kernel and/or 
> firmware
> + * @stop_version: Classify quirk versions for stop states
> + */
> +typedef struct {
> + unsigned long (*idle_stop)(unsigned long psscr, bool mmu_on);
> + uint8_t cpuidle_prop;
> + uint8_t stop_version;
> +} stop_deps_t;
> +extern stop_deps_t stop_dep;
> +
>  extern int powersave_nap;/* set if nap mode can be used in idle loop */
> 
>  extern void power7_idle_type(unsigned long type);
> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c 
> b/arch/powerpc/kernel/dt_cpu_ftrs.c
> index 182b4047c1ef..db1a525e090d 100644
> --- a/arch/powerpc/kernel/dt_cpu_ftrs.c
> +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
> @@ -292,6 +292,8 @@ static int __init feat_enable_idle_stop(struct 
> dt_cpu_feature *f)
>   lpcr |=  LPCR_PECE1;
>   lpcr |=  LPCR_PECE2;
>   mtspr(SPRN_LPCR, lpcr);
> + stop_dep.cpuidle_prop |= STOP_ENABLE;
> + stop_dep.stop_version = STOP_VERSION_P9;
> 
>   return 1;
>  }
> @@ -657,6 +659,9 @@ static void __init cpufeatures_setup_start(u32 isa)
>   }
>  }
> 
> +stop_deps_t stop_dep = {NULL, 0x0, 0x0};
> +EXPORT_SYMBOL(stop_dep);
> +
>  static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
>  {
>   const struct dt_cpu_feature_match *m;
> diff --git a/arch/powerpc/platforms/powernv/idle.c 
> b/arch/powerpc/platforms/powernv/idle.c
> index 1841027b25c5..538f0842ac3f 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -842,7 +842,7 @@ static unsigned long power9_offline_stop(unsigned long 
> psscr)
> 
>  #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>   __ppc64_runlatch_off();
> - srr1 = power9_idle_stop(psscr, true);
> + srr1 = stop_dep.idle_stop(psscr, true);
>   __ppc64_runlatch_on();
>  #else
>   /*
> @@ -858,7 +858,7 @@ static unsigned long power9_offline_stop(unsigned long 
> psscr)
>   local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
> 
>   __ppc64_runlatch_off();
> - srr1 = power9_idle_stop(psscr, false);
> + srr1 = stop_dep.idle_stop(psscr, true);
>   __ppc64_runlatch_on();
> 
>   local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
> @@ -886,7 +886,7 @@ void power9_idle_type(unsigned long stop_psscr_val,
>   psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
> 
>   __ppc64_runlatch_off();
> - srr1 = power9_idle_stop(psscr, true);
> + srr1 = stop_dep.idle_stop(psscr, true);
>   __ppc64_runlatch_on();
> 
>   fini_irq_for_idle_irqsoff();
> @@ -1390,8 +1390,18 @@ static int __init pnv_init_idle_states(void)
>   nr_pnv_idle_states = 0;
>   supported_cpuidle_states = 0;
> 
> - if (cpuidle_disable != IDLE_NO_OVERRIDE)
> + if (cpuidle_disable != IDLE_NO_OVERRIDE ||
> + !(stop_dep.cpuidle_prop & STOP_ENABLE))
>   goto out;
> +
> + /* Check for supported version in kernel */
> + if (stop_dep.stop_version & STOP_VERSION_P9) {
> + stop_dep.idle_stop = power9_idle_stop;
> + } else {
> +

Re: [PATCH v5 0/6] implement KASLR for powerpc/fsl_booke/64

2020-04-27 Thread Daniel Axtens
Hi Jason,

> Thanks for the test. Can you send me the full log which may contain the 
> system info such as the following:

It's attached.

Regards,
Daniel



fail-log
Description: Binary data


>
> -
> phys_mem_size = 0x2
> dcache_bsize  = 0x20
> icache_bsize  = 0x20
> cpu_features  = 0x0003008003b6
>possible= 0x0003009003b6
>always  = 0x0003008003b4
> cpu_user_features = 0xdc008000 0x0800
> mmu_features  = 0x000a0010
> firmware_features = 0x
> physical_start= 0x2000
> -
> barrier-nospec: using isync; sync as speculation barrier
> Zone ranges:
>DMA  [mem 0x-0x7fff]
>Normal   [mem 0x8000-0x0001]
> Movable zone start for each node
> Early memory node ranges
>node   0: [mem 0x-0x0001]
> Initmem setup node 0 [mem 0x-0x0001]
> MMU: Allocated 2112 bytes of context maps for 255 contexts
> percpu: Embedded 32 pages/cpu s91736 r0 d39336 u131072
> Built 1 zonelists, mobility grouping on.  Total pages: 2064384
> Kernel command line: console=ttyS0 root=/dev/ram0
> printk: log_buf_len individual max cpu contribution: 16384 bytes
> printk: log_buf_len total cpu_extra contributions: 376832 bytes
> printk: log_buf_len min size: 131072 bytes
> printk: log_buf_len: 524288 bytes
> printk: early log buf free: 127460(97%)
> Dentry cache hash table entries: 1048576 (order: 11, 8388608 bytes, linear)
> Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes, linear)
> mem auto-init: stack:off, heap alloc:off, heap free:off
> Memory: 8135632K/8388608K available (10572K kernel code, 2000K rwdata, 
> 3396K rodata, 4124K init, 358K bss, 252976K reserved, 0K cma-reserved)
> rcu: Hierarchical RCU implementation.
> rcu:RCU event tracing is enabled.
> rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies.
> NR_IRQS: 512, nr_irqs: 512, preallocated irqs: 16
> mpic: Setting up MPIC " OpenPIC  " version 1.2 at fe004, max 24 CPUs
> mpic: ISU size: 256, shift: 8, mask: ff
> mpic: Initializing for 256 sources
> random: get_random_u64 called from .start_kernel+0x724/0x954 with 
> crng_init=0
> clocksource: timebase: mask: 0x max_cycles: 
> 0x5c4093a7d1, max_idle_ns: 440795210635 ns
> clocksource: timebase mult[280] shift[24] registered
> Console: colour dummy device 80x25
> pid_max: default: 32768 minimum: 301
> Mount-cache hash table entries: 16384 (order: 5, 131072 bytes, linear)
> Mountpoint-cache hash table entries: 16384 (order: 5, 131072 bytes, linear)
> e6500 family performance monitor hardware support registered
> rcu: Hierarchical SRCU implementation.
> smp: Bringing up secondary CPUs ...
> smp: Brought up 1 node, 24 CPUs
>
>
>
> 在 2020/4/25 1:17, Daniel Axtens 写道:
>> Hi Jason,
>> 
>> Apologies for the delay in testing.
>> 
>> I'm seeing this problem when I try to boot on a t4240rdb:
>> 
>> random: get_random_u64 called from .start_kernel+0x734/0x964 with 
>> crng_init=0   [8/973]
>> clocksource: timebase: mask: 0x max_cycles: 0xa9210e89c, 
>> max_idle_ns: 440795203878 ns
>> clocksource: timebase mult[15d17460] shift[24] registered
>> Console: colour dummy device 80x25
>> pid_max: default: 32768 minimum: 301
>> Mount-cache hash table entries: 16384 (order: 5, 131072 bytes, linear)
>> Mountpoint-cache hash table entries: 16384 (order: 5, 131072 bytes, linear)
>> e6500 family performance monitor hardware support registered
>> rcu: Hierarchical SRCU implementation.
>> smp: Bringing up secondary CPUs ...
>> Processor 2 is stuck.
>> Processor 3 is stuck.
>> Processor 4 is stuck.
>> Processor 5 is stuck.
>> Processor 6 is stuck.
>> Processor 7 is stuck.
>> Processor 8 is stuck.
>> Processor 9 is stuck.
>> Processor 10 is stuck.
>> Processor 11 is stuck.
>> Processor 12 is stuck.
>> Processor 13 is stuck.
>> Processor 14 is stuck.
>> ...
>> Processor 22 is stuck.
>> Processor 23 is stuck.
>> smp: Brought up 1 node, 2 CPUs
>> Using standard scheduler topology
>> devtmpfs: initialized
>> clocksource: jiffies: mask: 0x max_cycles: 0x, max_idle_ns: 
>> 764504178510 ns
>> futex hash table entries: 8192 (order: 7, 524288 bytes, linear)
>> NET: Registered protocol family 16
>> audit: initializing netlink subsys (disabled)
>> audit: type=2000 audit(108.032:1): state=initialized audit_enabled=0 res=1
>> Machine: fsl,T4240RDB
>> SoC family: QorIQ T4240
>> SoC ID: svr:0x82480020, Revision: 2.0
>> ... boot continues ...
>> 
>> 
>> If I boot with nokaslr, all the CPUs come up with no issue.
>> 
>> This is on top of powerpc/merge at
>> 8299da600ad05b8aa0f15ec0f5f03bd40e37d6f0. If you'd like me to test any
>> debug patches I can do that.
>> 
>> I've attached my .config.
>> 
>> Regards,
>> Daniel
>> 
>> 
>> 
>>> This is a try 

Re: New powerpc vdso calling convention

2020-04-27 Thread Adhemerval Zanella



On 26/04/2020 00:41, Nicholas Piggin wrote:
> Excerpts from Rich Felker's message of April 26, 2020 9:11 am:
>> On Sun, Apr 26, 2020 at 08:58:19AM +1000, Nicholas Piggin wrote:
>>> Excerpts from Christophe Leroy's message of April 25, 2020 10:20 pm:


 Le 25/04/2020 à 12:56, Nicholas Piggin a écrit :
> Excerpts from Christophe Leroy's message of April 25, 2020 5:47 pm:
>>
>>
>> Le 25/04/2020 à 07:22, Nicholas Piggin a écrit :
>>> As noted in the 'scv' thread, powerpc's vdso calling convention does not
>>> match the C ELF ABI calling convention (or the proposed scv convention).
>>> I think we could implement a new ABI by basically duplicating function
>>> entry points with different names.
>>
>> I think doing this is a real good idea.
>>
>> I've been working at porting powerpc VDSO to the GENERIC C VDSO, and the
>> main pitfall has been that our vdso calling convention is not compatible
>> with C calling convention, so we have go through an ASM entry/exit.
>>
>> See https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=171469
>>
>> We should kill this error flag return through CR[SO] and get it the
>> "modern" way like other architectures implementing the C VDSO: return
>> 0 when successful, return -err when failed.
>
> Agreed.
>
>>> The ELF v2 ABI convention would suit it well, because the caller already
>>> requires the function address for ctr, so having it in r12 will
>>> eliminate the need for address calculation, which suits the vdso data
>>> page access.
>>>
>>> Is there a need for ELF v1 specific calls as well, or could those just 
>>> be
>>> deprecated and remain on existing functions or required to use the ELF
>>> v2 calls using asm wrappers?
>>
>> What's ELF v1 and ELF v2 ? Is ELF v1 what PPC32 uses ? If so, I'd say
>> yes, it would be good to have it to avoid going through ASM in the 
>> middle.
>
> I'm not sure about PPC32. On PPC64, ELFv2 functions must be called with
> their address in r12 if called at their global entry point. ELFv1 have a
> function descriptor with call address and TOC in it, caller has to load
> the TOC if it's global.
>
> The vdso doesn't have TOC, it has one global address (the vdso data
> page) which it loads by calculating its own address.
>
> The kernel doesn't change the vdso based on whether it's called by a v1
> or v2 userspace (it doesn't really know itself and would have to export
> different functions). glibc has a hack to create something:
>
> # define VDSO_IFUNC_RET(value)   \
>({ \
>  static Elf64_FuncDesc vdso_opd = { .fd_toc = ~0x0 }; \
>  vdso_opd.fd_func = (Elf64_Addr)value;\
>  _opd;   \
>})
>
> If we could make something which links more like any other dso with
> ELFv1, that would be good. Otherwise I think v2 is preferable so it
> doesn't have to calculate its own address.

 I see the following in glibc. So looks like PPC32 is like PPC64 elfv1. 
 By the way, they are talking about something not completely finished in 
 the kernel. Can we finish it ?
>>>
>>> Possibly can. It seems like a good idea to fix all loose ends if we are 
>>> going to add new versions. Will have to check with the toolchain people 
>>> to make sure we're doing the right thing.
>>
>> "ELFv1" and "ELFv2" are PPC64-specific names for the old and new
>> version of the ELF psABI for PPC64. They have nothing at all to do
>> with PPC32 which is a completely different ABI from either.
> 
> Right, I'm just talking about those comments -- it seems like the kernel 
> vdso should contain an .opd section with function descriptors in it for
> elfv1 calls, rather than the hack it has now of creating one in the 
> caller's .data section.
> 
> But all that function descriptor code is gated by
> 
> #if (defined(__PPC64__) || defined(__powerpc64__)) && _CALL_ELF != 2
> 
> So it seems PPC32 does not use function descriptors but a direct pointer 
> to the entry point like PPC64 with ELFv2.

Yes, this hack is only for ELFv1.  The missing ODP has not been an issue 
or glibc because it has been using the inline assembly to emulate the 
functions call since initial vDSO support (INTERNAL_VSYSCALL_CALL_TYPE).
It just has become an issue when I added a ifunc optimization to 
gettimeofday so it can bypass the libc.so and make plt branch to vDSO 
directly.

Recently on some y2038 refactoring it was suggested to get rid of this 
and make gettimeofday call clock_gettime regardless.  But some felt that 
the performance degradation was not worth for a symbol that is still used
extensively, so we stuck with the hack.

And I think having this synthetic opd entry is not an issue, since 

Re: [PATCH v2] ASoC: fsl_easrc: Check for null pointer before dereferencing "ctx" in fsl_easrc_hw_free()

2020-04-27 Thread Mark Brown
On Sat, 25 Apr 2020 15:19:29 +0800, Shengjiu Wang wrote:
> The patch 955ac624058f: "ASoC: fsl_easrc: Add EASRC ASoC CPU DAI
> drivers" from Apr 16, 2020, leads to the following Smatch complaint:
> 
> sound/soc/fsl/fsl_easrc.c:1529 fsl_easrc_hw_free()
> warn: variable dereferenced before check 'ctx' (see line 1527)
> 
> sound/soc/fsl/fsl_easrc.c
>   1526  struct fsl_asrc_pair *ctx = runtime->private_data;
>   1527  struct fsl_easrc_ctx_priv *ctx_priv = ctx->private;
>   ^
> Dereference
> 
> [...]

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-5.8

Thanks!

[1/1] ASoC: fsl_easrc: Check for null pointer before dereferencing "ctx" in 
fsl_easrc_hw_free()
  commit: f3fc1ea011f09156886e8f4beb240ea814f2197a

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: [PATCH v2] ASoC: fsl_esai: Disable exception interrupt before scheduling tasklet

2020-04-27 Thread Mark Brown
On Mon, 27 Apr 2020 14:23:21 +0800, Shengjiu Wang wrote:
> Disable exception interrupt before scheduling tasklet, otherwise if
> the tasklet isn't handled immediately, there will be endless xrun
> interrupt.
> 
> Fixes: 7ccafa2b3879 ("ASoC: fsl_esai: recover the channel swap after xrun")
> Signed-off-by: Shengjiu Wang 
> Acked-by: Nicolin Chen 
> 
> [...]

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-5.7

Thanks!

[1/1] ASoC: fsl_esai: Disable exception interrupt before scheduling tasklet
  commit: 1fecbb71fe0e46b886f84e3b6decca6643c3af6d

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Re: [RFC 1/3] powernv/cpuidle : Support for pre-entry and post exit of stop state in firmware

2020-04-27 Thread Gautham R Shenoy
Hi Abhishek,

On Sun, Apr 26, 2020 at 09:10:25PM -0500, Abhishek Goel wrote:
> This patch provides kernel framework for opal support of save restore
> of sprs in idle stop loop. Opal support for stop states is needed to
> selectively enable stop states or to introduce a quirk quickly in case
> a buggy stop state is present.
> 
> We make a opal call from kernel if firmware-stop-support for stop
> states is present and enabled. All the quirks for pre-entry of stop
> state is handled inside opal. A call from opal is made into kernel
> where we execute stop after saving of NVGPRs.
> After waking up from 0x100 vector in kernel, we enter back into opal.
> All the quirks in post exit path, if any, are then handled in opal,
> from where we return successfully back to kernel.
> For deep stop states in which additional SPRs are lost, saving and
> restoration will be done in OPAL.
> 
> This idea was first proposed by Nick here:
> https://patchwork.ozlabs.org/patch/1208159/
> 
> The corresponding skiboot patch for this kernel patch is here:
> https://patchwork.ozlabs.org/project/skiboot/list/?series=172831
> 
> When we callback from OPAL into kernel, r13 is clobbered. So, to
> access PACA we need to restore it from HSPRGO. In future we can
> handle this into OPAL as in here:
> https://patchwork.ozlabs.org/patch/1245275/
> 
> Signed-off-by: Abhishek Goel 
> Signed-off-by: Nicholas Piggin 
> ---
> 
>  v1->v2 : No change in this patch.
> 
>  arch/powerpc/include/asm/opal-api.h|  8 -
>  arch/powerpc/include/asm/opal.h|  3 ++
>  arch/powerpc/kernel/idle_book3s.S  |  5 +++
>  arch/powerpc/platforms/powernv/idle.c  | 37 ++
>  arch/powerpc/platforms/powernv/opal-call.c |  2 ++
>  5 files changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index c1f25a760eb1..a2c782c99c9e 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -214,7 +214,9 @@
>  #define OPAL_SECVAR_GET  176
>  #define OPAL_SECVAR_GET_NEXT 177
>  #define OPAL_SECVAR_ENQUEUE_UPDATE   178
> -#define OPAL_LAST178
> +#define OPAL_REGISTER_OS_OPS 181
> +#define OPAL_CPU_IDLE182
> +#define OPAL_LAST182
> 
>  #define QUIESCE_HOLD 1 /* Spin all calls at entry */
>  #define QUIESCE_REJECT   2 /* Fail all calls with 
> OPAL_BUSY */
> @@ -1181,6 +1183,10 @@ struct opal_mpipl_fadump {
>   struct  opal_mpipl_region region[];
>  } __packed;
> 
> +struct opal_os_ops {
> + __be64 os_idle_stop;
> +};
> +
>  #endif /* __ASSEMBLY__ */
> 
>  #endif /* __OPAL_API_H */
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 9986ac34b8e2..3c340bc4df8e 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -400,6 +400,9 @@ void opal_powercap_init(void);
>  void opal_psr_init(void);
>  void opal_sensor_groups_init(void);
> 
> +extern int64_t opal_register_os_ops(struct opal_os_ops *os_ops);
> +extern int64_t opal_cpu_idle(__be64 srr1_addr, uint64_t psscr);
> +
>  #endif /* __ASSEMBLY__ */
> 
>  #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/kernel/idle_book3s.S 
> b/arch/powerpc/kernel/idle_book3s.S
> index 22f249b6f58d..8d287d1d06c0 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -49,6 +49,8 @@ _GLOBAL(isa300_idle_stop_noloss)
>   */
>  _GLOBAL(isa300_idle_stop_mayloss)
>   mtspr   SPRN_PSSCR,r3
> + mr  r6, r13
> + mfspr   r13, SPRN_HSPRG0
>   std r1,PACAR1(r13)
>   mflrr4
>   mfcrr5
> @@ -74,6 +76,7 @@ _GLOBAL(isa300_idle_stop_mayloss)
>   std r31,-8*18(r1)
>   std r4,-8*19(r1)
>   std r5,-8*20(r1)
> + std r6,-8*21(r1)
>   /* 168 bytes */
>   PPC_STOP
>   b   .   /* catch bugs */
> @@ -91,8 +94,10 @@ _GLOBAL(idle_return_gpr_loss)
>   ld  r1,PACAR1(r13)
>   ld  r4,-8*19(r1)
>   ld  r5,-8*20(r1)
> + ld  r6,-8*21(r1)
>   mtlrr4
>   mtcrr5
> + mr  r13,r6
>   /*
>* KVM nap requires r2 to be saved, rather than just restoring it
>* from PACATOC. This could be avoided for that less common case
> diff --git a/arch/powerpc/platforms/powernv/idle.c 
> b/arch/powerpc/platforms/powernv/idle.c
> index 78599bca66c2..1841027b25c5 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -35,6 +35,7 @@
>  static u32 supported_cpuidle_states;
>  struct pnv_idle_states_t *pnv_idle_states;
>  int nr_pnv_idle_states;
> +static bool firmware_stop_supported;
> 
>  /*
>   * The default stop state that will be used by ppc_md.power_save
> @@ -602,6 +603,25 @@ struct p9_sprs {
>   u64 

[RFC PATCH 19/25] powerpc/book3s64/kuap: Restrict access to userspace based on userspace AMR

2020-04-27 Thread Aneesh Kumar K.V
If an application has configured address protection such that read/write is
denied using pkey, even the kernel should receive a FAULT on accessing the same.

This patch uses the user AMR value stored in pt_regs.kuap to achieve the same.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/kup.h | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
b/arch/powerpc/include/asm/book3s/64/kup.h
index 6a706e6fef59..590c1d9455d1 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -242,14 +242,20 @@ static inline void set_kuap(unsigned long value)
 static __always_inline void allow_user_access(void __user *to, const void 
__user *from,
  unsigned long size, unsigned long 
dir)
 {
+   unsigned long thread_amr = 0;
+
// This is written so we can resolve to a single case at build time
BUILD_BUG_ON(!__builtin_constant_p(dir));
+
+   if (mmu_has_feature(MMU_FTR_PKEY))
+   thread_amr = current_thread_amr();
+
if (dir == KUAP_READ)
-   set_kuap(AMR_KUAP_BLOCK_WRITE);
+   set_kuap(thread_amr | AMR_KUAP_BLOCK_WRITE);
else if (dir == KUAP_WRITE)
-   set_kuap(AMR_KUAP_BLOCK_READ);
+   set_kuap(thread_amr | AMR_KUAP_BLOCK_READ);
else if (dir == KUAP_READ_WRITE)
-   set_kuap(0);
+   set_kuap(thread_amr);
else
BUILD_BUG();
 }
-- 
2.25.3



[RFC PATCH 24/25] powerpc/book3s64/hash/kuep: Enable KUEP on hash

2020-04-27 Thread Aneesh Kumar K.V
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/book3s64/pkeys.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 40b6240ecc89..a1952ee6668e 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -219,7 +219,12 @@ void __init pkey_early_init_devtree(void)
 #ifdef CONFIG_PPC_KUEP
 void setup_kuep(bool disabled)
 {
-   if (disabled || !early_radix_enabled())
+   if (disabled)
+   return;
+   /*
+* On hash if PKEY feature is not enabled, disable KUEP too.
+*/
+   if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY))
return;
 
if (smp_processor_id() == boot_cpuid) {
-- 
2.25.3



[RFC PATCH 22/25] powerpc/book3s64/kuep: Use Key 3 to implement KUEP with hash translation.

2020-04-27 Thread Aneesh Kumar K.V
Radix uses IAMR Key 0 and hash translation uses IAMR key 3.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/kup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
b/arch/powerpc/include/asm/book3s/64/kup.h
index 7ccbe2653273..ae03b6341f88 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -7,7 +7,7 @@
 
 #define AMR_KUAP_BLOCK_READUL(0x5455)
 #define AMR_KUAP_BLOCK_WRITE   UL(0xa8aa)
-#define AMR_KUEP_BLOCKED   (1UL << 62)
+#define AMR_KUEP_BLOCKED   UL(0x5455)
 #define AMR_KUAP_BLOCKED   (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
 
 #ifdef __ASSEMBLY__
-- 
2.25.3



[RFC PATCH 25/25] powerpc/book3s64/keys: Print information during boot.

2020-04-27 Thread Aneesh Kumar K.V
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/book3s64/pkeys.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index a1952ee6668e..660e0691db07 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -207,6 +207,7 @@ void __init pkey_early_init_devtree(void)
 */
initial_allocation_mask |= reserved_allocation_mask;
 
+   pr_info("Enabling Memory keys with max key count %d\n", max_pkey);
 err_out:
/*
 * Setup uamor on boot cpu
-- 
2.25.3



[RFC PATCH 21/25] powerpc/book3s64/kuap: Use Key 3 to implement KUAP with hash translation.

2020-04-27 Thread Aneesh Kumar K.V
Radix uses AMR Key 0 and hash translation uses AMR key 3.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/kup.h | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
b/arch/powerpc/include/asm/book3s/64/kup.h
index f564d909e195..7ccbe2653273 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -5,11 +5,10 @@
 #include 
 #include 
 
-#define AMR_KUAP_BLOCK_READUL(0x4000)
-#define AMR_KUAP_BLOCK_WRITE   UL(0x8000)
+#define AMR_KUAP_BLOCK_READUL(0x5455)
+#define AMR_KUAP_BLOCK_WRITE   UL(0xa8aa)
 #define AMR_KUEP_BLOCKED   (1UL << 62)
 #define AMR_KUAP_BLOCKED   (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
-#define AMR_KUAP_SHIFT 62
 
 #ifdef __ASSEMBLY__
 
@@ -63,8 +62,8 @@
 #ifdef CONFIG_PPC_KUAP_DEBUG
BEGIN_MMU_FTR_SECTION_NESTED(67)
mfspr   \gpr1, SPRN_AMR
-   li  \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
-   sldi\gpr2, \gpr2, AMR_KUAP_SHIFT
+   /* Prevent access to userspace using any key values */
+   LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED)
 999:   tdne\gpr1, \gpr2
EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | 
BUGFLAG_ONCE)
END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
-- 
2.25.3



[RFC PATCH 20/25] powerpc/book3s64/kuap: Improve error reporting with KUAP

2020-04-27 Thread Aneesh Kumar K.V
With hash translation use DSISR_KEYFAULT to identify a wrong access.
With Radix we look at the AMR value and type of fault.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/kup.h |  7 --
 arch/powerpc/mm/fault.c  | 27 +++-
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
b/arch/powerpc/include/asm/book3s/64/kup.h
index 590c1d9455d1..f564d909e195 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -280,13 +280,6 @@ static inline void restore_user_access(unsigned long flags)
set_kuap(flags);
 }
 
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
-{
-   return WARN(mmu_has_feature(MMU_FTR_KUAP) &&
-   (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : 
AMR_KUAP_BLOCK_READ)),
-   "Bug: %s fault blocked by AMR!", is_write ? "Write" : 
"Read");
-}
 #else /* CONFIG_PPC_KUAP */
 static inline void kuap_restore_amr(struct pt_regs *regs)
 {
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 84af6c8eecf7..4f866b11512c 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -198,6 +198,31 @@ static int mm_fault_error(struct pt_regs *regs, unsigned 
long addr,
return 0;
 }
 
+#define RADIX_KUAP_BLOCK_READ  UL(0x4000)
+#define RADIX_KUAP_BLOCK_WRITE UL(0x8000)
+
+static bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
+  bool is_write, unsigned long error_code)
+{
+   if (!mmu_has_feature(MMU_FTR_KUAP))
+   return false;
+
+   if (radix_enabled()) {
+   /*
+* Will be a storage protection fault.
+* Only check the details of AMR[0]
+*/
+   return WARN((regs->kuap & (is_write ? RADIX_KUAP_BLOCK_WRITE : 
RADIX_KUAP_BLOCK_READ)),
+   "Bug: %s fault blocked by AMR!", is_write ? "Write" 
: "Read");
+   }
+   /*
+* We don't want to WARN here because userspace can setup
+* keys such that a kernel access to user address can cause
+* fault
+*/
+   return !!(error_code & DSISR_KEYFAULT);
+}
+
 /* Is this a bad kernel fault ? */
 static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 unsigned long address, bool is_write)
@@ -233,7 +258,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned 
long error_code,
 
// Read/write fault in a valid region (the exception table search passed
// above), but blocked by KUAP is bad, it can never succeed.
-   if (bad_kuap_fault(regs, address, is_write))
+   if (bad_kuap_fault(regs, address, is_write, error_code))
return true;
 
// What's left? Kernel fault on user in well defined regions (extable
-- 
2.25.3



[RFC PATCH 23/25] powerpc/book3s64/hash/kuap: Enable kuap on hash

2020-04-27 Thread Aneesh Kumar K.V
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/book3s64/pkeys.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 2484d8502499..40b6240ecc89 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -240,7 +240,12 @@ void setup_kuep(bool disabled)
 #ifdef CONFIG_PPC_KUAP
 void setup_kuap(bool disabled)
 {
-   if (disabled || !early_radix_enabled())
+   if (disabled)
+   return;
+   /*
+* On hash if PKEY feature is not enabled, disable KUAP too.
+*/
+   if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY))
return;
 
if (smp_processor_id() == boot_cpuid) {
-- 
2.25.3



[RFC PATCH 18/25] powerpc/book3s64/pkeys: Don't update SPRN_AMR when in kernel mode.

2020-04-27 Thread Aneesh Kumar K.V
Now that the kernel correctly stores/restores userspace AMR/IAMR values, avoid
manipulating AMR and IAMR from the kernel on behalf of userspace.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/kup.h | 27 +
 arch/powerpc/include/asm/pkeys.h |  2 -
 arch/powerpc/include/asm/processor.h |  5 --
 arch/powerpc/kernel/process.c|  4 --
 arch/powerpc/kernel/traps.c  |  6 --
 arch/powerpc/mm/book3s64/pkeys.c | 73 
 6 files changed, 39 insertions(+), 78 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
b/arch/powerpc/include/asm/book3s/64/kup.h
index 717c2c4d3681..6a706e6fef59 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -158,6 +158,33 @@
 #include 
 #include 
 
+extern u64 default_uamor;
+extern u64 default_amr;
+extern u64 default_iamr;
+
+/*
+ * For kernel thread that doesn't have thread.regs return
+ * default AMR/IAMR values.
+ */
+static inline u64 current_thread_amr(void)
+{
+   if (current->thread.regs)
+   return current->thread.regs->kuap;
+   return AMR_KUAP_BLOCKED;
+}
+
+static inline u64 current_thread_iamr(void)
+{
+   if (current->thread.regs)
+   return current->thread.regs->kuep;
+   return AMR_KUEP_BLOCKED;
+}
+
+static inline u64 read_uamor(void)
+{
+   return default_uamor;
+}
+
 static inline void kuap_restore_amr(struct pt_regs *regs, bool to_user)
 {
if (!mmu_has_feature(MMU_FTR_PKEY) && to_user)
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index cd1a03044814..b1d448c53209 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -12,8 +12,6 @@
 #include 
 
 extern int max_pkey;
-extern u64 default_uamor;
-extern u64 default_amr;
 extern u32 reserved_allocation_mask; /* bits set for reserved keys */
 
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index eedcbfb9a6ff..3cef707b27c7 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -234,11 +234,6 @@ struct thread_struct {
struct thread_vr_state ckvr_state; /* Checkpointed VR state */
unsigned long   ckvrsave; /* Checkpointed VRSAVE */
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#ifdef CONFIG_PPC_MEM_KEYS
-   unsigned long   amr;
-   unsigned long   iamr;
-   unsigned long   uamor;
-#endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void*   kvm_shadow_vcpu; /* KVM internal data */
 #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9660bab1d99d..17f65a2738bc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -585,7 +585,6 @@ static void save_all(struct task_struct *tsk)
__giveup_spe(tsk);
 
msr_check_and_clear(msr_all_available);
-   thread_pkey_regs_save(>thread);
 }
 
 void flush_all_to_thread(struct task_struct *tsk)
@@ -1097,8 +1096,6 @@ static inline void save_sprs(struct thread_struct *t)
t->tar = mfspr(SPRN_TAR);
}
 #endif
-
-   thread_pkey_regs_save(t);
 }
 
 static inline void restore_sprs(struct thread_struct *old_thread,
@@ -1139,7 +1136,6 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
mtspr(SPRN_TIDR, new_thread->tidr);
 #endif
 
-   thread_pkey_regs_restore(new_thread, old_thread);
 }
 
 struct task_struct *__switch_to(struct task_struct *prev,
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 3fca22276bb1..a47fb49b7af8 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -348,12 +348,6 @@ static bool exception_common(int signr, struct pt_regs 
*regs, int code,
 
current->thread.trap_nr = code;
 
-   /*
-* Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need
-* to capture the content, if the task gets killed.
-*/
-   thread_pkey_regs_save(>thread);
-
return true;
 }
 
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 42ca7aa09d5b..2484d8502499 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -21,7 +21,7 @@ int  max_pkey;/* Maximum key value 
supported */
 u32  reserved_allocation_mask;
 static u32  initial_allocation_mask;   /* Bits set for the initially allocated 
keys */
 u64 default_amr;
-static u64 default_iamr;
+u64 default_iamr;
 /* Allow all keys to be modified by default */
 u64 default_uamor = ~0x0UL;
 /*
@@ -264,40 +264,17 @@ void pkey_mm_init(struct mm_struct *mm)
mm->context.execute_only_pkey = execute_only_key;
 }
 
-static inline u64 read_amr(void)
+static inline void update_current_thread_amr(u64 value)
 {
-   return 

[RFC PATCH 17/25] powerpc/ptrace-view: Use pt_regs values instead of thread_struct based one.

2020-04-27 Thread Aneesh Kumar K.V
We will remove thread.amr/iamr/uamor in a later patch

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/kernel/ptrace/ptrace-view.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c 
b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 15e3b79b6395..5b7bea41c699 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -488,14 +488,25 @@ static int pkey_active(struct task_struct *target, const 
struct user_regset *reg
 static int pkey_get(struct task_struct *target, const struct user_regset 
*regset,
unsigned int pos, unsigned int count, void *kbuf, void 
__user *ubuf)
 {
-   BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
-   BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
+   int ret;
 
if (!arch_pkeys_enabled())
return -ENODEV;
 
-   return user_regset_copyout(, , , , 
>thread.amr,
-  0, ELF_NPKEY * sizeof(unsigned long));
+   ret = user_regset_copyout(, , , , 
>thread.regs->kuap,
+ 0, 1 * sizeof(unsigned long));
+   if (ret)
+   goto err_out;
+
+   ret = user_regset_copyout(, , , , 
>thread.regs->kuep,
+ 1 * sizeof(unsigned long), 2 * 
sizeof(unsigned long));
+   if (ret)
+   goto err_out;
+
+   ret = user_regset_copyout(, , , , _uamor,
+ 2 * sizeof(unsigned long), 3 * 
sizeof(unsigned long));
+err_out:
+   return ret;
 }
 
 static int pkey_set(struct task_struct *target, const struct user_regset 
*regset,
@@ -518,8 +529,8 @@ static int pkey_set(struct task_struct *target, const 
struct user_regset *regset
return ret;
 
/* UAMOR determines which bits of the AMR can be set from userspace. */
-   target->thread.amr = (new_amr & target->thread.uamor) |
-(target->thread.amr & ~target->thread.uamor);
+   target->thread.regs->kuap = (new_amr & default_uamor) |
+   (target->thread.regs->kuap & ~default_uamor);
 
return 0;
 }
-- 
2.25.3



[RFC PATCH 16/25] powerpc/book3s64/pkeys: Reset userspace AMR correctly on exec

2020-04-27 Thread Aneesh Kumar K.V
On fork, we inherit from the parent and on exec, we should switch to 
default_amr values.

Also, avoid changing the AMR register value within the kernel. The kernel now 
runs with
different AMR values.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/pkeys.h |  1 +
 arch/powerpc/kernel/process.c|  3 ++-
 arch/powerpc/mm/book3s64/pkeys.c | 16 +---
 3 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index 6e8157f78b52..cd1a03044814 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -13,6 +13,7 @@
 
 extern int max_pkey;
 extern u64 default_uamor;
+extern u64 default_amr;
 extern u32 reserved_allocation_mask; /* bits set for reserved keys */
 
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index fc4cc32d4726..9660bab1d99d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1473,6 +1473,8 @@ void arch_setup_new_exec(void)
current->thread.regs = regs - 1;
}
 
+   current->thread.regs->kuap  = default_amr;
+   current->thread.regs->kuep  = default_iamr;
 }
 #endif
 
@@ -1827,7 +1829,6 @@ void start_thread(struct pt_regs *regs, unsigned long 
start, unsigned long sp)
current->thread.load_tm = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-   thread_pkey_regs_init(>thread);
 }
 EXPORT_SYMBOL(start_thread);
 
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index d0d781876c20..42ca7aa09d5b 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -20,7 +20,7 @@ int  max_pkey;/* Maximum key value 
supported */
  */
 u32  reserved_allocation_mask;
 static u32  initial_allocation_mask;   /* Bits set for the initially allocated 
keys */
-static u64 default_amr;
+u64 default_amr;
 static u64 default_iamr;
 /* Allow all keys to be modified by default */
 u64 default_uamor = ~0x0UL;
@@ -387,20 +387,6 @@ void thread_pkey_regs_restore(struct thread_struct 
*new_thread,
write_uamor(new_thread->uamor);
 }
 
-void thread_pkey_regs_init(struct thread_struct *thread)
-{
-   if (!mmu_has_feature(MMU_FTR_PKEY))
-   return;
-
-   thread->amr   = default_amr;
-   thread->iamr  = default_iamr;
-   thread->uamor = default_uamor;
-
-   write_amr(default_amr);
-   write_iamr(default_iamr);
-   write_uamor(default_uamor);
-}
-
 int execute_only_pkey(struct mm_struct *mm)
 {
if (static_branch_likely(_pkey_disabled))
-- 
2.25.3



  1   2   >