Re: [PATCH v2 06/14] powerpc: Include all arch-specific syscall prototypes

2022-08-09 Thread Andrew Donnellan
On Mon, 2022-08-08 at 15:24 +1000, Andrew Donnellan wrote:
> > diff --git a/arch/powerpc/include/asm/syscalls.h
> > b/arch/powerpc/include/asm/syscalls.h
> > index 025d4b877161..8b2757d7f423 100644
> > --- a/arch/powerpc/include/asm/syscalls.h
> > +++ b/arch/powerpc/include/asm/syscalls.h
> > @@ -8,49 +8,93 @@
> >  #include 
> >  #include 
> >  
> > +#ifdef CONFIG_PPC64
> > +#include 
> > +#endif
> > +#include 
> > +#include 
> > +
> >  struct rtas_args;
> >  
> > -asmlinkage long sys_mmap(unsigned long addr, size_t len,
> > -   unsigned long prot, unsigned long flags,
> > -   unsigned long fd, off_t offset);
> > -asmlinkage long sys_mmap2(unsigned long addr, size_t len,
> > -   unsigned long prot, unsigned long flags,
> > -   unsigned long fd, unsigned long pgoff);
> > -asmlinkage long sys_ppc64_personality(unsigned long personality);
> > +#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER

Introduce this ifdef once it starts mattering at patch 10, I think.

-- 
Andrew Donnellan              OzLabs, ADL Canberra
a...@linux.ibm.com   IBM Australia Limited



Re: [PATCH 06/17] powerpc/qspinlock: theft prevention to control latency

2022-08-09 Thread Jordan Niethe
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Give the queue head the ability to stop stealers. After a number of
> spins without successfully acquiring the lock, the queue head employs
> this, which will ensure it is the next owner.
> ---
>  arch/powerpc/include/asm/qspinlock_types.h | 10 +++-
>  arch/powerpc/lib/qspinlock.c   | 56 +-
>  2 files changed, 63 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/qspinlock_types.h 
> b/arch/powerpc/include/asm/qspinlock_types.h
> index 210adf05b235..8b20f5e22bba 100644
> --- a/arch/powerpc/include/asm/qspinlock_types.h
> +++ b/arch/powerpc/include/asm/qspinlock_types.h
> @@ -29,7 +29,8 @@ typedef struct qspinlock {
>   * Bitfields in the lock word:
>   *
>   * 0: locked bit
> - * 16-31: tail cpu (+1)
> + *16: must queue bit
> + * 17-31: tail cpu (+1)
>   */
>  #define  _Q_SET_MASK(type)   (((1U << _Q_ ## type ## _BITS) - 1)\
> << _Q_ ## type ## _OFFSET)
> @@ -38,7 +39,12 @@ typedef struct qspinlock {
>  #define _Q_LOCKED_MASK   _Q_SET_MASK(LOCKED)
>  #define _Q_LOCKED_VAL(1U << _Q_LOCKED_OFFSET)
>  
> -#define _Q_TAIL_CPU_OFFSET   16
> +#define _Q_MUST_Q_OFFSET 16
> +#define _Q_MUST_Q_BITS   1
> +#define _Q_MUST_Q_MASK   _Q_SET_MASK(MUST_Q)
> +#define _Q_MUST_Q_VAL(1U << _Q_MUST_Q_OFFSET)
> +
> +#define _Q_TAIL_CPU_OFFSET   17
>  #define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
>  #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)

Not a big deal but some of these values could be calculated like in the
generic version. e.g.

#define _Q_PENDING_OFFSET   (_Q_LOCKED_OFFSET +_Q_LOCKED_BITS)
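
For this layout it would mostly be the tail offset that can be derived from
the others, something like (sketch only, untested):

#define _Q_MUST_Q_OFFSET	16
#define _Q_MUST_Q_BITS		1

#define _Q_TAIL_CPU_OFFSET	(_Q_MUST_Q_OFFSET + _Q_MUST_Q_BITS)
#define _Q_TAIL_CPU_BITS	(32 - _Q_TAIL_CPU_OFFSET)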

>  
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 1625cce714b2..a906cc8f15fa 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -22,6 +22,7 @@ struct qnodes {
>  /* Tuning parameters */
>  static int STEAL_SPINS __read_mostly = (1<<5);
>  static bool MAYBE_STEALERS __read_mostly = true;
> +static int HEAD_SPINS __read_mostly = (1<<8);
>  
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>  
> @@ -30,6 +31,11 @@ static __always_inline int get_steal_spins(void)
>   return STEAL_SPINS;
>  }
>  
> +static __always_inline int get_head_spins(void)
> +{
> + return HEAD_SPINS;
> +}
> +
>  static inline u32 encode_tail_cpu(void)
>  {
>   return (smp_processor_id() + 1) << _Q_TAIL_CPU_OFFSET;
> @@ -142,6 +148,23 @@ static __always_inline u32 publish_tail_cpu(struct 
> qspinlock *lock, u32 tail)
>   return prev;
>  }
>  
> +static __always_inline u32 lock_set_mustq(struct qspinlock *lock)
> +{
> + u32 new = _Q_MUST_Q_VAL;
> + u32 prev;
> +
> + asm volatile(
> +"1:  lwarx   %0,0,%1 # lock_set_mustq\n"

Is the EH bit not set because we don't hold the lock here?

> +"or  %0,%0,%2\n"
> +"stwcx.  %0,0,%1 \n"
> +"bne-1b  \n"
> + : "=" (prev)
> + : "r" (>val), "r" (new)
> + : "cr0", "memory");

This is another usage close to the DEFINE_TESTOP() pattern.
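
If it were worth factoring out, a shared helper in that style could look
roughly like this (sketch only, untested; the name and the lack of EH/barrier
handling are mine, not from the patch):

static __always_inline u32 lock_fetch_or(struct qspinlock *lock, u32 mask)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# lock_fetch_or		\n"
"	or	%0,%0,%2			\n"
"	stwcx.	%0,0,%1				\n"
"	bne-	1b				\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (mask)
	: "cr0", "memory");

	return prev;
}

lock_set_mustq() would then just be lock_fetch_or(lock, _Q_MUST_Q_VAL).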

> +
> + return prev;
> +}
> +
>  static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
>  {
>   int cpu = get_tail_cpu(val);
> @@ -165,6 +188,9 @@ static inline bool try_to_steal_lock(struct qspinlock 
> *lock)
>   for (;;) {
>   u32 val = READ_ONCE(lock->val);
>  
> + if (val & _Q_MUST_Q_VAL)
> + break;
> +
>   if (unlikely(!(val & _Q_LOCKED_VAL))) {
>   if (trylock_with_tail_cpu(lock, val))
>   return true;
> @@ -246,11 +272,22 @@ static inline void queued_spin_lock_mcs_queue(struct 
> qspinlock *lock)
>   /* We must be the owner, just set the lock bit and acquire */
>   lock_set_locked(lock);
>   } else {
> + int iters = 0;
> + bool set_mustq = false;
> +
>  again:
>   /* We're at the head of the waitqueue, wait for the lock. */
> - while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL)
> + while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
>   cpu_relax();
>  
> + iters++;

It seems instead of using set_mustq, (val & _Q_MUST_Q_VAL) could be checked?
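
i.e. something like (untested):

	while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
		cpu_relax();

		iters++;
		if (!(val & _Q_MUST_Q_VAL) && iters >= get_head_spins())
			lock_set_mustq(lock);
	}

Once the bit is set, the next READ_ONCE() would see it, so lock_set_mustq()
would not be re-issued.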

> + if (!set_mustq && iters >= get_head_spins()) {
> + set_mustq = true;
> + lock_set_mustq(lock);
> + val |= _Q_MUST_Q_VAL;
> + }
> + }
> +
>   /* If we're the last queued, must clean up the tail. */
>   if ((val & _Q_TAIL_CPU_MASK) == tail) {
>  

[PATCH v2] powerpc/kexec: Fix build failure from uninitialised variable

2022-08-09 Thread Russell Currey
clang 14 won't build because ret is uninitialised and can be returned if
both prop and fdtprop are NULL.  Drop the ret variable and return an
error in that failure case.

Fixes: b1fc44eaa9ba ("pseries/iommu/ddw: Fix kdump to work in absence of 
ibm,dma-window")
Suggested-by: Christophe Leroy 
Signed-off-by: Russell Currey 
---
v2: adopt Christophe's suggestion, which is better

 arch/powerpc/kexec/file_load_64.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kexec/file_load_64.c 
b/arch/powerpc/kexec/file_load_64.c
index 683462e4556b..349a781cea0b 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -1043,17 +1043,17 @@ static int copy_property(void *fdt, int node_offset, 
const struct device_node *d
 const char *propname)
 {
const void *prop, *fdtprop;
-   int len = 0, fdtlen = 0, ret;
+   int len = 0, fdtlen = 0;
 
prop = of_get_property(dn, propname, &len);
fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen);
 
if (fdtprop && !prop)
-   ret = fdt_delprop(fdt, node_offset, propname);
+   return fdt_delprop(fdt, node_offset, propname);
else if (prop)
-   ret = fdt_setprop(fdt, node_offset, propname, prop, len);
-
-   return ret;
+   return fdt_setprop(fdt, node_offset, propname, prop, len);
+   else
+   return -FDT_ERR_NOTFOUND;
 }
 
 static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
-- 
2.37.1



Re: [PATCH 05/17] powerpc/qspinlock: allow new waiters to steal the lock before queueing

2022-08-09 Thread Jordan Niethe
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Allow new waiters a number of spins on the lock word before queueing,
> which particularly helps paravirt performance when physical CPUs are
> oversubscribed.
> ---
>  arch/powerpc/lib/qspinlock.c | 152 ---
>  1 file changed, 141 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 7c71e5e287df..1625cce714b2 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -19,8 +19,17 @@ struct qnodes {
>   struct qnode nodes[MAX_NODES];
>  };
>  
> +/* Tuning parameters */
> +static int STEAL_SPINS __read_mostly = (1<<5);
> +static bool MAYBE_STEALERS __read_mostly = true;

I can understand why, but macro case variables can be a bit confusing.

> +
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>  
> +static __always_inline int get_steal_spins(void)
> +{
> + return STEAL_SPINS;
> +}
> +
>  static inline u32 encode_tail_cpu(void)
>  {
>   return (smp_processor_id() + 1) << _Q_TAIL_CPU_OFFSET;
> @@ -76,6 +85,39 @@ static __always_inline int trylock_clear_tail_cpu(struct 
> qspinlock *lock, u32 ol
>   return 0;
>  }
>  
> +static __always_inline u32 __trylock_cmpxchg(struct qspinlock *lock, u32 
> old, u32 new)
> +{
> + u32 prev;
> +
> + BUG_ON(old & _Q_LOCKED_VAL);
> +
> + asm volatile(
> +"1:  lwarx   %0,0,%1,%4  # queued_spin_trylock_cmpxchg   \n"

s/queued_spin_trylock_cmpxchg/__trylock_cmpxchg/

btw what format are you using for the '\n's in the inline asm?

> +"cmpw0,%0,%2 \n"
> +"bne-2f  \n"
> +"stwcx.  %3,0,%1 \n"
> +"bne-1b  \n"
> +"\t" PPC_ACQUIRE_BARRIER "   \n"
> +"2:  \n"
> + : "=" (prev)
> + : "r" (>val), "r"(old), "r" (new),
> +   "i" (IS_ENABLED(CONFIG_PPC64) ? 1 : 0)
> + : "cr0", "memory");

This is very similar to trylock_clear_tail_cpu(). So maybe it is worth having
some form of "test and set" primitive helper.

> +
> + return prev;
> +}
> +
> +/* Take lock, preserving tail, cmpxchg with val (which must not be locked) */
> +static __always_inline int trylock_with_tail_cpu(struct qspinlock *lock, u32 
> val)
> +{
> + u32 newval = _Q_LOCKED_VAL | (val & _Q_TAIL_CPU_MASK);
> +
> + if (__trylock_cmpxchg(lock, val, newval) == val)
> + return 1;
> + else
> + return 0;

same optional style nit: return __trylock_cmpxchg(lock, val, newval) == val

> +}
> +
>  /*
>   * Publish our tail, replacing previous tail. Return previous value.
>   *
> @@ -115,6 +157,31 @@ static struct qnode *get_tail_qnode(struct qspinlock 
> *lock, u32 val)
>   BUG();
>  }
>  
> +static inline bool try_to_steal_lock(struct qspinlock *lock)
> +{
> + int iters;
> +
> + /* Attempt to steal the lock */
> + for (;;) {
> + u32 val = READ_ONCE(lock->val);
> +
> + if (unlikely(!(val & _Q_LOCKED_VAL))) {
> + if (trylock_with_tail_cpu(lock, val))
> + return true;
> + continue;
> + }

The continue would bypass iters++/cpu_relax but the next time around
  if (unlikely(!(val & _Q_LOCKED_VAL))) {
should fail so everything should be fine?

> +
> + cpu_relax();
> +
> + iters++;
> +
> + if (iters >= get_steal_spins())
> + break;
> + }
> +
> + return false;
> +}
> +
>  static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
>  {
>   struct qnodes *qnodesp;
> @@ -164,20 +231,39 @@ static inline void queued_spin_lock_mcs_queue(struct 
> qspinlock *lock)
>   smp_rmb(); /* acquire barrier for the mcs lock */
>   }
>  
> - /* We're at the head of the waitqueue, wait for the lock. */
> - while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL)
> - cpu_relax();
> + if (!MAYBE_STEALERS) {
> + /* We're at the head of the waitqueue, wait for the lock. */
> + while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL)
> + cpu_relax();
>  
> - /* If we're the last queued, must clean up the tail. */
> - if ((val & _Q_TAIL_CPU_MASK) == tail) {
> - if (trylock_clear_tail_cpu(lock, val))
> - goto release;
> - /* Another waiter must have enqueued */
> - }
> + /* If we're the last queued, must clean up the tail. */
> + if ((val & _Q_TAIL_CPU_MASK) == tail) {
> + if (trylock_clear_tail_cpu(lock, val))
> + goto release;
> + /* Another 

[PATCH] docs: powerpc: add POWER9 and POWER10 to CPU families

2022-08-09 Thread Nicholas Miehlbradt
Add POWER9 and POWER10 to CPU families and list Radix MMU.

Signed-off-by: Nicholas Miehlbradt 
---
 Documentation/powerpc/cpu_families.rst | 13 +
 1 file changed, 13 insertions(+)

diff --git a/Documentation/powerpc/cpu_families.rst 
b/Documentation/powerpc/cpu_families.rst
index 9b84e045e713..eb7e60649b43 100644
--- a/Documentation/powerpc/cpu_families.rst
+++ b/Documentation/powerpc/cpu_families.rst
@@ -10,6 +10,7 @@ Book3S (aka sPAPR)
 --
 
 - Hash MMU (except 603 and e300)
+- Radix MMU (POWER9 and later)
 - Software loaded TLB (603 and e300)
 - Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600)
 - Mix of 32 & 64 bit::
@@ -100,6 +101,18 @@ Book3S (aka sPAPR)
   v
+--+
|POWER8|
+   +--+
+  |
+  |
+  v
+   +--+
+   |POWER9|
+   +--+
+  |
+  |
+  v
+   +--+
+   |   POWER10|
+--+
 
 
-- 
2.34.1



Re: [PATCH 04/17] powerpc/qspinlock: convert atomic operations to assembly

2022-08-09 Thread Jordan Niethe
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> This uses more optimal ll/sc style access patterns (rather than
> cmpxchg), and also sets the EH=1 lock hint on those operations
> which acquire ownership of the lock.
> ---
>  arch/powerpc/include/asm/qspinlock.h   | 25 +--
>  arch/powerpc/include/asm/qspinlock_types.h |  6 +-
>  arch/powerpc/lib/qspinlock.c   | 81 +++---
>  3 files changed, 79 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/qspinlock.h 
> b/arch/powerpc/include/asm/qspinlock.h
> index 79a1936fb68d..3ab354159e5e 100644
> --- a/arch/powerpc/include/asm/qspinlock.h
> +++ b/arch/powerpc/include/asm/qspinlock.h
> @@ -2,28 +2,43 @@
>  #ifndef _ASM_POWERPC_QSPINLOCK_H
>  #define _ASM_POWERPC_QSPINLOCK_H
>  
> -#include 
>  #include 
>  #include 
>  
>  static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
>  {
> - return atomic_read(&lock->val);
> + return READ_ONCE(lock->val);
>  }
>  
>  static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
>  {
> - return !atomic_read(&lock.val);
> + return !lock.val;
>  }
>  
>  static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
>  {
> - return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK);
> + return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
>  }
>  
>  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
>  {
> - if (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0)
> + u32 new = _Q_LOCKED_VAL;
> + u32 prev;
> +
> + asm volatile(
> +"1:  lwarx   %0,0,%1,%3  # queued_spin_trylock   \n"
> +"cmpwi   0,%0,0  \n"
> +"bne-2f  \n"
> +"stwcx.  %2,0,%1 \n"
> +"bne-1b  \n"
> +"\t" PPC_ACQUIRE_BARRIER "   \n"
> +"2:  \n"
> + : "=" (prev)
> + : "r" (>val), "r" (new),
> +   "i" (IS_ENABLED(CONFIG_PPC64) ? 1 : 0)

btw IS_ENABLED() already returns 1 or 0

> + : "cr0", "memory");

This is the ISA's "test and set" atomic primitive. Do you think it would be 
worth separating it as a helper?

> +
> + if (likely(prev == 0))
>   return 1;
>   return 0;

same optional style nit: return likely(prev == 0);

>  }
> diff --git a/arch/powerpc/include/asm/qspinlock_types.h 
> b/arch/powerpc/include/asm/qspinlock_types.h
> index 3425dab42576..210adf05b235 100644
> --- a/arch/powerpc/include/asm/qspinlock_types.h
> +++ b/arch/powerpc/include/asm/qspinlock_types.h
> @@ -7,7 +7,7 @@
>  
>  typedef struct qspinlock {
>   union {
> - atomic_t val;
> + u32 val;
>  
>  #ifdef __LITTLE_ENDIAN
>   struct {
> @@ -23,10 +23,10 @@ typedef struct qspinlock {
>   };
>  } arch_spinlock_t;
>  
> -#define  __ARCH_SPIN_LOCK_UNLOCKED   { { .val = ATOMIC_INIT(0) } }
> +#define  __ARCH_SPIN_LOCK_UNLOCKED   { { .val = 0 } }
>  
>  /*
> - * Bitfields in the atomic value:
> + * Bitfields in the lock word:
>   *
>   * 0: locked bit
>   * 16-31: tail cpu (+1)
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 5ebb88d95636..7c71e5e287df 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -1,5 +1,4 @@
>  // SPDX-License-Identifier: GPL-2.0-or-later
> -#include 
>  #include 
>  #include 
>  #include 
> @@ -22,32 +21,59 @@ struct qnodes {
>  
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>  
> -static inline int encode_tail_cpu(void)
> +static inline u32 encode_tail_cpu(void)
>  {
>   return (smp_processor_id() + 1) << _Q_TAIL_CPU_OFFSET;
>  }
>  
> -static inline int get_tail_cpu(int val)
> +static inline int get_tail_cpu(u32 val)
>  {
>   return (val >> _Q_TAIL_CPU_OFFSET) - 1;
>  }
>  
>  /* Take the lock by setting the bit, no other CPUs may concurrently lock it. 
> */

I think you missed deleting the above line.

> +/* Take the lock by setting the lock bit, no other CPUs will touch it. */
>  static __always_inline void lock_set_locked(struct qspinlock *lock)
>  {
> - atomic_or(_Q_LOCKED_VAL, &lock->val);
> - __atomic_acquire_fence();
> + u32 new = _Q_LOCKED_VAL;
> + u32 prev;
> +
> + asm volatile(
> +"1:  lwarx   %0,0,%1,%3  # lock_set_locked   \n"
> +"or  %0,%0,%2\n"
> +"stwcx.  %0,0,%1 \n"
> +"bne-1b  \n"
> +"\t" PPC_ACQUIRE_BARRIER "   \n"
> + : "=" (prev)
> + : "r" (>val), "r" (new),
> +   "i" (IS_ENABLED(CONFIG_PPC64) ? 1 : 0)
> + : "cr0", 

Re: [PATCH 03/17] powerpc/qspinlock: use a half-word store to unlock to avoid larx/stcx.

2022-08-09 Thread Jordan Niethe
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> The first 16 bits of the lock are only modified by the owner, and other
> modifications always use atomic operations on the entire 32 bits, so
> unlocks can use plain stores on the 16 bits. This is the same kind of
> optimisation done by core qspinlock code.
> ---
>  arch/powerpc/include/asm/qspinlock.h   |  6 +-
>  arch/powerpc/include/asm/qspinlock_types.h | 19 +--
>  2 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/qspinlock.h 
> b/arch/powerpc/include/asm/qspinlock.h
> index f06117aa60e1..79a1936fb68d 100644
> --- a/arch/powerpc/include/asm/qspinlock.h
> +++ b/arch/powerpc/include/asm/qspinlock.h
> @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct 
> qspinlock *lock)
>  
>  static inline void queued_spin_unlock(struct qspinlock *lock)
>  {
> - for (;;) {
> - int val = atomic_read(&lock->val);
> - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val)
> - return;
> - }
> + smp_store_release(&lock->locked, 0);

Is it also possible for lock_set_locked() to use a non-atomic acquire
operation?
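
i.e. whether something along these lines would be enough (sketch of the
question only -- whether the plain half-word store is safe against concurrent
tail updates is exactly the point being asked):

static __always_inline void lock_set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked, 1);
	__atomic_acquire_fence();
}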

>  }
>  
>  #define arch_spin_is_locked(l)   queued_spin_is_locked(l)
> diff --git a/arch/powerpc/include/asm/qspinlock_types.h 
> b/arch/powerpc/include/asm/qspinlock_types.h
> index 9630e714c70d..3425dab42576 100644
> --- a/arch/powerpc/include/asm/qspinlock_types.h
> +++ b/arch/powerpc/include/asm/qspinlock_types.h
> @@ -3,12 +3,27 @@
>  #define _ASM_POWERPC_QSPINLOCK_TYPES_H
>  
>  #include 
> +#include 
>  
>  typedef struct qspinlock {
> - atomic_t val;
> + union {
> + atomic_t val;
> +
> +#ifdef __LITTLE_ENDIAN
> + struct {
> + u16 locked;
> + u8  reserved[2];
> + };
> +#else
> + struct {
> + u8  reserved[2];
> + u16 locked;
> + };
> +#endif
> + };
>  } arch_spinlock_t;

Just to double check we have:

#define _Q_LOCKED_OFFSET0
#define _Q_LOCKED_BITS  1
#define _Q_LOCKED_MASK  0x0001
#define _Q_LOCKED_VAL   1

#define _Q_TAIL_CPU_OFFSET  16
#define _Q_TAIL_CPU_BITS16
#define _Q_TAIL_CPU_MASK0x


so the ordering here looks correct.

>  
> -#define  __ARCH_SPIN_LOCK_UNLOCKED   { .val = ATOMIC_INIT(0) }
> +#define  __ARCH_SPIN_LOCK_UNLOCKED   { { .val = ATOMIC_INIT(0) } }
>  
>  /*
>   * Bitfields in the atomic value:



Re: [PATCH 02/17] powerpc/qspinlock: add mcs queueing for contended waiters

2022-08-09 Thread Jordan Niethe
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:


>  
> +/*
> + * Bitfields in the atomic value:
> + *
> + * 0: locked bit
> + * 16-31: tail cpu (+1)
> + */
> +#define  _Q_SET_MASK(type)   (((1U << _Q_ ## type ## _BITS) - 1)\
> +   << _Q_ ## type ## _OFFSET)
> +#define _Q_LOCKED_OFFSET 0
> +#define _Q_LOCKED_BITS   1
> +#define _Q_LOCKED_MASK   _Q_SET_MASK(LOCKED)
> +#define _Q_LOCKED_VAL(1U << _Q_LOCKED_OFFSET)
> +
> +#define _Q_TAIL_CPU_OFFSET   16
> +#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
> +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
> +

Just to state the obvious this is:

#define _Q_LOCKED_OFFSET0
#define _Q_LOCKED_BITS  1
#define _Q_LOCKED_MASK  0x0001
#define _Q_LOCKED_VAL   1

#define _Q_TAIL_CPU_OFFSET  16
#define _Q_TAIL_CPU_BITS16
#define _Q_TAIL_CPU_MASK0x

> +#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)
> +#error "qspinlock does not support such large CONFIG_NR_CPUS"
> +#endif
> +
>  #endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 8dbce99a373c..5ebb88d95636 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -1,12 +1,172 @@
>  // SPDX-License-Identifier: GPL-2.0-or-later
> +#include 
> +#include 
> +#include 
>  #include 
> -#include 
> +#include 
> +#include 
>  #include 
>  
> -void queued_spin_lock_slowpath(struct qspinlock *lock)
> +#define MAX_NODES4
> +
> +struct qnode {
> + struct qnode*next;
> + struct qspinlock *lock;
> + u8  locked; /* 1 if lock acquired */
> +};
> +
> +struct qnodes {
> + int count;
> + struct qnode nodes[MAX_NODES];
> +};

I think it could be worth commenting why qnodes::count is used instead of
_Q_TAIL_IDX_OFFSET.

> +
> +static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
> +
> +static inline int encode_tail_cpu(void)

I think the generic version that takes smp_processor_id() as a parameter is 
clearer - at least with this function name.

> +{
> + return (smp_processor_id() + 1) << _Q_TAIL_CPU_OFFSET;
> +}
> +
> +static inline int get_tail_cpu(int val)

It seems like there should be a "decode" function to pair up with the "encode" 
function.
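
Combined with the earlier point about taking the cpu as a parameter, the pair
might read as (sketch only):

static inline int encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(int val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}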

> +{
> + return (val >> _Q_TAIL_CPU_OFFSET) - 1;
> +}
> +
> +/* Take the lock by setting the bit, no other CPUs may concurrently lock it. 
> */

Does that comment mean it is not necessary to use an atomic_or here?

> +static __always_inline void lock_set_locked(struct qspinlock *lock)

nit: could just be called set_locked()

> +{
> + atomic_or(_Q_LOCKED_VAL, &lock->val);
> + __atomic_acquire_fence();
> +}
> +
> +/* Take lock, clearing tail, cmpxchg with val (which must not be locked) */
> +static __always_inline int trylock_clear_tail_cpu(struct qspinlock *lock, 
> int val)
> +{
> + int newval = _Q_LOCKED_VAL;
> +
> + if (atomic_cmpxchg_acquire(&lock->val, val, newval) == val)
> + return 1;
> + else
> + return 0;

same optional style nit: return (atomic_cmpxchg_acquire(&lock->val, val, newval) == val);

> +}
> +
> +/*
> + * Publish our tail, replacing previous tail. Return previous value.
> + *
> + * This provides a release barrier for publishing node, and an acquire 
> barrier
> + * for getting the old node.
> + */
> +static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail)

Did you change from the xchg_tail() name in the generic version because of the 
release and acquire barriers this provides?
Does "publish" generally imply the old value will be returned?

>  {
> - while (!queued_spin_trylock(lock))
> + for (;;) {
> + int val = atomic_read(&lock->val);
> + int newval = (val & ~_Q_TAIL_CPU_MASK) | tail;
> + int old;
> +
> + old = atomic_cmpxchg(&lock->val, val, newval);
> + if (old == val)
> + return old;
> + }
> +}
> +
> +static struct qnode *get_tail_qnode(struct qspinlock *lock, int val)
> +{
> + int cpu = get_tail_cpu(val);
> + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
> + int idx;
> +
> + for (idx = 0; idx < MAX_NODES; idx++) {
> + struct qnode *qnode = &qnodesp->nodes[idx];
> + if (qnode->lock == lock)
> + return qnode;
> + }

In case anyone else is confused by this, Nick explained each cpu can only queue 
on a unique spinlock once regardless of "idx" level.

> +
> + BUG();
> +}
> +
> +static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
> +{
> + struct qnodes *qnodesp;
> + struct qnode *next, *node;
> + int val, old, tail;
> + int idx;
> +
> + BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
> +
> + qnodesp = this_cpu_ptr(&qnodes);
> + if (unlikely(qnodesp->count == MAX_NODES)) {

The comparison is >= in the generic, I guess we've no 

Re: [PATCH 01/17] powerpc/qspinlock: powerpc qspinlock implementation

2022-08-09 Thread Jordan Niethe
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:

> -#define queued_spin_lock queued_spin_lock
>  
> -static inline void queued_spin_unlock(struct qspinlock *lock)
> +static __always_inline int queued_spin_trylock(struct qspinlock *lock)
>  {
> - if (!IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) || !is_shared_processor())
> - smp_store_release(&lock->locked, 0);
> - else
> - __pv_queued_spin_unlock(lock);
> + if (atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0)
> + return 1;
> + return 0;

optional style nit: return (atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0);



Re: [PATCH 06/16] powerpc: Fix objtool unannotated intra-function call warnings on PPC32

2022-08-09 Thread kernel test robot
Hi Sathvika,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.19 next-20220809]
[cannot apply to powerpc/next powerpc/topic/ppc-kvm masahiroy-kbuild/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Sathvika-Vasireddy/objtool-Enable-and-implement-mcount-option-on-powerpc/20220808-200702
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
4e23eeebb2e57f5a28b36221aa776b5a1122dde5
config: powerpc-randconfig-r024-20220808 
(https://download.01.org/0day-ci/archive/20220810/202208100751.liikzjrx-...@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 
5f1c7e2cc5a3c07cbc2412e851a7283c1841f520)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install powerpc cross compiling tool for clang build
# apt-get install binutils-powerpc-linux-gnu
# 
https://github.com/intel-lab-lkp/linux/commit/bcefd9c9f24358413a1b210aa591c8758f58b3a9
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Sathvika-Vasireddy/objtool-Enable-and-implement-mcount-option-on-powerpc/20220808-200702
git checkout bcefd9c9f24358413a1b210aa591c8758f58b3a9
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=powerpc SHELL=/bin/bash arch/powerpc/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

>> :0: error: symbol '__kuep_lock' is already defined

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp


Re: [PATCH] powerpc64/ftrace: Fix ftrace for clang builds

2022-08-09 Thread Ondrej Mosnáček
On Tue, Aug 9, 2022 at 11:59 AM Naveen N. Rao
 wrote:
> Clang doesn't support -mprofile-kernel ABI, so guard the checks against
> CONFIG_DYNAMIC_FTRACE_WITH_REGS, rather than the elf ABI version.
>
> Fixes: 23b44fc248f420 ("powerpc/ftrace: Make __ftrace_make_{nop/call}() 
> common to PPC32 and PPC64")
> Reported-by: Nick Desaulniers 
> Reported-by: Ondrej Mosnacek 
> Signed-off-by: Naveen N. Rao 
> ---
>  arch/powerpc/kernel/trace/ftrace.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)

With this patch my reproducer [1] is passing, thanks!

Tested-by: Ondrej Mosnacek 

[1] https://github.com/ClangBuiltLinux/linux/issues/1682#issue-1330483247


Re: [PATCH v4 3/8] dt-bindings: clock: Add ids for Lynx 10g PLLs

2022-08-09 Thread Sean Anderson



On 8/9/22 1:21 AM, Krzysztof Kozlowski wrote:
> On 08/08/2022 18:16, Sean Anderson wrote:
>> 
>>> This entry here is not
>>> parsed for any tools and only sometimes people look at it. The questions
>>> are directed via entry in maintainers file or via git history, so you
>>> can put company email just there.
>> 
>> As I understand it, the email is simply informative. There are literally
>> hundreds of examples of mixing a "personal" copyright with a company email.
>> It is easy to find if you grep. If you are so opposed to it, then I will
>> remove the email and simply use my name.
> 
> No, no problem for me.
> 
>> 

>> + */
>> +
>> +#ifndef __DT_BINDINGS_CLK_LYNX_10G_H
>> +#define __DT_BINDINGS_CLK_LYNX_10G_H
>> +
>> +#define LYNX10G_CLKS_PER_PLL 2
>> +
>> +#define LYNX10G_PLLa(a) ((a) * LYNX10G_CLKS_PER_PLL)
>> +#define LYNX10G_PLLa_EX_DLY(a)  ((a) * LYNX10G_CLKS_PER_PLL + 1)
>
> These do not look like proper IDs for clocks for bindings. Numbering
> starts from 0 or 1 and any "a" needs to be clearly explained. What do
> you bind here?

 This matches "a" is the index of the PLL. E.g. registers PLL1RSTCTL etc.
 This matches the notation used in the reference manual.
>>>
>>> This is a file for bindings, not for storing register values. There is
>>> no single need to store register values (offsets, indexes) as bindings
>>> as it is not appropriate. Therefore if you do not use it as an ID, just
>>> remove the bindings header.
>> 
>> This *is* just for IDs, as stated in the commit message. The above example
>> was only to illustrate that the clock controlled via the PLL1RSTCTL register
>> (among others) would have an ID of LYNX10G_PLLa(0).
>> 
>> If you doubt it, review the driver.
> 
> Indeed, thanks. Except the driver, where is the DTS user of these
> bindings? It's neither in bindings example, nor in the DTS patches.

The primary purpose is to allow using assigned-clocks. The reference manual
for the processor may specify that certain PLLs must be used with a certain
rate when in some configuration (this is not necessary for the LS1046A or
LS1088A, but there are restrictions for e.g. the LS1043A). Using
assigned-clock-rates allows specifying which PLL is to be used at which rate
(especially if it differs from the bootloader). Of course, the driver could
adjust this later, but it will always use the configured PLL rate before
reconfiguring anything.

--Sean


[PATCH] powerpc/ftrace: Ignore weak functions

2022-08-09 Thread Naveen N. Rao
Extend commit b39181f7c6907d ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to
avoid adding weak function") to ppc32 and ppc64 -mprofile-kernel by
defining FTRACE_MCOUNT_MAX_OFFSET.

For ppc64 -mprofile-kernel ABI, we can have two instructions at function
entry for TOC setup followed by 'mflr r0' and 'bl _mcount'. So, the
mcount location is at most the 4th instruction in a function. For ppc32,
mcount location is always the 3rd instruction in a function, preceded by
'mflr r0' and 'stw r0,4(r1)'.
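
As an illustration of where those offsets come from (the TOC setup mnemonics
are just the typical ones, shown here only as an example):

/*
 * ppc64 -mprofile-kernel (worst case):      ppc32:
 *   0:  addis r2,r12,...   (TOC setup)        0:  mflr  r0
 *   4:  addi  r2,r2,...    (TOC setup)        4:  stw   r0,4(r1)
 *   8:  mflr  r0                              8:  bl    _mcount   -> offset 8
 *  12:  bl    _mcount      -> offset 12
 */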

With this patch, and with ppc64le_guest_defconfig and some ftrace/bpf
config items enabled:
  # grep __ftrace_invalid_address available_filter_functions | wc -l
  79

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/include/asm/ftrace.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 3cee7115441b41..ade406dc6504e3 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -10,6 +10,13 @@
 
 #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
+/* Ignore unused weak functions which will have larger offsets */
+#ifdef CONFIG_MPROFILE_KERNEL
+#define FTRACE_MCOUNT_MAX_OFFSET   12
+#elif defined(CONFIG_PPC32)
+#define FTRACE_MCOUNT_MAX_OFFSET   8
+#endif
+
 #ifndef __ASSEMBLY__
 extern void _mcount(void);
 

base-commit: ff1ed171e05c971652a0ede3d716997de8ee41c9
-- 
2.37.1



Re: [PATCH] powerpc/kexec: Fix build failure from uninitialised variable

2022-08-09 Thread Christophe Leroy


Le 09/08/2022 à 07:45, Russell Currey a écrit :
> clang 14 won't build because ret is uninitialised and can be returned if
> both prop and fdtprop are NULL.
> 
> Fixes: b1fc44eaa9ba ("pseries/iommu/ddw: Fix kdump to work in absence of 
> ibm,dma-window")
> Signed-off-by: Russell Currey 
> ---
> Not sure what should be returned here, EINVAL seemed reasonable for a
> passed property not existing.

Not sure. My understanding is that fdt_delprop() and fdt_setprop() 
return one of the FDT_ERR_ defined in scripts/dtc/libfdt/libfdt.h

> 
> Also, damn it Alexey, I mentioned this in my review:
> http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20220616075901.835871-1-...@ozlabs.ru/
> 
> Consider yourself lucky I'm no longer your dictator (if you don't already)
> 
>   arch/powerpc/kexec/file_load_64.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kexec/file_load_64.c 
> b/arch/powerpc/kexec/file_load_64.c
> index 683462e4556b..8fa2995e6fc7 100644
> --- a/arch/powerpc/kexec/file_load_64.c
> +++ b/arch/powerpc/kexec/file_load_64.c
> @@ -1043,7 +1043,7 @@ static int copy_property(void *fdt, int node_offset, 
> const struct device_node *d
>const char *propname)
>   {
>   const void *prop, *fdtprop;
> - int len = 0, fdtlen = 0, ret;
> + int len = 0, fdtlen = 0, ret = -EINVAL;

Do we need 'ret' at all ?

It would be more clear to return directly instead of going through a 
local var :

if (fdtprop && !prop)
return fdt_delprop(fdt, node_offset, propname);
else if (prop)
return fdt_setprop(fdt, node_offset, propname, prop, len);
else
return -FDT_ERR_NOTFOUND;


>   
>   prop = of_get_property(dn, propname, &len);
>   fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen);

[PATCH] powerpc64/ftrace: Fix ftrace for clang builds

2022-08-09 Thread Naveen N. Rao
Clang doesn't support -mprofile-kernel ABI, so guard the checks against
CONFIG_DYNAMIC_FTRACE_WITH_REGS, rather than the elf ABI version.

Fixes: 23b44fc248f420 ("powerpc/ftrace: Make __ftrace_make_{nop/call}() common 
to PPC32 and PPC64")
Reported-by: Nick Desaulniers 
Reported-by: Ondrej Mosnacek 
Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index cb158c32b50b99..7b85c3b460a3c0 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -393,11 +393,11 @@ int ftrace_make_nop(struct module *mod,
  */
 static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
 {
-   if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
+   if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+   return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
+   else
return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) &&
   ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
-   else
-   return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
 }
 
 static int
@@ -412,7 +412,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long 
addr)
if (copy_inst_from_kernel_nofault(op, ip))
return -EFAULT;
 
-   if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1) &&
+   if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
copy_inst_from_kernel_nofault(op + 1, ip + 4))
return -EFAULT;
 

base-commit: ff1ed171e05c971652a0ede3d716997de8ee41c9
-- 
2.37.1



Re: [PATCH v3] powerpc/mm: Support execute-only memory on the Radix MMU

2022-08-09 Thread Russell Currey
On Tue, 2022-08-09 at 05:51 +, Christophe Leroy wrote:
> Le 09/08/2022 à 04:44, Russell Currey a écrit :
> > The Hash MMU already supports XOM (i.e. mmap with PROT_EXEC only)
> > through the execute-only pkey.  A PROT_EXEC-only mapping will
> > actually
> > map to RX, and then the pkey will be applied on top of it.
> 
> I don't think XOM is a commonly understood acronym. Maybe the first 
> time you use it, it'd be better to say something like:
> 
> The Hash MMU already supports execute-only memory (XOM)

Yes, that's better.

> 
> When you say that Hash MMU supports it through the execute-only pkey,
> does it mean that it is taken into account automatically at mmap
> time, 
> or does the userspace app have to do something special to use the key?
> If it is the second, it means that depending on whether you are radix
> or 
> not, you must do something different ? Is that expected ?

It happens at mmap time, see do_mmap() in mm/mmap.c (and similar for
mprotect).  That calls into execute_only_pkey() which can return
something on x86 & Hash, and if it does that pkey gets used.  The
userspace process doesn't have to do anything, it's transparent.  So
there's no difference in program behaviour switching between Hash/Radix
- at least in the basic cases I've tested.
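
A minimal userspace illustration of that, in the spirit of the linked
mmap_test.c (this is only a sketch, not the actual test):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* No pkey syscalls needed: the kernel picks the execute-only path itself. */
	void *p = mmap(NULL, 4096, PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* A read through this pointer should SIGSEGV on both Hash and Radix. */
	printf("exec-only mapping at %p\n", p);
	return 0;
}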

> 
> > 
> > Radix doesn't have pkeys, but it does have execute permissions
> > built-in
> > to the MMU, so all we have to do to support XOM is expose it.
> > 
> > Signed-off-by: Russell Currey 
> > ---
> > v3: Incorporate Aneesh's suggestions, leave protection_map
> > untouched
> > Basic test:
> > https://github.com/ruscur/junkcode/blob/main/mmap_test.c
> > 
> >   arch/powerpc/include/asm/book3s/64/pgtable.h |  2 ++
> >   arch/powerpc/mm/book3s64/pgtable.c   | 11 +--
> >   arch/powerpc/mm/fault.c  |  6 +-
> >   3 files changed, 16 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h
> > b/arch/powerpc/include/asm/book3s/64/pgtable.h
> > index 392ff48f77df..486902aff040 100644
> > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> > @@ -151,6 +151,8 @@
> >   #define PAGE_COPY_X   __pgprot(_PAGE_BASE | _PAGE_READ |
> > _PAGE_EXEC)
> >   #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ)
> >   #define PAGE_READONLY_X   __pgprot(_PAGE_BASE | _PAGE_READ |
> > _PAGE_EXEC)
> > +/* Radix only, Hash uses PAGE_READONLY_X + execute-only pkey
> > instead */
> > +#define PAGE_EXECONLY  __pgprot(_PAGE_BASE | _PAGE_EXEC)
> >   
> >   /* Permission masks used for kernel mappings */
> >   #define PAGE_KERNEL   __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
> > diff --git a/arch/powerpc/mm/book3s64/pgtable.c
> > b/arch/powerpc/mm/book3s64/pgtable.c
> > index 7b9966402b25..62f63d344596 100644
> > --- a/arch/powerpc/mm/book3s64/pgtable.c
> > +++ b/arch/powerpc/mm/book3s64/pgtable.c
> > @@ -553,8 +553,15 @@ EXPORT_SYMBOL_GPL(memremap_compat_align);
> >   
> >   pgprot_t vm_get_page_prot(unsigned long vm_flags)
> >   {
> > -   unsigned long prot = pgprot_val(protection_map[vm_flags &
> > -    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
> > +   unsigned long prot;
> > +
> > +   /* Radix supports execute-only, but protection_map maps X -> RX */
> > +   if (radix_enabled() && ((vm_flags & (VM_READ|VM_WRITE|VM_EXEC)) == VM_EXEC)) {
> 
> Maybe use VM_ACCESS_FLAGS ?

I was looking for something like that but only checked powerpc, thanks.

> 
> > +   prot = pgprot_val(PAGE_EXECONLY);
> > +   } else {
> > +   prot = pgprot_val(protection_map[vm_flags &
> > +    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
> > +   }
> >   
> > if (vm_flags & VM_SAO)
> > prot |= _PAGE_SAO;
> > diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> > index 014005428687..59e4cbcf3109 100644
> > --- a/arch/powerpc/mm/fault.c
> > +++ b/arch/powerpc/mm/fault.c
> > @@ -270,7 +270,11 @@ static bool access_error(bool is_write, bool
> > is_exec, struct vm_area_struct *vma
> > return false;
> > }
> >   
> > -   if (unlikely(!vma_is_accessible(vma)))
> > +   /* On Radix, a read fault could be from PROT_NONE or PROT_EXEC */
> > +   if (unlikely(radix_enabled() && !(vma->vm_flags & VM_READ)))
> > +   return true;
> 
> Why do you need the radix_enabled() here ?
> Even if it doesn't fault directly, reading a non readable area is
> still 
> an error and should be handled as such, even on hardware that will
> not 
> generate a fault for it at the first place. So I'd just do:
> 
> if (!(vma->vm_flags & VM_READ)))
> return true;
> 

I don't think we need it, just concerned I might break something.  I
can do that.

> > +   /* Check for a PROT_NONE fault on other MMUs */
> > +   else if (unlikely(!vma_is_accessible(vma)))
> >  

Re: [PATCH v3] powerpc/mm: Support execute-only memory on the Radix MMU

2022-08-09 Thread Aneesh Kumar K V
On 8/9/22 11:21 AM, Christophe Leroy wrote:
> Le 09/08/2022 à 04:44, Russell Currey a écrit :
>> The Hash MMU already supports XOM (i.e. mmap with PROT_EXEC only)
>> through the execute-only pkey.  A PROT_EXEC-only mapping will actually
>> map to RX, and then the pkey will be applied on top of it.
> 
> I don't think XOM is a commonly understood acronym. Maybe the first 
> time you use it, it'd be better to say something like:
> 
> The Hash MMU already supports execute-only memory (XOM)
> 
> When you say that Hash MMU supports it through the execute-only pkey, 
> does it mean that it is taken into account automatically at mmap time, 
> or does the userspace app have to do something special to use the key? 
> If it is the second, it means that depending on whether you are radix or 
> not, you must do something different ? Is that expected ?
> 
>>
>> Radix doesn't have pkeys, but it does have execute permissions built-in
>> to the MMU, so all we have to do to support XOM is expose it.
>>
>> Signed-off-by: Russell Currey 
>> ---
>> v3: Incorporate Aneesh's suggestions, leave protection_map untouched
>> Basic test: https://github.com/ruscur/junkcode/blob/main/mmap_test.c
>>
>>   arch/powerpc/include/asm/book3s/64/pgtable.h |  2 ++
>>   arch/powerpc/mm/book3s64/pgtable.c   | 11 +--
>>   arch/powerpc/mm/fault.c  |  6 +-
>>   3 files changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
>> b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 392ff48f77df..486902aff040 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -151,6 +151,8 @@
>>   #define PAGE_COPY_X__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>>   #define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_READ)
>>   #define PAGE_READONLY_X__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>> +/* Radix only, Hash uses PAGE_READONLY_X + execute-only pkey instead */
>> +#define PAGE_EXECONLY   __pgprot(_PAGE_BASE | _PAGE_EXEC)
>>   
>>   /* Permission masks used for kernel mappings */
>>   #define PAGE_KERNEL__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
>> diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
>> b/arch/powerpc/mm/book3s64/pgtable.c
>> index 7b9966402b25..62f63d344596 100644
>> --- a/arch/powerpc/mm/book3s64/pgtable.c
>> +++ b/arch/powerpc/mm/book3s64/pgtable.c
>> @@ -553,8 +553,15 @@ EXPORT_SYMBOL_GPL(memremap_compat_align);
>>   
>>   pgprot_t vm_get_page_prot(unsigned long vm_flags)
>>   {
>> -unsigned long prot = pgprot_val(protection_map[vm_flags &
>> -(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
>> +unsigned long prot;
>> +
>> +/* Radix supports execute-only, but protection_map maps X -> RX */
>> +if (radix_enabled() && ((vm_flags & (VM_READ|VM_WRITE|VM_EXEC)) == VM_EXEC)) {
> 
> Maybe use VM_ACCESS_FLAGS ?
> 
>> +prot = pgprot_val(PAGE_EXECONLY);
>> +} else {
>> +prot = pgprot_val(protection_map[vm_flags &
>> +  (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
>> +}
>>   
>>  if (vm_flags & VM_SAO)
>>  prot |= _PAGE_SAO;
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index 014005428687..59e4cbcf3109 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -270,7 +270,11 @@ static bool access_error(bool is_write, bool is_exec, 
>> struct vm_area_struct *vma
>>  return false;
>>  }
>>   
>> -if (unlikely(!vma_is_accessible(vma)))
>> +/* On Radix, a read fault could be from PROT_NONE or PROT_EXEC */
>> +if (unlikely(radix_enabled() && !(vma->vm_flags & VM_READ)))
>> +return true;
> 
> Why do you need the radix_enabled() here ?
> Even if it doesn't fault directly, reading a non readable area is still 
> an error and should be handled as such, even on hardware that will not 
> generate a fault for it at the first place. So I'd just do:
> 
>   if (!(vma->vm_flags & VM_READ)))
>   return true;
> 
>> +/* Check for a PROT_NONE fault on other MMUs */
>> +else if (unlikely(!vma_is_accessible(vma)))
>>  return true;
>>  /*
>>   * We should ideally do the vma pkey access check here. But in the
> 
> Don't use an if/else construct, there is no other 'else' in that 
> function, or in similar functions like bad_kernel_fault() for instance.
> 
> So leave the !vma_is_accessible(vma) untouched and add your check as a 
> standalone check before or after it.


What does the vma_is_accessible() check bring if we make the VM_READ check 
unconditional?

-aneesh




Re: [PATCH v3 11/25] powerpc/ftrace: Make __ftrace_make_{nop/call}() common to PPC32 and PPC64

2022-08-09 Thread Naveen N. Rao

Nick Desaulniers wrote:

Christ...that's not what I meant to send; sorry, mutt is giving me
bizarro errors today so I can't insert myself into threads using the
usual incantation I have in the past.  What I meant to send was:

Christophe,
Ondrej reported a regression with ftrace when building w/ clang
bisected to this commit. Can you PTAL?
https://github.com/ClangBuiltLinux/linux/issues/1682


Thanks for the report. That was my doing - I tend to miss the fact that 
clang doesn't support -mprofile-kernel. Can you check if the below patch 
fixes it for you?


- Naveen

---
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index cb158c32b50b99..7b85c3b460a3c0 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -393,11 +393,11 @@ int ftrace_make_nop(struct module *mod,
 */
static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
-   if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
+   if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+   return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
+   else
   return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) &&
  ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
-   else
-   return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
}

static int
@@ -412,7 +412,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long 
addr)
   if (copy_inst_from_kernel_nofault(op, ip))
   return -EFAULT;

-   if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1) &&
+   if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
   copy_inst_from_kernel_nofault(op + 1, ip + 4))
   return -EFAULT;