Re: [PATCH 0/6] powerpc/qspinlock: Fix yield latency bug and other

2023-10-20 Thread Nysal Jan K.A.
On Mon, Oct 16, 2023 at 10:42:59PM +1000, Nicholas Piggin wrote:
> This fixes a long-standing latency bug in the powerpc qspinlock
> implementation that quite a few people have reported and helped
> out with debugging.
> 
> The first patch is a minimal fix that avoids the problem. The
> other patches are streamlining and improvements after the fix.
> 
> Thanks,
> Nick
> 
> Nicholas Piggin (6):
>   powerpc/qspinlock: Fix stale propagated yield_cpu
>   powerpc/qspinlock: stop queued waiters trying to set lock sleepy
>   powerpc/qspinlock: propagate owner preemptedness rather than CPU
> number
>   powerpc/qspinlock: don't propagate the not-sleepy state
>   powerpc/qspinlock: Propagate sleepy if previous waiter is preempted
>   powerpc/qspinlock: Rename yield_propagate_owner tunable
> 
>  arch/powerpc/lib/qspinlock.c | 119 +++
>  1 file changed, 52 insertions(+), 67 deletions(-)
> 
> -- 
> 2.42.0
> 
Just a minor comment regarding patch 2.

For the series:

Reviewed-by: Nysal Jan K.A 


Re: [PATCH 2/6] powerpc/qspinlock: stop queued waiters trying to set lock sleepy

2023-10-20 Thread Nysal Jan K.A.
On Mon, Oct 16, 2023 at 10:43:01PM +1000, Nicholas Piggin wrote:
> If a queued waiter notices the lock owner or the previous waiter has
> been preempted, it attempts to mark the lock sleepy, but it does this
> as a try-set operation using the original lock value it got when
> queueing, which will become stale as the queue progresses, and the
> try-set will fail. Drop this and just set the sleepy seen clock.
> 
> Signed-off-by: Nicholas Piggin 
> ---
>  arch/powerpc/lib/qspinlock.c | 24 ++--
>  1 file changed, 10 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 6dd2f46bd3ef..75608ced14c2 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -247,22 +247,18 @@ static __always_inline void seen_sleepy_lock(void)
>   this_cpu_write(sleepy_lock_seen_clock, sched_clock());
>  }
>  
> -static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
> +static __always_inline void seen_sleepy_node(void)
>  {
>   if (pv_sleepy_lock) {
>   if (pv_sleepy_lock_interval_ns)
>   this_cpu_write(sleepy_lock_seen_clock, sched_clock());
> - if (val & _Q_LOCKED_VAL) {
> - if (!(val & _Q_SLEEPY_VAL))
> - try_set_sleepy(lock, val);
> - }
> + /* Don't set sleepy because we likely have a stale val */
>   }
>  }

seen_sleepy_lock() and seen_sleepy_node() are now the same
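If seen_sleepy_lock() carries the same pv_sleepy_lock and
pv_sleepy_lock_interval_ns checks (only its closing lines appear in the
hunk context above), a possible follow-up sketch, not part of the posted
series, would be to simply forward one helper to the other:

/* Hypothetical cleanup: with the try_set_sleepy() call gone,
 * seen_sleepy_node() does exactly what seen_sleepy_lock() does,
 * so it could just forward to it. */
static __always_inline void seen_sleepy_node(void)
{
	seen_sleepy_lock();
}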

>  
> -static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
> +static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
>  {
> - int cpu = decode_tail_cpu(val);
> - struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
> + struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
>   int idx;
>  
>   /*
> @@ -381,9 +377,8 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int
>  }
>  
>  /* Called inside spin_begin() */
> -static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
> +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
>  {
> - int prev_cpu = decode_tail_cpu(val);
>   u32 yield_count;
>   int yield_cpu;
>   bool preempted = false;
> @@ -412,7 +407,7 @@ static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *
>   spin_end();
>  
>   preempted = true;
> - seen_sleepy_node(lock, val);
> + seen_sleepy_node();
>  
>   smp_rmb();
>  
> @@ -436,7 +431,7 @@ static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *
>   spin_end();
>  
>   preempted = true;
> - seen_sleepy_node(lock, val);
> + seen_sleepy_node();
>  
>   smp_rmb(); /* See __yield_to_locked_owner comment */
>  
> @@ -587,7 +582,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>* head of the waitqueue.
>*/
>   if (old & _Q_TAIL_CPU_MASK) {
> - struct qnode *prev = get_tail_qnode(lock, old);
> + int prev_cpu = decode_tail_cpu(old);
> + struct qnode *prev = get_tail_qnode(lock, prev_cpu);
>  
>   /* Link @node into the waitqueue. */
>   WRITE_ONCE(prev->next, node);
> @@ -597,7 +593,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>   while (!READ_ONCE(node->locked)) {
>   spec_barrier();
>  
> - if (yield_to_prev(lock, node, old, paravirt))
> + if (yield_to_prev(lock, node, prev_cpu, paravirt))
>   seen_preempted = true;
>   }
>   spec_barrier();
> -- 
> 2.42.0
> 
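To make the failure mode from the changelog concrete, here is a small
userspace sketch, with illustrative names and bit layout rather than the
kernel's definitions: a compare-and-swap built on a lock-word snapshot
taken at queueing time is all but guaranteed to fail once the tail
changes underneath it.

/* Standalone illustration of the stale try-set; compiles with any C11
 * compiler. Names and the bit layout are made up for the example. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define Q_LOCKED_VAL	(1u << 0)
#define Q_SLEEPY_VAL	(1u << 1)
#define Q_TAIL_SHIFT	16

int main(void)
{
	_Atomic uint32_t lockval = Q_LOCKED_VAL;
	uint32_t snap = atomic_load(&lockval);	/* snapshot at queueing time */

	/* The queue progresses: another waiter encodes itself in the tail. */
	atomic_fetch_or(&lockval, 2u << Q_TAIL_SHIFT);

	/* Try-set sleepy with the stale snapshot: the CAS compares the
	 * current word against 'snap', which no longer matches, so it
	 * fails and 'expected' is updated with the current word. */
	uint32_t expected = snap;
	if (!atomic_compare_exchange_strong(&lockval, &expected,
					    snap | Q_SLEEPY_VAL))
		printf("try-set failed: expected %#x, word is now %#x\n",
		       (unsigned)snap, (unsigned)expected);
	return 0;
}

Since the snapshot stays stale for the rest of the queueing slowpath,
only recording the sleepy-seen clock, as the patch does, avoids a doomed
atomic on an already contended cache line.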


Re: [PATCH] powerpc/atomics: Remove unused function

2023-03-23 Thread Nysal Jan K.A.
Michael,
Any comments on this one?

On Fri, Feb 24, 2023 at 11:02:31AM +, Christophe Leroy wrote:
> 
> 
> > On 24/02/2023 at 11:39, Nysal Jan K.A wrote:
> > 
> > Remove arch_atomic_try_cmpxchg_lock function as it is no longer used
> > since commit 9f61521c7a28 ("powerpc/qspinlock: powerpc qspinlock
> > implementation")
> > 
> > Signed-off-by: Nysal Jan K.A 
> 
> Reviewed-by: Christophe Leroy 
> 
> > ---
> >   arch/powerpc/include/asm/atomic.h | 29 -
> >   1 file changed, 29 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
> > index 486ab7889121..b3a53830446b 100644
> > --- a/arch/powerpc/include/asm/atomic.h
> > +++ b/arch/powerpc/include/asm/atomic.h
> > @@ -130,35 +130,6 @@ ATOMIC_OPS(xor, xor, "", K)
> >   #define arch_atomic_xchg_relaxed(v, new) \
> >  arch_xchg_relaxed(&((v)->counter), (new))
> > 
> > -/*
> > - * Don't want to override the generic atomic_try_cmpxchg_acquire, because
> > - * we add a lock hint to the lwarx, which may not be wanted for the
> > - * _acquire case (and is not used by the other _acquire variants so it
> > - * would be a surprise).
> > - */
> > -static __always_inline bool
> > -arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
> > -{
> > -   int r, o = *old;
> > -   unsigned int eh = IS_ENABLED(CONFIG_PPC64);
> > -
> > -   __asm__ __volatile__ (
> > -"1:lwarx   %0,0,%2,%[eh]   # atomic_try_cmpxchg_acquire\n"
> > -"  cmpw0,%0,%3 \n"
> > -"  bne-2f  \n"
> > -"  stwcx.  %4,0,%2 \n"
> > -"  bne-1b  \n"
> > -"\t"   PPC_ACQUIRE_BARRIER "   \n"
> > -"2:\n"
> > -   : "=" (r), "+m" (v->counter)
> > -   : "r" (>counter), "r" (o), "r" (new), [eh] "n" (eh)
> > -   : "cr0", "memory");
> > -
> > -   if (unlikely(r != o))
> > -   *old = r;
> > -   return likely(r == o);
> > -}
> > -
> >   /**
> >* atomic_fetch_add_unless - add unless the number is a given value
> >* @v: pointer of type atomic_t
> > --
> > 2.39.2
> > 


[PATCH] powerpc/atomics: Remove unused function

2023-02-24 Thread Nysal Jan K.A
Remove arch_atomic_try_cmpxchg_lock function as it is no longer used
since commit 9f61521c7a28 ("powerpc/qspinlock: powerpc qspinlock
implementation")

Signed-off-by: Nysal Jan K.A 
---
 arch/powerpc/include/asm/atomic.h | 29 -
 1 file changed, 29 deletions(-)

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 486ab7889121..b3a53830446b 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -130,35 +130,6 @@ ATOMIC_OPS(xor, xor, "", K)
 #define arch_atomic_xchg_relaxed(v, new) \
arch_xchg_relaxed(&((v)->counter), (new))
 
-/*
- * Don't want to override the generic atomic_try_cmpxchg_acquire, because
- * we add a lock hint to the lwarx, which may not be wanted for the
- * _acquire case (and is not used by the other _acquire variants so it
- * would be a surprise).
- */
-static __always_inline bool
-arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
-{
-   int r, o = *old;
-   unsigned int eh = IS_ENABLED(CONFIG_PPC64);
-
-   __asm__ __volatile__ (
-"1:lwarx   %0,0,%2,%[eh]   # atomic_try_cmpxchg_acquire\n"
-"  cmpw0,%0,%3 \n"
-"  bne-2f  \n"
-"  stwcx.  %4,0,%2 \n"
-"  bne-1b  \n"
-"\t"   PPC_ACQUIRE_BARRIER "   \n"
-"2:\n"
-   : "=" (r), "+m" (v->counter)
-   : "r" (>counter), "r" (o), "r" (new), [eh] "n" (eh)
-   : "cr0", "memory");
-
-   if (unlikely(r != o))
-   *old = r;
-   return likely(r == o);
-}
-
 /**
  * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
-- 
2.39.2
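
For reference, the removed helper follows the usual try_cmpxchg
contract: return true on success, and on failure write the observed
value back through *old so the caller can retry without re-reading.
A userspace C11 sketch of those semantics, minus the PPC64-only lwarx
hint that motivated the helper in the first place:

/* Approximation of the removed helper's semantics. C11's
 * compare-exchange already stores the observed value into *old on
 * failure, matching the "if (r != o) *old = r" path in the asm.
 * There is no portable equivalent of the lwarx EH lock hint. */
#include <stdatomic.h>
#include <stdbool.h>

static inline bool try_cmpxchg_acquire_sketch(_Atomic int *v, int *old, int new)
{
	return atomic_compare_exchange_strong_explicit(v, old, new,
						       memory_order_acquire,
						       memory_order_relaxed);
}

The [eh] operand in the asm sets the lwarx exclusive-access hint, which
tells the core the reservation is being taken for a lock; per the
removed comment, that hint is why the generic acquire variant was not
simply overridden.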