Re: [PATCH 0/6] powerpc/qspinlock: Fix yield latency bug and other
On Mon, Oct 16, 2023 at 10:42:59PM +1000, Nicholas Piggin wrote:
> This fixes a long-standing latency bug in the powerpc qspinlock
> implementation that quite a few people have reported and helped
> out with debugging.
>
> The first patch is a minimal fix that avoids the problem. The
> other patches are streamlining and improvements after the fix.
>
> Thanks,
> Nick
>
> Nicholas Piggin (6):
>   powerpc/qspinlock: Fix stale propagated yield_cpu
>   powerpc/qspinlock: stop queued waiters trying to set lock sleepy
>   powerpc/qspinlock: propagate owner preemptedness rather than CPU
>     number
>   powerpc/qspinlock: don't propagate the not-sleepy state
>   powerpc/qspinlock: Propagate sleepy if previous waiter is preempted
>   powerpc/qspinlock: Rename yield_propagate_owner tunable
>
>  arch/powerpc/lib/qspinlock.c | 119 ++++++++++++++++++++++++++++++---------------------------------
>  1 file changed, 52 insertions(+), 67 deletions(-)
>
> --
> 2.42.0
>

Just a minor comment regarding patch 2. For the series:

Reviewed-by: Nysal Jan K.A
Re: [PATCH 2/6] powerpc/qspinlock: stop queued waiters trying to set lock sleepy
On Mon, Oct 16, 2023 at 10:43:01PM +1000, Nicholas Piggin wrote:
> If a queued waiter notices the lock owner or the previous waiter has
> been preempted, it attempts to mark the lock sleepy, but it does this
> as a try-set operation using the original lock value it got when
> queueing, which will become stale as the queue progresses, and the
> try-set will fail. Drop this and just set the sleepy seen clock.
>
> Signed-off-by: Nicholas Piggin
> ---
>  arch/powerpc/lib/qspinlock.c | 24 ++++++++++--------------
>  1 file changed, 10 insertions(+), 14 deletions(-)
>
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 6dd2f46bd3ef..75608ced14c2 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -247,22 +247,18 @@ static __always_inline void seen_sleepy_lock(void)
>  		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
>  }
>
> -static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
> +static __always_inline void seen_sleepy_node(void)
>  {
>  	if (pv_sleepy_lock) {
>  		if (pv_sleepy_lock_interval_ns)
>  			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
> -		if (val & _Q_LOCKED_VAL) {
> -			if (!(val & _Q_SLEEPY_VAL))
> -				try_set_sleepy(lock, val);
> -		}
> +		/* Don't set sleepy because we likely have a stale val */
>  	}
>  }

seen_sleepy_lock() and seen_sleepy_node() are now the same

>
> -static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
> +static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
>  {
> -	int cpu = decode_tail_cpu(val);
> -	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
> +	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
>  	int idx;
>
>  	/*
> @@ -381,9 +377,8 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
>  }
>
>  /* Called inside spin_begin() */
> -static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
> +static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
>  {
> -	int prev_cpu = decode_tail_cpu(val);
>  	u32 yield_count;
>  	int yield_cpu;
>  	bool preempted = false;
> @@ -412,7 +407,7 @@ static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
>  		spin_end();
>
>  		preempted = true;
> -		seen_sleepy_node(lock, val);
> +		seen_sleepy_node();
>
>  		smp_rmb();
>
> @@ -436,7 +431,7 @@ static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
>  		spin_end();
>
>  		preempted = true;
> -		seen_sleepy_node(lock, val);
> +		seen_sleepy_node();
>
>  		smp_rmb(); /* See __yield_to_locked_owner comment */
>
> @@ -587,7 +582,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
>  	 * head of the waitqueue.
>  	 */
>  	if (old & _Q_TAIL_CPU_MASK) {
> -		struct qnode *prev = get_tail_qnode(lock, old);
> +		int prev_cpu = decode_tail_cpu(old);
> +		struct qnode *prev = get_tail_qnode(lock, prev_cpu);
>
>  		/* Link @node into the waitqueue. */
>  		WRITE_ONCE(prev->next, node);
> @@ -597,7 +593,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
>  		while (!READ_ONCE(node->locked)) {
>  			spec_barrier();
>
> -			if (yield_to_prev(lock, node, old, paravirt))
> +			if (yield_to_prev(lock, node, prev_cpu, paravirt))
>  				seen_preempted = true;
>  		}
>  		spec_barrier();
> --
> 2.42.0
>
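To illustrate the comment above: with the val-based try-set dropped, the
two helpers reduce to the same body, so they could presumably be folded
into a single function. A minimal untested sketch (the merged name is
hypothetical), with both seen_sleepy_lock() and seen_sleepy_node() call
sites switched to it:

    static __always_inline void seen_sleepy(void)
    {
            /* Record when we last saw a preempted lock holder or waiter. */
            if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
                    this_cpu_write(sleepy_lock_seen_clock, sched_clock());
    }

The nested form in seen_sleepy_node() and the single condition in
seen_sleepy_lock() write the clock under exactly the same circumstances,
so the merge would not change behaviour.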
Re: [PATCH] powerpc/atomics: Remove unused function
Michael,
Any comments on this one?

On Fri, Feb 24, 2023 at 11:02:31AM +0000, Christophe Leroy wrote:
>
>
> On 24/02/2023 at 11:39, Nysal Jan K.A wrote:
> > [You don't often get email from ny...@linux.ibm.com. Learn why this is
> > important at https://aka.ms/LearnAboutSenderIdentification ]
> >
> > Remove arch_atomic_try_cmpxchg_lock function as it is no longer used
> > since commit 9f61521c7a28 ("powerpc/qspinlock: powerpc qspinlock
> > implementation")
> >
> > Signed-off-by: Nysal Jan K.A
>
> Reviewed-by: Christophe Leroy
>
> > ---
> >  arch/powerpc/include/asm/atomic.h | 29 -----------------------------
> >  1 file changed, 29 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
> > index 486ab7889121..b3a53830446b 100644
> > --- a/arch/powerpc/include/asm/atomic.h
> > +++ b/arch/powerpc/include/asm/atomic.h
> > @@ -130,35 +130,6 @@ ATOMIC_OPS(xor, xor, "", K)
> >  #define arch_atomic_xchg_relaxed(v, new) \
> >  	arch_xchg_relaxed(&((v)->counter), (new))
> >
> > -/*
> > - * Don't want to override the generic atomic_try_cmpxchg_acquire, because
> > - * we add a lock hint to the lwarx, which may not be wanted for the
> > - * _acquire case (and is not used by the other _acquire variants so it
> > - * would be a surprise).
> > - */
> > -static __always_inline bool
> > -arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
> > -{
> > -	int r, o = *old;
> > -	unsigned int eh = IS_ENABLED(CONFIG_PPC64);
> > -
> > -	__asm__ __volatile__ (
> > -"1:	lwarx	%0,0,%2,%[eh]	# atomic_try_cmpxchg_acquire	\n"
> > -"	cmpw	0,%0,%3						\n"
> > -"	bne-	2f						\n"
> > -"	stwcx.	%4,0,%2						\n"
> > -"	bne-	1b						\n"
> > -"\t"	PPC_ACQUIRE_BARRIER "					\n"
> > -"2:								\n"
> > -	: "=&r" (r), "+m" (v->counter)
> > -	: "r" (&v->counter), "r" (o), "r" (new), [eh] "n" (eh)
> > -	: "cr0", "memory");
> > -
> > -	if (unlikely(r != o))
> > -		*old = r;
> > -	return likely(r == o);
> > -}
> > -
> >  /**
> >   * atomic_fetch_add_unless - add unless the number is a given value
> >   * @v: pointer of type atomic_t
> > --
> > 2.39.2
> >
[PATCH] powerpc/atomics: Remove unused function
Remove arch_atomic_try_cmpxchg_lock function as it is no longer used
since commit 9f61521c7a28 ("powerpc/qspinlock: powerpc qspinlock
implementation")

Signed-off-by: Nysal Jan K.A
---
 arch/powerpc/include/asm/atomic.h | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 486ab7889121..b3a53830446b 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -130,35 +130,6 @@ ATOMIC_OPS(xor, xor, "", K)
 #define arch_atomic_xchg_relaxed(v, new) \
 	arch_xchg_relaxed(&((v)->counter), (new))
 
-/*
- * Don't want to override the generic atomic_try_cmpxchg_acquire, because
- * we add a lock hint to the lwarx, which may not be wanted for the
- * _acquire case (and is not used by the other _acquire variants so it
- * would be a surprise).
- */
-static __always_inline bool
-arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
-{
-	int r, o = *old;
-	unsigned int eh = IS_ENABLED(CONFIG_PPC64);
-
-	__asm__ __volatile__ (
-"1:	lwarx	%0,0,%2,%[eh]	# atomic_try_cmpxchg_acquire	\n"
-"	cmpw	0,%0,%3						\n"
-"	bne-	2f						\n"
-"	stwcx.	%4,0,%2						\n"
-"	bne-	1b						\n"
-"\t"	PPC_ACQUIRE_BARRIER "					\n"
-"2:								\n"
-	: "=&r" (r), "+m" (v->counter)
-	: "r" (&v->counter), "r" (o), "r" (new), [eh] "n" (eh)
-	: "cr0", "memory");
-
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-
 /**
  * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
-- 
2.39.2
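For reference, the removed helper implemented try-cmpxchg-with-acquire
semantics: compare v->counter against *old, store new if they match, and
on failure write the value actually observed back into *old (acquire
ordering only on success, since the failure branch skips the barrier). A
rough, illustrative sketch of the same semantics in portable C using the
GCC/Clang builtin follows; it is not kernel code and cannot express the
lwarx EH lock hint that the asm adds on PPC64, which was the reason this
helper existed separately in the first place:

    #include <stdbool.h>

    /* Illustrative only: the removed helper's semantics, minus the EH hint. */
    static inline bool try_cmpxchg_acquire_sketch(int *v, int *old, int new)
    {
            /* Strong variant (the asm loops on stwcx. failure); on a
             * comparison failure the builtin stores the observed value
             * into *old, matching the "*old = r" path above. */
            return __atomic_compare_exchange_n(v, old, new, false,
                                               __ATOMIC_ACQUIRE,
                                               __ATOMIC_RELAXED);
    }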