On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Queued waiters which are not at the head of the queue don't spin on
> the lock word but on their own qnode lock word, waiting for the previous queued
> CPU to release them. Add an option which allows these waiters to yield
> to the previous CPU if its vCPU is preempted.
> 
> Disable this option by default for now, i.e., no logical change.
> ---
>  arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 55286ac91da5..b39f8c5b329c 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true;
>  static int HEAD_SPINS __read_mostly = (1<<8);
>  
>  static bool pv_yield_owner __read_mostly = true;
> +static bool pv_yield_prev __read_mostly = true;

Similar suggestion: maybe pv_yield_prev_enabled would read better.

Isn't this enabled by default, contrary to the commit message?
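If the intent really is disabled-by-default, I'd expect something like this (just a
sketch, folding in the rename suggestion above as well):

	static bool pv_yield_prev_enabled __read_mostly = false;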


>  
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>  
> @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va
>       cpu_relax();
>  }
>  
> +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)

yield_to_locked_owner() takes the raw lock value and works out the cpu to yield to.
For consistency, I think yield_to_prev() should take the raw value and work it out
too.
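Something like this (just a sketch; prev_cpu is derived with get_tail_cpu(), the same
helper used in the hunk below):

	static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
	{
		int prev_cpu = get_tail_cpu(val);

		/* ... rest of the body as in the patch, unchanged ... */
	}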

> +{
> +     u32 yield_count;
> +
> +     if (!paravirt)
> +             goto relax;
> +
> +     if (!pv_yield_prev)
> +             goto relax;
> +
> +     yield_count = yield_count_of(prev_cpu);
> +     if ((yield_count & 1) == 0)
> +             goto relax; /* owner vcpu is running */
> +
> +     smp_rmb(); /* See yield_to_locked_owner comment */
> +
> +     if (!node->locked) {
> +             yield_to_preempted(prev_cpu, yield_count);
> +             return;
> +     }
> +
> +relax:
> +     cpu_relax();
> +}
> +
>  
>  static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
>  {
> @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>        */
>       if (old & _Q_TAIL_CPU_MASK) {
>               struct qnode *prev = get_tail_qnode(lock, old);
> +             int prev_cpu = get_tail_cpu(old);

This could then be removed.

>  
>               /* Link @node into the waitqueue. */
>               WRITE_ONCE(prev->next, node);
>  
>               /* Wait for mcs node lock to be released */
>               while (!node->locked)
> -                     cpu_relax();
> +                     yield_to_prev(lock, node, prev_cpu, paravirt);

And this would become:
                        yield_to_prev(lock, node, old, paravirt);


>  
>               smp_rmb(); /* acquire barrier for the mcs lock */
>       }
> @@ -448,12 +475,29 @@ static int pv_yield_owner_get(void *data, u64 *val)
>  
>  DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
>  
> +static int pv_yield_prev_set(void *data, u64 val)
> +{
> +     pv_yield_prev = !!val;
> +
> +     return 0;
> +}
> +
> +static int pv_yield_prev_get(void *data, u64 *val)
> +{
> +     *val = pv_yield_prev;
> +
> +     return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
> +
>  static __init int spinlock_debugfs_init(void)
>  {
>       debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, 
> &fops_steal_spins);
>       debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, 
> &fops_head_spins);
>       if (is_shared_processor()) {
>               debugfs_create_file("qspl_pv_yield_owner", 0600, 
> arch_debugfs_dir, NULL, &fops_pv_yield_owner);
> +             debugfs_create_file("qspl_pv_yield_prev", 0600, 
> arch_debugfs_dir, NULL, &fops_pv_yield_prev);
>       }
>  
>       return 0;
