rt: Simplify the IPI rt balancing logic

Peter Zijlstra Thu, 04 May 2017 08:33:53 -0700

On Mon, Apr 24, 2017 at 11:47:32AM -0400, Steven Rostedt wrote:
>  static int rto_next_cpu(struct rq *rq)
>  {
>       int cpu;
>  
>       /*
> +      * When starting the IPI RT pushing, the rto_cpu is set to nr_cpu_ids
> +      * or greater. rto_next_cpu() will simply return the first CPU found in
> +      * the rto_mask.
> +      *
> +      * If rto_next_cpu() is called with rto_cpu less than nr_cpu_ids, it
> +      * will return the next CPU found in the rto_mask.
> +      *
> +      * If there are no more CPUs left in the rto_mask, then a check is made
> +      * against rto_loop and rto_loop_next. rto_loop is only updated with
> +      * the rto_lock held, but any CPU may increment the rto_loop_next
> +      * without any locking.
>        */
> +again:
> +     if (rq->rd->rto_cpu >= nr_cpu_ids) {
>               cpu = cpumask_first(rq->rd->rto_mask);
> +             rq->rd->rto_cpu = cpu;
> +             /* If cpu is nr_cpu_ids, then there are no overloaded rqs */
> +             return cpu;
>       }
>  
> +     cpu = cpumask_next(rq->rd->rto_cpu, rq->rd->rto_mask);
> +     rq->rd->rto_cpu = cpu;
>  
> +     if (cpu < nr_cpu_ids)
> +             return cpu;
>  
> +     if (rq->rd->rto_loop == atomic_read(&rq->rd->rto_loop_next))
> +             return cpu;
>  
> +     rq->rd->rto_loop = atomic_read(&rq->rd->rto_loop_next);
> +     goto again;
> +}


I think you want to write that as:

        struct root_domain *rd = rq->rd;
        int cpu, next;

        /* comment */
        for (;;) { 
                if (rd->rto_cpu >= nr_cpu_ids) {
                        cpu = cpumask_first(rd->rto_mask);
                        rd->rto_cpu = cpu;
                        return cpu;
                }

                cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
                rd->rto_cpu = cpu;

                if (cpu < nr_cpu_ids)
                        break;

//              rd->rto_cpu = -1;

                /*
                 * ACQUIRE ensures we see the @rto_mask changes
                 * made prior to the @next value observed.
                 * 
                 * Matches WMB in rt_set_overload().
                 */
                next = atomic_read_acquire(&rd->rto_loop_next);

                if (rd->rto_loop == next)
                        break;

                rd->rto_loop = next;
        }

        return cpu;

And I don't fully understand the whole rto_cpu >= nr_cpu_ids thing;
can't you simply reset the thing to -1 and always use cpumask_next(),
as per the // comment above?

> +static inline bool rto_start_trylock(atomic_t *v)
> +{
> +     return !atomic_cmpxchg(v, 0, 1);

Arguably this could be: !atomic_cmpxchg_acquire(v, 0, 1);

>  }
>  
> +static inline void rto_start_unlock(atomic_t *v)
> +{
> +     atomic_set_release(v, 0);
> +}
>  

>  static void tell_cpu_to_push(struct rq *rq)
>  {
> +     int cpu = nr_cpu_ids;
>  
> +     /* Keep the loop going if the IPI is currently active */
> +     atomic_inc_return(&rq->rd->rto_loop_next);

Since rt_set_overload() already provides a WMB, we don't need an
ordered primitive here and atomic_inc() is fine.

>  
> +     /* Only one CPU can initiate a loop at a time */
> +     if (!rto_start_trylock(&rq->rd->rto_loop_start))
>               return;
>  
> +     raw_spin_lock(&rq->rd->rto_lock);
> +
> +     /*
> +      * The rto_cpu is updated under the lock, if it has a valid cpu
> +      * then the IPI is still running and will continue due to the
> +      * update to loop_next, and nothing needs to be done here.
> +      * Otherwise it is finishing up and an ipi needs to be sent.
> +      */
> +     if (rq->rd->rto_cpu >= nr_cpu_ids)
//      if (rq->rd->rto_cpu < 0)

> +             cpu = rto_next_cpu(rq);
>  
> +     raw_spin_unlock(&rq->rd->rto_lock);
> +
> +     rto_start_unlock(&rq->rd->rto_loop_start);
> +
> +     if (cpu < nr_cpu_ids)
> +             irq_work_queue_on(&rq->rd->rto_push_work, cpu);
>  }
>  
>  /* Called from hardirq context */
> +void rto_push_irq_work_func(struct irq_work *work)
>  {
> +     struct rq *rq;
>       int this_cpu;
>       int cpu;
>  
> +     this_cpu = smp_processor_id();
>       rq = cpu_rq(this_cpu);

        rq = this_rq();

>  
> +     /*
> +      * We do not need to grab the lock to check for has_pushable_tasks.
> +      * When it gets updated, a check is made if a push is possible.
> +      */
>       if (has_pushable_tasks(rq)) {
>               raw_spin_lock(&rq->lock);
> +             push_rt_tasks(rq);
>               raw_spin_unlock(&rq->lock);
>       }
>  
> +     raw_spin_lock(&rq->rd->rto_lock);
>  
> +     /* Pass the IPI to the next rt overloaded queue */
> +     cpu = rto_next_cpu(rq);
>  
> +     raw_spin_unlock(&rq->rd->rto_lock);
>  
>       if (cpu >= nr_cpu_ids)
>               return;
>  
>       /* Try the next RT overloaded CPU */
> +     irq_work_queue_on(&rq->rd->rto_push_work, cpu);
>  }

Re: [PATCH tip/sched/core v2] sched/rt: Simplify the IPI rt balancing logic

Reply via email to