From: Greg Kroah-Hartman <gre...@linuxfoundation.org>

From: Peter Zijlstra <pet...@infradead.org>

commit c20cf065d4a619d394d23290093b1002e27dff86 upstream.

When affine_move_task() issues a migration_cpu_stop(), the purpose of
that function is to complete that @pending, not any random other
p->migration_pending that might have gotten installed since.

This realization much simplifies migration_cpu_stop() and allows
further necessary steps to fix all this as it provides the guarantee
that @pending's stopper will complete @pending (and not some random
other @pending).

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Signed-off-by: Ingo Molnar <mi...@kernel.org>
Reviewed-by: Valentin Schneider <valentin.schnei...@arm.com>
Link: https://lkml.kernel.org/r/20210224131355.430014...@infradead.org
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
---
 kernel/sched/core.c |   56 +++++++---------------------------------------------
 1 file changed, 8 insertions(+), 48 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct
  */
 static int migration_cpu_stop(void *data)
 {
-       struct set_affinity_pending *pending;
        struct migration_arg *arg = data;
+       struct set_affinity_pending *pending = arg->pending;
        struct task_struct *p = arg->task;
        int dest_cpu = arg->dest_cpu;
        struct rq *rq = this_rq();
@@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data
        raw_spin_lock(&p->pi_lock);
        rq_lock(rq, &rf);
 
-       pending = p->migration_pending;
-       if (pending && !arg->pending) {
-               /*
-                * This happens from sched_exec() and migrate_task_to(),
-                * neither of them care about pending and just want a task to
-                * maybe move about.
-                *
-                * Even if there is a pending, we can ignore it, since
-                * affine_move_task() will have it's own stop_work's in flight
-                * which will manage the completion.
-                *
-                * Notably, pending doesn't need to match arg->pending. This can
-                * happen when tripple concurrent affine_move_task() first sets
-                * pending, then clears pending and eventually sets another
-                * pending.
-                */
-               pending = NULL;
-       }
-
        /*
         * If task_rq(p) != rq, it cannot be migrated here, because we're
         * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data
                        goto out;
 
                if (pending) {
-                       p->migration_pending = NULL;
+                       if (p->migration_pending == pending)
+                               p->migration_pending = NULL;
                        complete = true;
                }
 
-               /* migrate_enable() --  we must not race against SCA */
-               if (dest_cpu < 0) {
-                       /*
-                        * When this was migrate_enable() but we no longer
-                        * have a @pending, a concurrent SCA 'fixed' things
-                        * and we should be valid again. Nothing to do.
-                        */
-                       if (!pending) {
-                               WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), 
&p->cpus_mask));
-                               goto out;
-                       }
-
+               if (dest_cpu < 0)
                        dest_cpu = cpumask_any_distribute(&p->cpus_mask);
-               }
 
                if (task_on_rq_queued(p))
                        rq = __migrate_task(rq, &rf, p, dest_cpu);
                else
                        p->wake_cpu = dest_cpu;
 
-       } else if (dest_cpu < 0 || pending) {
+       } else if (pending) {
                /*
                 * This happens when we get migrated between migrate_enable()'s
                 * preempt_enable() and scheduling the stopper task. At that
@@ -1989,23 +1959,14 @@ static int migration_cpu_stop(void *data
                 * ->pi_lock, so the allowed mask is stable - if it got
                 * somewhere allowed, we're done.
                 */
-               if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
-                       p->migration_pending = NULL;
+               if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
+                       if (p->migration_pending == pending)
+                               p->migration_pending = NULL;
                        complete = true;
                        goto out;
                }
 
                /*
-                * When this was migrate_enable() but we no longer have an
-                * @pending, a concurrent SCA 'fixed' things and we should be
-                * valid again. Nothing to do.
-                */
-               if (!pending) {
-                       WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), 
&p->cpus_mask));
-                       goto out;
-               }
-
-               /*
                 * When migrate_enable() hits a rq mis-match we can't reliably
                 * determine is_migration_disabled() and so have to chase after
                 * it.
@@ -2022,7 +1983,6 @@ out:
                complete_all(&pending->done);
 
        /* For pending->{arg,stop_work} */
-       pending = arg->pending;
        if (pending && refcount_dec_and_test(&pending->refs))
                wake_up_var(&pending->refs);
 


Reply via email to