On 15/10/20 12:05, Peter Zijlstra wrote:
> +static int affine_move_task(struct rq *rq, struct rq_flags *rf,
> +                         struct task_struct *p, int dest_cpu, unsigned int flags)
> +{
> +     struct set_affinity_pending my_pending = { }, *pending = NULL;
> +     struct migration_arg arg = {
> +             .task = p,
> +             .dest_cpu = dest_cpu,
> +     };
> +     bool complete = false;
> +
> +     /* Can the task run on the task's current CPU? If so, we're done */
> +     if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
> +             pending = p->migration_pending;
> +             if (pending) {
> +                     p->migration_pending = NULL;
> +                     complete = true;

Deciphering my TLA+ deadlock traces leads me to think this needs

                        refcount_inc(&pending->refs);

because the 'goto do_complete' leads us to an unconditional decrement (see
the sketch below the hunk).

> +             }
> +             task_rq_unlock(rq, p, rf);
> +
> +             if (complete)
> +                     goto do_complete;
                        ^^^^
                       that here

> +
> +             return 0;
> +     }
> +
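
IOW, a sketch of that hunk with the increment added (untested; comment mine):

	if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
		pending = p->migration_pending;
		if (pending) {
			/* Pair with the unconditional refcount_dec_and_test() below */
			refcount_inc(&pending->refs);
			p->migration_pending = NULL;
			complete = true;
		}
		task_rq_unlock(rq, p, rf);

		if (complete)
			goto do_complete;

		return 0;
	}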

[...]

> +do_complete:
> +             if (complete)
> +                     complete_all(&pending->done);
> +     }
> +
> +     wait_for_completion(&pending->done);
> +
> +     if (refcount_dec_and_test(&pending->refs))
           ^^^^^^^^^^^^^^^^^^^^^^^
           leads to this guy there

> +             wake_up_var(&pending->refs);
> +
> +     wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
> +
> +     return 0;
> +}
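
To make the pairing explicit, my reading of the tail (comments mine,
assuming the refcount_inc() above):

	do_complete:
			if (complete)
				complete_all(&pending->done);
		}

		wait_for_completion(&pending->done);

		/*
		 * Unconditional drop: every path reaching this must own a
		 * reference, which is why the early complete path needs the
		 * refcount_inc() suggested above.
		 */
		if (refcount_dec_and_test(&pending->refs))
			wake_up_var(&pending->refs);

		/* Wait for stacked requests to drop their refs on our on-stack pending */
		wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));

		return 0;
	}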
