On 17 June 2016 at 14:01, Peter Zijlstra <[email protected]> wrote:
> From: Vincent Guittot <[email protected]>
>
> A new fair task is detached and attached from/to task_group with:
>
>   cgroup_post_fork()
>     ss->fork(child) := cpu_cgroup_fork()
>       sched_move_task()
>         task_move_group_fair()
>
> Which is wrong, because at this point in fork() the task isn't fully
> initialized and it cannot 'move' to another group, because it's not
> attached to any group as yet.
>
> In fact, cpu_cgroup_fork needs a small part of sched_move_task so we
> can just call this small part directly instead of sched_move_task. And
> the task doesn't really migrate because it is not yet attached so we
> need the sequence:
>
>   do_fork()
>     sched_fork()
>       __set_task_cpu()
>
>     cgroup_post_fork()
>       set_task_rq() # set task group and runqueue
>
>     wake_up_new_task()
>       select_task_rq() can select a new cpu
>       __set_task_cpu
>       post_init_entity_util_avg
>         attach_task_cfs_rq()
>       activate_task
>         enqueue_task
>
> This patch makes that happen.
>

With this patch and patch 1, the fork sequence looks correct in my test

> Maybe-Signed-off-by: Vincent Guittot <[email protected]>

You can remove the "Maybe-" prefix if you want.

> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
>  kernel/sched/core.c |   67 
> ++++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 47 insertions(+), 20 deletions(-)
>
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7743,27 +7743,17 @@ void sched_offline_group(struct task_gro
>         spin_unlock_irqrestore(&task_group_lock, flags);
>  }
>
> -/* change task's runqueue when it moves between groups.
> - *     The caller of this function should have put the task in its new group
> - *     by now. This function just updates tsk->se.cfs_rq and tsk->se.parent 
> to
> - *     reflect its new group.
> +/*
> + * Set task's runqueue and group.
> + *
> + * In case of a move between groups, we update src and dst group thanks to
> + * sched_class->task_move_group. Otherwise, we just need to set runqueue and
> + * group pointers. The task will be attached to the runqueue during its wake
> + * up.
>   */
> -void sched_move_task(struct task_struct *tsk)
> +static void sched_set_group(struct task_struct *tsk, bool move)
>  {
>         struct task_group *tg;
> -       int queued, running;
> -       struct rq_flags rf;
> -       struct rq *rq;
> -
> -       rq = task_rq_lock(tsk, &rf);
> -
> -       running = task_current(rq, tsk);
> -       queued = task_on_rq_queued(tsk);
> -
> -       if (queued)
> -               dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
> -       if (unlikely(running))
> -               put_prev_task(rq, tsk);
>
>         /*
>          * All callers are synchronized by task_rq_lock(); we do not use RCU
> @@ -7776,11 +7766,37 @@ void sched_move_task(struct task_struct
>         tsk->sched_task_group = tg;
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> -       if (tsk->sched_class->task_move_group)
> +       if (move && tsk->sched_class->task_move_group)
>                 tsk->sched_class->task_move_group(tsk);
>         else
>  #endif
>                 set_task_rq(tsk, task_cpu(tsk));
> +}
> +
> +/*
> + * Change task's runqueue when it moves between groups.
> + *
> + * The caller of this function should have put the task in its new group by
> + * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to 
> reflect
> + * its new group.
> + */
> +void sched_move_task(struct task_struct *tsk)
> +{
> +       int queued, running;
> +       struct rq_flags rf;
> +       struct rq *rq;
> +
> +       rq = task_rq_lock(tsk, &rf);
> +
> +       running = task_current(rq, tsk);
> +       queued = task_on_rq_queued(tsk);
> +
> +       if (queued)
> +               dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
> +       if (unlikely(running))
> +               put_prev_task(rq, tsk);
> +
> +       sched_set_group(tsk, true);
>
>         if (unlikely(running))
>                 tsk->sched_class->set_curr_task(rq);
> @@ -8208,9 +8224,20 @@ static void cpu_cgroup_css_free(struct c
>         sched_free_group(tg);
>  }
>
> +/*
> + * This is called before wake_up_new_task(), therefore we really only
> + * have to set its group bits, all the other stuff does not apply.
> + */
>  static void cpu_cgroup_fork(struct task_struct *task)
>  {
> -       sched_move_task(task);
> +       struct rq_flags rf;
> +       struct rq *rq;
> +
> +       rq = task_rq_lock(task, &rf);
> +
> +       sched_set_group(task, false);
> +
> +       task_rq_unlock(rq, task, &rf);
>  }
>
>  static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
>
>

Reply via email to