Vincent reported that when a new task is moved into a new cgroup it
gets attached twice to the load tracking.

  sched_move_task()
    task_move_group_fair()
      detach_task_cfs_rq()
      set_task_rq()
      attach_task_cfs_rq()
        attach_entity_load_avg()
          se->avg.last_load_update = cfs_rq->avg.last_load_update // == 0

  enqueue_entity()
    enqueue_entity_load_avg()
      update_cfs_rq_load_avg()
        now = clock()
        __update_load_avg(&cfs_rq->avg)
          cfs_rq->avg.last_load_update = now
          // ages load/util for: now - 0, load/util -> 0
      if (migrated)
        attach_entity_load_avg()
          se->avg.last_load_update = cfs_rq->avg.last_load_update; // now != 0

The problem is that we don't update cfs_rq load_avg before all
entity attach/detach operations. Only enqueue and migrate_task do
this.

By fixing this, the above will not happen, because the
sched_move_task() attach will have updated cfs_rq's last_load_update
time before attach, and in turn the attach will have set the entity's
last_load_update stamp.

Note that there is a further problem with sched_move_task() calling
detach on a task that hasn't yet been attached; this will be taken
care of in a subsequent patch.

Cc: Yuyang Du <[email protected]>
Reported-by: Vincent Guittot <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
 kernel/sched/fair.c |    4 ++++
 1 file changed, 4 insertions(+)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8366,6 +8366,7 @@ static void detach_task_cfs_rq(struct ta
 {
        struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
+       u64 now = cfs_rq_clock_task(cfs_rq);
 
        if (!vruntime_normalized(p)) {
                /*
@@ -8377,6 +8378,7 @@ static void detach_task_cfs_rq(struct ta
        }
 
        /* Catch up with the cfs_rq and remove our load when we leave */
+       update_cfs_rq_load_avg(now, cfs_rq, false);
        detach_entity_load_avg(cfs_rq, se);
 }
 
@@ -8384,6 +8386,7 @@ static void attach_task_cfs_rq(struct ta
 {
        struct sched_entity *se = &p->se;
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
+       u64 now = cfs_rq_clock_task(cfs_rq);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
        /*
@@ -8394,6 +8397,7 @@ static void attach_task_cfs_rq(struct ta
 #endif
 
        /* Synchronize task with its cfs_rq */
+       update_cfs_rq_load_avg(now, cfs_rq, false);
        attach_entity_load_avg(cfs_rq, se);
 
        if (!vruntime_normalized(p))


Reply via email to