On Thu, Aug 20, 2015 at 03:17:21AM +0200, Peter Zijlstra wrote:
> 
> I did something like this on top.. please have a look at the XXX and
> integrate.

Yes, I will look at the XXX and integrate it.

> 
> ---
> 
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -2664,8 +2664,8 @@ static inline u64 cfs_rq_clock_task(stru
>  /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
>  static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
>  {
> -     int decayed;
>       struct sched_avg *sa = &cfs_rq->avg;
> +     int decayed;
>  
>       if (atomic_long_read(&cfs_rq->removed_load_avg)) {
>               long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
> @@ -2695,15 +2695,16 @@ static inline int update_cfs_rq_load_avg
>  static inline void update_load_avg(struct sched_entity *se, int update_tg)
>  {
>       struct cfs_rq *cfs_rq = cfs_rq_of(se);
> -     int cpu = cpu_of(rq_of(cfs_rq));
>       u64 now = cfs_rq_clock_task(cfs_rq);
> +     int cpu = cpu_of(rq_of(cfs_rq));
>  
>       /*
>        * Track task load average for carrying it to new CPU after migrated, and
>        * track group sched_entity load average for task_h_load calc in migration
>        */
>       __update_load_avg(now, cpu, &se->avg,
> -             se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
> +                       se->on_rq * scale_load_down(se->load.weight),
> +                       cfs_rq->curr == se, NULL);
>  
>       if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
>               update_tg_load_avg(cfs_rq, 0);
> @@ -2718,9 +2719,10 @@ static void attach_entity_load_avg(struc
>        * update here. we have to update it with prev cfs_rq just before changing
>        * se's cfs_rq, and get here soon.
>        */
> -     if (se->avg.last_update_time)
> +     if (se->avg.last_update_time) {
>               __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
> -                             &se->avg, 0, 0, NULL);
> +                               &se->avg, 0, 0, NULL);
> +     }
>  
>       se->avg.last_update_time = cfs_rq->avg.last_update_time;
>       cfs_rq->avg.load_avg += se->avg.load_avg;
> @@ -2732,17 +2734,13 @@ static void attach_entity_load_avg(struc
>  static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  {
>       __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
> -                     &se->avg, se->on_rq * scale_load_down(se->load.weight),
> -                     cfs_rq->curr == se, NULL);
> +                       &se->avg, se->on_rq * scale_load_down(se->load.weight),
> +                       cfs_rq->curr == se, NULL);
>  
> -     cfs_rq->avg.load_avg =
> -             max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
> -     cfs_rq->avg.load_sum =
> -             max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
> -     cfs_rq->avg.util_avg =
> -             max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
> -     cfs_rq->avg.util_sum =
> -             max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
> +     cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
> +     cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
> +     cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
> +     cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
>  }
>  
>  /* Add the load generated by se into cfs_rq's load average */
> @@ -2751,14 +2749,14 @@ enqueue_entity_load_avg(struct cfs_rq *c
>  {
>       struct sched_avg *sa = &se->avg;
>       u64 now = cfs_rq_clock_task(cfs_rq);
> -     int migrated = 0, decayed;
> +     int migrated, decayed;
>  
> -     if (sa->last_update_time == 0)
> -             migrated = 1;
> -     else
> +     migrated = !sa->last_update_time;
> +     if (!migrated) {
>               __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
>                       se->on_rq * scale_load_down(se->load.weight),
>                       cfs_rq->curr == se, NULL);
> +     }
>  
>       decayed = update_cfs_rq_load_avg(now, cfs_rq);
>  
> @@ -2781,7 +2779,7 @@ dequeue_entity_load_avg(struct cfs_rq *c
>       cfs_rq->runnable_load_avg =
>               max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
>       cfs_rq->runnable_load_sum =
> -             max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
> +             max_t(s64,  cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
>  }
>  
>  /*
> @@ -2849,6 +2847,11 @@ static inline void
>  dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
>  static inline void remove_entity_load_avg(struct sched_entity *se) {}
>  
> +static inline void
> +attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
> +static inline void
> +detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
> +
>  static inline int idle_balance(struct rq *rq)
>  {
>       return 0;
> @@ -7915,29 +7918,50 @@ prio_changed_fair(struct rq *rq, struct
>               check_preempt_curr(rq, p, 0);
>  }
>  
> -static void detach_task_cfs_rq(struct task_struct *p, int queued)
> +static inline bool vruntime_normalized(struct task_struct *p)
>  {
> +     int queued = task_on_rq_queued(p);
>       struct sched_entity *se = &p->se;
> -     struct cfs_rq *cfs_rq = cfs_rq_of(se);
>  
>       /*
> -      * Ensure the task's vruntime is normalized, so that after attaching
> -      * back it to a cfs_rq the enqueue_entity() will do the right thing.
> -      *
>        * If it's queued, then the dequeue_entity(.flags=0) will already
> -      * have normalized the vruntime, if it's !queued, then only when
> -      * the task is sleeping it still has a non-normalized vruntime.
> -      *
> +      * have normalized the vruntime.
> +      */
> +     if (queued)
> +             return true;
> +
> +     /*
>        * When !queued, vruntime of the task has usually NOT been normalized.
>        * But there are some cases where it has already been normalized:
> -      *
> -      * - Moving a forked child which is waiting for being woken up by
> +      */
> +
> +     /*
> +      * - a forked child which is waiting for being woken up by
>        *   wake_up_new_task().
> -      * - Moving a task which has been woken up by try_to_wake_up() and
> +      */
> +     if (!se->sum_exec_runtime || p->state == TASK_WAKING)
> +             return true;
> +
> +     /*
> +      * - a task which has been woken up by try_to_wake_up() and
>        *   waiting for actually being woken up by sched_ttwu_pending().
> +      *
> +      * XXX this appears wrong!! check history, 
> +      * we appear to always set queued and RUNNING under the same lock instance
> +      * might be from before TASK_WAKING ?
>        */
> -     if (!queued && p->state != TASK_RUNNING &&
> -         se->sum_exec_runtime && p->state != TASK_WAKING) {
> +     if (p->state == TASK_RUNNING)
> +             return true;
> +
> +     return false;
> +}
> +
> +static void detach_task_cfs_rq(struct task_struct *p)
> +{
> +     struct sched_entity *se = &p->se;
> +     struct cfs_rq *cfs_rq = cfs_rq_of(se);
> +
> +     if (!vruntime_normalized(p)) {
>               /*
>                * Fix up our vruntime so that the current sleep doesn't
>                * cause 'unlimited' sleep bonus.
> @@ -7946,10 +7970,8 @@ static void detach_task_cfs_rq(struct ta
>               se->vruntime -= cfs_rq->min_vruntime;
>       }
>  
> -#ifdef CONFIG_SMP
>       /* Catch up with the cfs_rq and remove our load when we leave */
>       detach_entity_load_avg(cfs_rq, se);
> -#endif
>  }
>  
>  static void attach_task_cfs_rq(struct task_struct *p, int queued)
> @@ -7965,49 +7987,23 @@ static void attach_task_cfs_rq(struct ta
>       se->depth = se->parent ? se->parent->depth + 1 : 0;
>  #endif
>  
> -#ifdef CONFIG_SMP
>       /* synchronize task with its cfs_rq */
>       attach_entity_load_avg(cfs_rq, se);
> -#endif
>  
> -     /*
> -      * Ensure the task has a non-normalized vruntime when attaching it
> -      * to a cfs_rq, so that enqueue_entity() at wake-up time will do
> -      * the right thing.
> -      *
> -      * If it's queued, then the enqueue_entity(.flags=0) makes the task
> -      * has non-normalized vruntime, if it's !queued, then it still has
> -      * a normalized vruntime.
> -      *
> -      * When !queued, a task usually should have a non-normalized vruntime
> -      * after being attached to a cfs_rq. But there are some cases where
> -      * we should keep it normalized:
> -      *
> -      * - Moving a forked child which is waiting for being woken up by
> -      *   wake_up_new_task().
> -      * - Moving a task which has been woken up by try_to_wake_up() and
> -      *   waiting for actually being woken up by sched_ttwu_pending().
> -      */
> -     if (!queued && p->state != TASK_RUNNING &&
> -         se->sum_exec_runtime && p->state != TASK_WAKING)
> +     if (!vruntime_normalized(p))
>               se->vruntime += cfs_rq->min_vruntime;
>  }
>  
>  static void switched_from_fair(struct rq *rq, struct task_struct *p)
>  {
> -     detach_task_cfs_rq(p, task_on_rq_queued(p));
> +     detach_task_cfs_rq(p);
>  }
>  
> -/*
> - * We switched to the sched_fair class.
> - */
>  static void switched_to_fair(struct rq *rq, struct task_struct *p)
>  {
> -     int queued = task_on_rq_queued(p);
> -
> -     attach_task_cfs_rq(p, queued);
> +     attach_task_cfs_rq(p);
>  
> -     if (queued) {
> +     if (task_on_rq_queued(p)) {
>               /*
>                * We were most likely switched from sched_rt, so
>                * kick off the schedule if running, otherwise just see
> @@ -8056,8 +8052,7 @@ static void task_move_group_fair(struct
>  {
>       detach_task_cfs_rq(p, queued);
>       set_task_rq(p, task_cpu(p));
> -
> -     /* tell se's cfs_rq has been changed */
> +     /* tell se's cfs_rq has been changed -- migrated */
>       p->se.avg.last_update_time = 0;
>       attach_task_cfs_rq(p, queued);
>  }