[tip:sched/core] sched: Guarantee new group-entities always have weight
Commit-ID: 0ac9b1c21874d2490331233b3242085f8151e166 Gitweb: http://git.kernel.org/tip/0ac9b1c21874d2490331233b3242085f8151e166 Author: Paul Turner AuthorDate: Wed, 16 Oct 2013 11:16:27 -0700 Committer: Ingo Molnar CommitDate: Tue, 29 Oct 2013 12:02:23 +0100 sched: Guarantee new group-entities always have weight Currently, group entity load-weights are initialized to zero. This admits some races with respect to the first time they are re-weighted in early use. ( Let g[x] denote the se for "g" on cpu "x". ) Suppose that we have root->a and that a enters a throttled state, immediately followed by a[0]->t1 (the only task running on cpu[0]) blocking: put_prev_task(group_cfs_rq(a[0]), t1) put_prev_entity(..., t1) check_cfs_rq_runtime(group_cfs_rq(a[0])) throttle_cfs_rq(group_cfs_rq(a[0])) Then, before unthrottling occurs, let a[0]->b[0]->t2 wake for the first time: enqueue_task_fair(rq[0], t2) enqueue_entity(group_cfs_rq(b[0]), t2) enqueue_entity_load_avg(group_cfs_rq(b[0]), t2) account_entity_enqueue(group_cfs_rq(b[0]), t2) update_cfs_shares(group_cfs_rq(b[0])) < skipped because b is part of a throttled hierarchy > enqueue_entity(group_cfs_rq(a[0]), b[0]) ... We now have b[0] enqueued, yet group_cfs_rq(a[0])->load.weight == 0 which violates invariants in several code-paths. Eliminate the possibility of this by initializing group entity weight. 
Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131016181627.22647.47543.st...@sword-of-the-dawn.mtv.corp.google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f6308cb..0923ab2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7198,7 +7198,8 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, se->cfs_rq = parent->my_q; se->my_q = cfs_rq; - update_load_set(&se->load, 0); + /* guarantee group entities always have weight */ + update_load_set(&se->load, NICE_0_LOAD); se->parent = parent; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:sched/core] sched: Introduce temporary FAIR_GROUP_SCHED dependency for load-tracking
Commit-ID: f4e26b120b9de84cb627bc7361ba43cfdc51341f Gitweb: http://git.kernel.org/tip/f4e26b120b9de84cb627bc7361ba43cfdc51341f Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:32 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:31 +0200 sched: Introduce temporary FAIR_GROUP_SCHED dependency for load-tracking While per-entity load-tracking is generally useful, beyond computing shares distribution, e.g. runnable based load-balance (in progress), governors, power-management, etc. These facilities are not yet consumers of this data. This may be trivially reverted when the information is required; but avoid paying the overhead for calculations we will not use until then. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141507.422162...@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h |8 +++- kernel/sched/core.c |7 ++- kernel/sched/fair.c | 13 +++-- kernel/sched/sched.h |9 - 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index e483ccb..e1581a0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1168,7 +1168,13 @@ struct sched_entity { /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; #endif -#ifdef CONFIG_SMP +/* + * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be + * removed when useful for applications beyond shares distribution (e.g. + * load-balance). 
+ */ +#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED) + /* Per-entity load-tracking */ struct sched_avgavg; #endif }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f268600..5dae0d2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1526,7 +1526,12 @@ static void __sched_fork(struct task_struct *p) p->se.vruntime = 0; INIT_LIST_HEAD(&p->se.group_node); -#ifdef CONFIG_SMP +/* + * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be + * removed when useful for applications beyond shares distribution (e.g. + * load-balance). + */ +#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED) p->se.avg.runnable_avg_period = 0; p->se.avg.runnable_avg_sum = 0; #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6ecf455..3e6a353 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -882,7 +882,8 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq) } #endif /* CONFIG_FAIR_GROUP_SCHED */ -#ifdef CONFIG_SMP +/* Only depends on SMP, FAIR_GROUP_SCHED may be removed when useful in lb */ +#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED) /* * We choose a half-life close to 1 scheduling period. * Note: The tables below are dependent on this value. @@ -3174,6 +3175,12 @@ unlock: } /* + * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be + * removed when useful for applications beyond shares distribution (e.g. + * load-balance). + */ +#ifdef CONFIG_FAIR_GROUP_SCHED +/* * Called immediately before a task is migrated to a new cpu; task_cpu(p) and * cfs_rq_of(p) references at time of call are still valid and identify the * previous cpu. 
However, the caller only guarantees p->pi_lock is held; no @@ -3196,6 +3203,7 @@ migrate_task_rq_fair(struct task_struct *p, int next_cpu) atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load); } } +#endif #endif /* CONFIG_SMP */ static unsigned long @@ -5773,8 +5781,9 @@ const struct sched_class fair_sched_class = { #ifdef CONFIG_SMP .select_task_rq = select_task_rq_fair, +#ifdef CONFIG_FAIR_GROUP_SCHED .migrate_task_rq= migrate_task_rq_fair, - +#endif .rq_online = rq_online_fair, .rq_offline = rq_offline_fair, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0a75a43..5eca173 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -225,6 +225,12 @@ struct cfs_rq { #endif #ifdef CONFIG_SMP +/* + * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be + * removed when useful for applications beyond shares distribution (e.g. + * load-balance). + */ +#ifdef CONFIG_FAIR_GROUP_SCHED /* * CFS Load tracking * Under CFS, load is tracked on a per-entity basis and aggregated up. @@ -234,7 +240,8 @@ struct cfs_rq { u64 runnable_load_avg, blocked_load_avg; atomic64_t decay_counter, removed_load; u64 last_decay; - +#endif /* CONFIG_FAIR_GROUP_SCHED */ +/* These always depend on CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_FAIR_GROUP_SCHED u32 tg_runnable_contrib; u64 tg_load_contrib; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.
[tip:sched/core] sched: Update_cfs_shares at period edge
Commit-ID: f269ae0469fc882332bdfb5db15d3c1315fe2a10 Gitweb: http://git.kernel.org/tip/f269ae0469fc882332bdfb5db15d3c1315fe2a10 Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:31 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:29 +0200 sched: Update_cfs_shares at period edge Now that our measurement intervals are small (~1ms) we can amortize the posting of update_shares() to be about each period overflow. This is a large cost saving for frequently switching tasks. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141507.200772...@google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 18 ++ 1 files changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dcc27d8..002a769 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1187,6 +1187,7 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) } __update_cfs_rq_tg_load_contrib(cfs_rq, force_update); + update_cfs_shares(cfs_rq); } static inline void update_rq_runnable_avg(struct rq *rq, int runnable) @@ -1396,9 +1397,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); - enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP); account_entity_enqueue(cfs_rq, se); - update_cfs_shares(cfs_rq); + enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP); if (flags & ENQUEUE_WAKEUP) { place_entity(cfs_rq, se, 0); @@ -1471,7 +1471,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Update run-time statistics of the 'current'. 
*/ update_curr(cfs_rq); - dequeue_entity_load_avg(cfs_rq, se, flags & DEQUEUE_SLEEP); update_stats_dequeue(cfs_rq, se); if (flags & DEQUEUE_SLEEP) { @@ -1491,8 +1490,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); - se->on_rq = 0; account_entity_dequeue(cfs_rq, se); + dequeue_entity_load_avg(cfs_rq, se, flags & DEQUEUE_SLEEP); /* * Normalize the entity after updating the min_vruntime because the @@ -1506,7 +1505,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) return_cfs_rq_runtime(cfs_rq); update_min_vruntime(cfs_rq); - update_cfs_shares(cfs_rq); + se->on_rq = 0; } /* @@ -2518,8 +2517,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq_throttled(cfs_rq)) break; - update_cfs_shares(cfs_rq); update_entity_load_avg(se, 1); + update_cfs_rq_blocked_load(cfs_rq, 0); } if (!se) { @@ -2579,8 +2578,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq_throttled(cfs_rq)) break; - update_cfs_shares(cfs_rq); update_entity_load_avg(se, 1); + update_cfs_rq_blocked_load(cfs_rq, 0); } if (!se) { @@ -5639,8 +5638,11 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) se = tg->se[i]; /* Propagate contribution to hierarchy */ raw_spin_lock_irqsave(&rq->lock, flags); - for_each_sched_entity(se) + for_each_sched_entity(se) { update_cfs_shares(group_cfs_rq(se)); + /* update contribution to parent */ + update_entity_load_avg(se, 1); + } raw_spin_unlock_irqrestore(&rq->lock, flags); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:sched/core] sched: Refactor update_shares_cpu() -> update_blocked_avgs()
Commit-ID: 48a1675323fa1b7844e479ad2a4469f4558c0f79 Gitweb: http://git.kernel.org/tip/48a1675323fa1b7844e479ad2a4469f4558c0f79 Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:31 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:28 +0200 sched: Refactor update_shares_cpu() -> update_blocked_avgs() Now that running entities maintain their own load-averages the work we must do in update_shares() is largely restricted to the periodic decay of blocked entities. This allows us to be a little less pessimistic regarding our occupancy on rq->lock and the associated rq->clock updates required. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141507.133999...@google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 50 +++--- 1 files changed, 23 insertions(+), 27 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 57fae95..dcc27d8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3639,20 +3639,15 @@ next: /* * update tg->load_weight by folding this cpu's load_avg */ -static int update_shares_cpu(struct task_group *tg, int cpu) +static void __update_blocked_averages_cpu(struct task_group *tg, int cpu) { - struct sched_entity *se; - struct cfs_rq *cfs_rq; - unsigned long flags; - struct rq *rq; - - rq = cpu_rq(cpu); - se = tg->se[cpu]; - cfs_rq = tg->cfs_rq[cpu]; + struct sched_entity *se = tg->se[cpu]; + struct cfs_rq *cfs_rq = tg->cfs_rq[cpu]; - raw_spin_lock_irqsave(&rq->lock, flags); + /* throttled entities do not contribute to load */ + if (throttled_hierarchy(cfs_rq)) + return; - update_rq_clock(rq); update_cfs_rq_blocked_load(cfs_rq, 1); if (se) { @@ -3669,32 +3664,33 @@ static int update_shares_cpu(struct task_group *tg, int cpu) if (!se->avg.runnable_avg_sum && !cfs_rq->nr_running) list_del_leaf_cfs_rq(cfs_rq); } else { + struct rq *rq = rq_of(cfs_rq); update_rq_runnable_avg(rq, rq->nr_running); } - - raw_spin_unlock_irqrestore(&rq->lock, 
flags); - - return 0; } -static void update_shares(int cpu) +static void update_blocked_averages(int cpu) { - struct cfs_rq *cfs_rq; struct rq *rq = cpu_rq(cpu); + struct cfs_rq *cfs_rq; + unsigned long flags; - rcu_read_lock(); + raw_spin_lock_irqsave(&rq->lock, flags); + update_rq_clock(rq); /* * Iterates the task_group tree in a bottom up fashion, see * list_add_leaf_cfs_rq() for details. */ for_each_leaf_cfs_rq(rq, cfs_rq) { - /* throttled entities do not contribute to load */ - if (throttled_hierarchy(cfs_rq)) - continue; - - update_shares_cpu(cfs_rq->tg, cpu); + /* +* Note: We may want to consider periodically releasing +* rq->lock about these updates so that creating many task +* groups does not result in continually extending hold time. +*/ + __update_blocked_averages_cpu(cfs_rq->tg, rq->cpu); } - rcu_read_unlock(); + + raw_spin_unlock_irqrestore(&rq->lock, flags); } /* @@ -3746,7 +3742,7 @@ static unsigned long task_h_load(struct task_struct *p) return load; } #else -static inline void update_shares(int cpu) +static inline void update_blocked_averages(int cpu) { } @@ -4813,7 +4809,7 @@ void idle_balance(int this_cpu, struct rq *this_rq) */ raw_spin_unlock(&this_rq->lock); - update_shares(this_cpu); + update_blocked_averages(this_cpu); rcu_read_lock(); for_each_domain(this_cpu, sd) { unsigned long interval; @@ -5068,7 +5064,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) int update_next_balance = 0; int need_serialize; - update_shares(cpu); + update_blocked_averages(cpu); rcu_read_lock(); for_each_domain(cpu, sd) { -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:sched/core] sched: Replace update_shares weight distribution with per-entity computation
Commit-ID: 82958366cfea1a50e7e90907b2d55ae29ed69974 Gitweb: http://git.kernel.org/tip/82958366cfea1a50e7e90907b2d55ae29ed69974 Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:31 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:28 +0200 sched: Replace update_shares weight distribution with per-entity computation Now that the machinery is in place to compute contributed load in a bottom-up fashion, replace the shares distribution code within update_shares() accordingly. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141507.061208...@google.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c |8 --- kernel/sched/fair.c | 157 -- kernel/sched/sched.h | 36 3 files changed, 36 insertions(+), 165 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 71b0ea3..2cd3c1b 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -218,14 +218,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_SMP - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg", - SPLIT_NS(cfs_rq->load_avg)); - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period", - SPLIT_NS(cfs_rq->load_period)); - SEQ_printf(m, " .%-30s: %ld\n", "load_contrib", - cfs_rq->load_contribution); - SEQ_printf(m, " .%-30s: %d\n", "load_tg", - atomic_read(&cfs_rq->tg->load_weight)); SEQ_printf(m, " .%-30s: %lld\n", "runnable_load_avg", cfs_rq->runnable_load_avg); SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg", diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 873c9f5..57fae95 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -658,9 +658,6 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) return calc_delta_fair(sched_slice(cfs_rq, se), se); } -static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); -static 
void update_cfs_shares(struct cfs_rq *cfs_rq); - /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. @@ -680,10 +677,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, curr->vruntime += delta_exec_weighted; update_min_vruntime(cfs_rq); - -#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED - cfs_rq->load_unacc_exec_time += delta_exec; -#endif } static void update_curr(struct cfs_rq *cfs_rq) @@ -806,72 +799,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) } #ifdef CONFIG_FAIR_GROUP_SCHED -/* we need this in update_cfs_load and load-balance functions below */ -static inline int throttled_hierarchy(struct cfs_rq *cfs_rq); # ifdef CONFIG_SMP -static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, - int global_update) -{ - struct task_group *tg = cfs_rq->tg; - long load_avg; - - load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1); - load_avg -= cfs_rq->load_contribution; - - if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) { - atomic_add(load_avg, &tg->load_weight); - cfs_rq->load_contribution += load_avg; - } -} - -static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) -{ - u64 period = sysctl_sched_shares_window; - u64 now, delta; - unsigned long load = cfs_rq->load.weight; - - if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq)) - return; - - now = rq_of(cfs_rq)->clock_task; - delta = now - cfs_rq->load_stamp; - - /* truncate load history at 4 idle periods */ - if (cfs_rq->load_stamp > cfs_rq->load_last && - now - cfs_rq->load_last > 4 * period) { - cfs_rq->load_period = 0; - cfs_rq->load_avg = 0; - delta = period - 1; - } - - cfs_rq->load_stamp = now; - cfs_rq->load_unacc_exec_time = 0; - cfs_rq->load_period += delta; - if (load) { - cfs_rq->load_last = now; - cfs_rq->load_avg += delta * load; - } - - /* consider updating load contribution on each fold or truncate */ - if (global_update || 
cfs_rq->load_period > period - || !cfs_rq->load_period) - update_cfs_rq_load_contribution(cfs_rq, global_update); - - while (cfs_rq->load_period > period) { - /* -* Inline assembly required to prevent the compiler -* optimising this loop into a divmod call. -* See __iter_div_u64_rem() for another ex
[tip:sched/core] sched: Maintain runnable averages across throttled periods
Commit-ID: f1b17280efbd21873d1db8631117bdbccbcb39a2 Gitweb: http://git.kernel.org/tip/f1b17280efbd21873d1db8631117bdbccbcb39a2 Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:31 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:27 +0200 sched: Maintain runnable averages across throttled periods With bandwidth control tracked entities may cease execution according to user specified bandwidth limits. Charging this time as either throttled or blocked however, is incorrect and would falsely skew in either direction. What we actually want is for any throttled periods to be "invisible" to load-tracking as they are removed from the system for that interval and contribute normally otherwise. Do this by moderating the progression of time to omit any periods in which the entity belonged to a throttled hierarchy. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141506.998912...@google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 50 -- kernel/sched/sched.h |3 ++- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9e49722..873c9f5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1222,15 +1222,26 @@ static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq, cfs_rq->blocked_load_avg = 0; } +static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq); + /* Update a sched_entity's runnable average */ static inline void update_entity_load_avg(struct sched_entity *se, int update_cfs_rq) { struct cfs_rq *cfs_rq = cfs_rq_of(se); long contrib_delta; + u64 now; - if (!__update_entity_runnable_avg(rq_of(cfs_rq)->clock_task, &se->avg, - se->on_rq)) + /* +* For a group entity we need to use their owned cfs_rq_clock_task() in +* case they are the parent of a throttled hierarchy. 
+*/ + if (entity_is_task(se)) + now = cfs_rq_clock_task(cfs_rq); + else + now = cfs_rq_clock_task(group_cfs_rq(se)); + + if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq)) return; contrib_delta = __update_entity_load_avg_contrib(se); @@ -1250,7 +1261,7 @@ static inline void update_entity_load_avg(struct sched_entity *se, */ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) { - u64 now = rq_of(cfs_rq)->clock_task >> 20; + u64 now = cfs_rq_clock_task(cfs_rq) >> 20; u64 decays; decays = now - cfs_rq->last_decay; @@ -1841,6 +1852,15 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) return &tg->cfs_bandwidth; } +/* rq->task_clock normalized against any time this cfs_rq has spent throttled */ +static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq) +{ + if (unlikely(cfs_rq->throttle_count)) + return cfs_rq->throttled_clock_task; + + return rq_of(cfs_rq)->clock_task - cfs_rq->throttled_clock_task_time; +} + /* returns 0 on failure to allocate runtime */ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) { @@ -1991,6 +2011,10 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) cfs_rq->load_stamp += delta; cfs_rq->load_last += delta; + /* adjust cfs_rq_clock_task() */ + cfs_rq->throttled_clock_task_time += rq->clock_task - +cfs_rq->throttled_clock_task; + /* update entity weight now that we are on_rq again */ update_cfs_shares(cfs_rq); } @@ -2005,8 +2029,10 @@ static int tg_throttle_down(struct task_group *tg, void *data) struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; /* group is entering throttled state, record last load */ - if (!cfs_rq->throttle_count) + if (!cfs_rq->throttle_count) { update_cfs_load(cfs_rq, 0); + cfs_rq->throttled_clock_task = rq->clock_task; + } cfs_rq->throttle_count++; return 0; @@ -2021,7 +2047,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; - /* account load preceding throttle */ + /* freeze hierarchy 
runnable averages while throttled */ rcu_read_lock(); walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq); rcu_read_unlock(); @@ -2045,7 +2071,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) rq->nr_running -= task_delta; cfs_rq->throttled = 1; - cfs_rq->throttled_timestamp = rq->clock; + cfs_rq->throttled_clock = rq->clock; raw_spin_lock(&cfs_b->lock); list_add_tail_rcu(&cfs_rq-
[tip:sched/core] sched: Compute load contribution by a group entity
Commit-ID: 8165e145ceb62fc338e099c9b12b3239c83d2f8e Gitweb: http://git.kernel.org/tip/8165e145ceb62fc338e099c9b12b3239c83d2f8e Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:31 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:25 +0200 sched: Compute load contribution by a group entity Unlike task entities who have a fixed weight, group entities instead own a fraction of their parenting task_group's shares as their contributed weight. Compute this fraction so that we can correctly account hierarchies and shared entity nodes. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141506.855074...@google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 33 +++-- 1 files changed, 27 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index db78822..e20cb26 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1117,22 +1117,43 @@ static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, cfs_rq->tg_load_contrib += tg_contrib; } } + +static inline void __update_group_entity_contrib(struct sched_entity *se) +{ + struct cfs_rq *cfs_rq = group_cfs_rq(se); + struct task_group *tg = cfs_rq->tg; + u64 contrib; + + contrib = cfs_rq->tg_load_contrib * tg->shares; + se->avg.load_avg_contrib = div64_u64(contrib, +atomic64_read(&tg->load_avg) + 1); +} #else static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, int force_update) {} +static inline void __update_group_entity_contrib(struct sched_entity *se) {} #endif +static inline void __update_task_entity_contrib(struct sched_entity *se) +{ + u32 contrib; + + /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */ + contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight); + contrib /= (se->avg.runnable_avg_period + 1); + se->avg.load_avg_contrib = scale_load(contrib); +} + /* Compute the current contribution to load_avg by se, return any delta */ 
static long __update_entity_load_avg_contrib(struct sched_entity *se) { long old_contrib = se->avg.load_avg_contrib; - if (!entity_is_task(se)) - return 0; - - se->avg.load_avg_contrib = div64_u64(se->avg.runnable_avg_sum * -se->load.weight, -se->avg.runnable_avg_period + 1); + if (entity_is_task(se)) { + __update_task_entity_contrib(se); + } else { + __update_group_entity_contrib(se); + } return se->avg.load_avg_contrib - old_contrib; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:sched/core] sched: Aggregate total task_group load
Commit-ID: c566e8e9e44b72b53091da20e2dedefc730f2ee2 Gitweb: http://git.kernel.org/tip/c566e8e9e44b72b53091da20e2dedefc730f2ee2 Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:30 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:24 +0200 sched: Aggregate total task_group load Maintain a global running sum of the average load seen on each cfs_rq belonging to each task group so that it may be used in calculating an appropriate shares:weight distribution. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141506.792901...@google.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c |4 kernel/sched/fair.c | 22 ++ kernel/sched/sched.h |4 3 files changed, 30 insertions(+), 0 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2d2e2b3..2908923 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -230,6 +230,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->runnable_load_avg); SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg", cfs_rq->blocked_load_avg); + SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg", + atomic64_read(&cfs_rq->tg->load_avg)); + SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib", + cfs_rq->tg_load_contrib); #endif print_cfs_group_stats(m, cpu, cfs_rq->tg); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 74dc29b..db78822 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1102,6 +1102,26 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se) return decays; } +#ifdef CONFIG_FAIR_GROUP_SCHED +static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, +int force_update) +{ + struct task_group *tg = cfs_rq->tg; + s64 tg_contrib; + + tg_contrib = cfs_rq->runnable_load_avg + cfs_rq->blocked_load_avg; + tg_contrib -= cfs_rq->tg_load_contrib; + + if (force_update || abs64(tg_contrib) > cfs_rq->tg_load_contrib / 8) { + atomic64_add(tg_contrib, 
&tg->load_avg); + cfs_rq->tg_load_contrib += tg_contrib; + } +} +#else +static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, +int force_update) {} +#endif + /* Compute the current contribution to load_avg by se, return any delta */ static long __update_entity_load_avg_contrib(struct sched_entity *se) { @@ -1172,6 +1192,8 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) atomic64_add(decays, &cfs_rq->decay_counter); cfs_rq->last_decay = now; } + + __update_cfs_rq_tg_load_contrib(cfs_rq, force_update); } static inline void update_rq_runnable_avg(struct rq *rq, int runnable) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 30236ab..924a990 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -112,6 +112,7 @@ struct task_group { unsigned long shares; atomic_t load_weight; + atomic64_t load_avg; #endif #ifdef CONFIG_RT_GROUP_SCHED @@ -232,6 +233,9 @@ struct cfs_rq { u64 runnable_load_avg, blocked_load_avg; atomic64_t decay_counter, removed_load; u64 last_decay; +#ifdef CONFIG_FAIR_GROUP_SCHED + u64 tg_load_contrib; +#endif #endif #ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:sched/core] sched: Account for blocked load waking back up
Commit-ID: aff3e49881fa71c5ee1bbc470e1dff9548d9 Gitweb: http://git.kernel.org/tip/aff3e49881fa71c5ee1bbc470e1dff9548d9 Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:30 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:23 +0200 sched: Account for blocked load waking back up When a running entity blocks we migrate its tracked load to cfs_rq->blocked_runnable_avg. In the sleep case this occurs while holding rq->lock and so is a natural transition. Wake-ups however, are potentially asynchronous in the presence of migration and so special care must be taken. We use an atomic counter to track such migrated load, taking care to match this with the previously introduced decay counters so that we don't migrate too much load. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141506.726077...@google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 100 -- kernel/sched/sched.h |2 +- 2 files changed, 81 insertions(+), 21 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5e602e6..74dc29b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -259,7 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) return grp->my_q; } -static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq); +static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, + int force_update); static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) { @@ -281,7 +282,7 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->on_list = 1; /* We should have no load, but we need to update last_decay. 
*/ - update_cfs_rq_blocked_load(cfs_rq); + update_cfs_rq_blocked_load(cfs_rq, 0); } } @@ -1086,17 +1087,19 @@ static __always_inline int __update_entity_runnable_avg(u64 now, } /* Synchronize an entity's decay with its parenting cfs_rq.*/ -static inline void __synchronize_entity_decay(struct sched_entity *se) +static inline u64 __synchronize_entity_decay(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); u64 decays = atomic64_read(&cfs_rq->decay_counter); decays -= se->avg.decay_count; if (!decays) - return; + return 0; se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays); se->avg.decay_count = 0; + + return decays; } /* Compute the current contribution to load_avg by se, return any delta */ @@ -1149,20 +1152,26 @@ static inline void update_entity_load_avg(struct sched_entity *se, * Decay the load contributed by all blocked children and account this so that * their contribution may appropriately discounted when they wake up. */ -static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) +static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) { u64 now = rq_of(cfs_rq)->clock_task >> 20; u64 decays; decays = now - cfs_rq->last_decay; - if (!decays) + if (!decays && !force_update) return; - cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg, - decays); - atomic64_add(decays, &cfs_rq->decay_counter); + if (atomic64_read(&cfs_rq->removed_load)) { + u64 removed_load = atomic64_xchg(&cfs_rq->removed_load, 0); + subtract_blocked_load_contrib(cfs_rq, removed_load); + } - cfs_rq->last_decay = now; + if (decays) { + cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg, + decays); + atomic64_add(decays, &cfs_rq->decay_counter); + cfs_rq->last_decay = now; + } } static inline void update_rq_runnable_avg(struct rq *rq, int runnable) @@ -1175,20 +1184,42 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) { - /* we track migrations using entity 
decay_count == 0 */ - if (unlikely(!se->avg.decay_count)) { + /* +* We track migrations using entity decay_count <= 0, on a wake-up +* migration we use a negative decay count to track the remote decays +* accumulated while sleeping. +*/ + if (unlikely(se->avg.decay_count <= 0)) { se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task; + if (se->avg.decay_count) { + /* +* In a wake-up migration we have to approximate the +* time sleeping. This is because we can't synchronize +
[tip:sched/core] sched: Maintain the load contribution of blocked entities
Commit-ID: 9ee474f55664ff63111c843099d365e7ecffb56f Gitweb: http://git.kernel.org/tip/9ee474f55664ff63111c843099d365e7ecffb56f Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:30 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:22 +0200 sched: Maintain the load contribution of blocked entities We are currently maintaining: runnable_load(cfs_rq) = \Sum task_load(t), for all running children t of cfs_rq. While this can be naturally updated for tasks in a runnable state (as they are scheduled), this does not account for the load contributed by blocked task entities. This can be solved by introducing a separate accounting for blocked load: blocked_load(cfs_rq) = \Sum runnable(b) * weight(b), for all blocked children b of cfs_rq. Obviously we do not want to iterate over all blocked entities to account for their decay; instead we observe that: runnable_load(t) = \Sum p_i*y^i and that to account for an additional idle period we only need to compute: y*runnable_load(t). This means that we can compute the decay of all blocked entities at once by evaluating: blocked_load'(cfs_rq) = y * blocked_load(cfs_rq). Finally, we maintain a decay counter so that when a sleeping entity re-awakens we can determine how much of its load should be removed from the blocked sum. 
Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141506.585389...@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h |1 + kernel/sched/core.c |1 - kernel/sched/debug.c |3 + kernel/sched/fair.c | 128 - kernel/sched/sched.h |4 +- 5 files changed, 122 insertions(+), 15 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 81d8b1b..b1831ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1103,6 +1103,7 @@ struct sched_avg { */ u32 runnable_avg_sum, runnable_avg_period; u64 last_runnable_update; + s64 decay_count; unsigned long load_avg_contrib; }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fd9d085..00898f1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1528,7 +1528,6 @@ static void __sched_fork(struct task_struct *p) p->se.avg.runnable_avg_period = 0; p->se.avg.runnable_avg_sum = 0; #endif - #ifdef CONFIG_SCHEDSTATS memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index c953a89..2d2e2b3 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -95,6 +95,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group P(se->avg.runnable_avg_sum); P(se->avg.runnable_avg_period); P(se->avg.load_avg_contrib); + P(se->avg.decay_count); #endif #undef PN #undef P @@ -227,6 +228,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) atomic_read(&cfs_rq->tg->load_weight)); SEQ_printf(m, " .%-30s: %lld\n", "runnable_load_avg", cfs_rq->runnable_load_avg); + SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg", + cfs_rq->blocked_load_avg); #endif print_cfs_group_stats(m, cpu, cfs_rq->tg); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 77af759..8319417 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -259,6 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) 
return grp->my_q; } +static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq); + static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) { if (!cfs_rq->on_list) { @@ -278,6 +280,8 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) } cfs_rq->on_list = 1; + /* We should have no load, but we need to update last_decay. */ + update_cfs_rq_blocked_load(cfs_rq); } } @@ -1081,6 +1085,20 @@ static __always_inline int __update_entity_runnable_avg(u64 now, return decayed; } +/* Synchronize an entity's decay with its parenting cfs_rq.*/ +static inline void __synchronize_entity_decay(struct sched_entity *se) +{ + struct cfs_rq *cfs_rq = cfs_rq_of(se); + u64 decays = atomic64_read(&cfs_rq->decay_counter); + + decays -= se->avg.decay_count; + if (!decays) + return; + + se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays); + se->avg.decay_count = 0; +} + /* Compute the current contribution to load_avg by se, return any delta */ static long __update_entity_load_avg_contrib(struct sched_entity *se) { @@ -1096,8 +1114,18 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se) return se->avg.load_avg_contrib - old_contrib; } +static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq, +
[tip:sched/core] sched: Aggregate load contributed by task entities on parenting cfs_rq
Commit-ID: 2dac754e10a5d41d94d2d2365c0345d4f215a266 Gitweb: http://git.kernel.org/tip/2dac754e10a5d41d94d2d2365c0345d4f215a266 Author: Paul Turner AuthorDate: Thu, 4 Oct 2012 13:18:30 +0200 Committer: Ingo Molnar CommitDate: Wed, 24 Oct 2012 10:27:21 +0200 sched: Aggregate load contributed by task entities on parenting cfs_rq For a given task t, we can compute its contribution to load as: task_load(t) = runnable_avg(t) * weight(t) On a parenting cfs_rq we can then aggregate: runnable_load(cfs_rq) = \Sum task_load(t), for all runnable children t Maintain this bottom up, with task entities adding their contributed load to the parenting cfs_rq sum. When a task entity's load changes we add the same delta to the maintained sum. Signed-off-by: Paul Turner Reviewed-by: Ben Segall Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120823141506.514678...@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h |1 + kernel/sched/debug.c |3 ++ kernel/sched/fair.c | 51 +--- kernel/sched/sched.h | 10 - 4 files changed, 60 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 418fc6d..81d8b1b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1103,6 +1103,7 @@ struct sched_avg { */ u32 runnable_avg_sum, runnable_avg_period; u64 last_runnable_update; + unsigned long load_avg_contrib; }; #ifdef CONFIG_SCHEDSTATS diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 4240abc..c953a89 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -94,6 +94,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group #ifdef CONFIG_SMP P(se->avg.runnable_avg_sum); P(se->avg.runnable_avg_period); + P(se->avg.load_avg_contrib); #endif #undef PN #undef P @@ -224,6 +225,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->load_contribution); SEQ_printf(m, " .%-30s: %d\n", "load_tg", atomic_read(&cfs_rq->tg->load_weight)); + SEQ_printf(m, " .%-30s: 
%lld\n", "runnable_load_avg", + cfs_rq->runnable_load_avg); #endif print_cfs_group_stats(m, cpu, cfs_rq->tg); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8c5468f..77af759 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1081,20 +1081,63 @@ static __always_inline int __update_entity_runnable_avg(u64 now, return decayed; } +/* Compute the current contribution to load_avg by se, return any delta */ +static long __update_entity_load_avg_contrib(struct sched_entity *se) +{ + long old_contrib = se->avg.load_avg_contrib; + + if (!entity_is_task(se)) + return 0; + + se->avg.load_avg_contrib = div64_u64(se->avg.runnable_avg_sum * +se->load.weight, +se->avg.runnable_avg_period + 1); + + return se->avg.load_avg_contrib - old_contrib; +} + /* Update a sched_entity's runnable average */ static inline void update_entity_load_avg(struct sched_entity *se) { - __update_entity_runnable_avg(rq_of(cfs_rq_of(se))->clock_task, &se->avg, -se->on_rq); + struct cfs_rq *cfs_rq = cfs_rq_of(se); + long contrib_delta; + + if (!__update_entity_runnable_avg(rq_of(cfs_rq)->clock_task, &se->avg, + se->on_rq)) + return; + + contrib_delta = __update_entity_load_avg_contrib(se); + if (se->on_rq) + cfs_rq->runnable_load_avg += contrib_delta; } static inline void update_rq_runnable_avg(struct rq *rq, int runnable) { __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable); } + +/* Add the load generated by se into cfs_rq's child load-average */ +static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, + struct sched_entity *se) +{ + update_entity_load_avg(se); + cfs_rq->runnable_load_avg += se->avg.load_avg_contrib; +} + +/* Remove se's load from this cfs_rq child load-average */ +static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, + struct sched_entity *se) +{ + update_entity_load_avg(se); + cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib; +} #else static inline void update_entity_load_avg(struct sched_entity *se) {} static inline 
void update_rq_runnable_avg(struct rq *rq, int runnable) {} +static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, + struct sched_entity *se) {} +static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, + struc