Now that enqueue_task_fair and dequeue_task_fair no longer iterate up
the hierarchy all the time, we need a way to lazily propagate
sum_exec_runtime up the hierarchy.
Once per tick, propagate the newly accumulated exec_runtime up the
hierarchy, and feed it into CFS bandwidth control.

Remove the now-pointless call to account_cfs_rq_runtime from
update_curr, which is always called with a root cfs_rq.

Signed-off-by: Rik van Riel <r...@surriel.com>
---
 include/linux/sched.h |  1 +
 kernel/sched/core.c   |  1 +
 kernel/sched/fair.c   | 22 ++++++++++++++++++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 901c710363e7..bdca15b3afe7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -454,6 +454,7 @@ struct sched_entity {
 	int				depth;
 	unsigned long			enqueued_h_load;
 	unsigned long			enqueued_h_weight;
+	u64				propagated_exec_runtime;
 	struct load_weight		h_load;
 	struct sched_entity		*parent;
 	/* rq on which this entity is (to be) queued: */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fbd96900f715..9915d20e84a9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2137,6 +2137,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.propagated_exec_runtime = 0;
 	p->se.cfs_rq			= NULL;
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5cfa3dbeba49..d6c881c5c4d5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -898,8 +898,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
 	cgroup_account_cputime(curtask, delta_exec);
 	account_group_exec_runtime(curtask, delta_exec);
-
-	account_cfs_rq_runtime(cfs_rq, delta_exec);
 }
 
 static void update_curr_fair(struct rq *rq)
@@ -3412,6 +3410,20 @@ static inline bool skip_blocked_update(struct sched_entity *se)
 	return true;
 }
 
+static void propagate_exec_runtime(struct cfs_rq *cfs_rq,
+				   struct sched_entity *se)
+{
+	struct sched_entity *parent = se->parent;
+	u64 diff = se->sum_exec_runtime - se->propagated_exec_runtime;
+
+	if (parent) {
+		parent->sum_exec_runtime += diff;
+		account_cfs_rq_runtime(cfs_rq, diff);
+	}
+
+	se->propagated_exec_runtime = se->sum_exec_runtime;
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
@@ -3423,6 +3435,11 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq,
 					long runnable_sum) {}
 
+static inline void propagate_exec_runtime(struct cfs_rq *cfs_rq,
+					  struct sched_entity *se)
+{
+}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 /**
@@ -10157,6 +10174,7 @@ static void propagate_entity_cfs_rq(struct sched_entity *se, int flags)
 		if (!(flags & DO_ATTACH))
 			break;
 
+		propagate_exec_runtime(cfs_rq, se);
 		update_cfs_group(se);
 	}
 }
-- 
2.20.1
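
The pattern the patch relies on is plain delta accounting: each
sched_entity remembers how much of its sum_exec_runtime it has already
pushed to its parent, so the tick only has to forward the difference,
instead of walking the hierarchy on every enqueue/dequeue. Below is a
minimal userspace sketch of that pattern, under stated assumptions: the
entity struct and the runtime_remaining field are illustrative
stand-ins, not the kernel's types; runtime_remaining only mimics what
account_cfs_rq_runtime() charges against the CFS bandwidth budget.

#include <stdint.h>
#include <stdio.h>

/*
 * Toy stand-in for struct sched_entity, with just the fields that
 * matter for lazy runtime propagation.
 */
struct entity {
	struct entity *parent;
	uint64_t sum_exec_runtime;        /* total runtime ever accrued */
	uint64_t propagated_exec_runtime; /* portion already pushed up */
	int64_t runtime_remaining;        /* toy bandwidth budget */
};

/*
 * Called once per tick: forward only the runtime accumulated since the
 * previous propagation, then remember the new high-water mark.
 * Repeated calls with no new runtime are cheap no-ops (diff == 0).
 */
static void propagate_exec_runtime(struct entity *se)
{
	uint64_t diff = se->sum_exec_runtime - se->propagated_exec_runtime;

	if (se->parent) {
		se->parent->sum_exec_runtime += diff;
		/* Analogue of account_cfs_rq_runtime(cfs_rq, diff). */
		se->parent->runtime_remaining -= (int64_t)diff;
	}

	se->propagated_exec_runtime = se->sum_exec_runtime;
}

int main(void)
{
	struct entity root = { .runtime_remaining = 10000 };
	struct entity child = { .parent = &root };

	child.sum_exec_runtime = 1000;  /* child ran for 1000ns */
	propagate_exec_runtime(&child); /* tick 1: forwards 1000 */

	child.sum_exec_runtime = 1500;  /* 500ns more runtime */
	propagate_exec_runtime(&child); /* tick 2: forwards only 500 */

	printf("root sum=%llu budget=%lld\n",
	       (unsigned long long)root.sum_exec_runtime,
	       (long long)root.runtime_remaining);
	return 0;
}

Built with any C99 compiler, this prints "root sum=1500 budget=8500":
the second tick forwards only the 500ns accrued since the first, which
is what keeps the propagation cheap enough to do from the tick.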