The overhead of the CPU controller can be reduced by not walking every sched_entity in the hierarchy each time a task is enqueued or dequeued.
One of the things being checked every single time is whether the cfs_rq is on the rq->leaf_cfs_rq_list. By only removing a cfs_rq from the list once it no longer has children on the list, we can avoid walking the sched_entity hierarchy if the bottom cfs_rq is on the list, once the runqueues have been flattened. Signed-off-by: Rik van Riel <r...@surriel.com> --- kernel/sched/fair.c | 17 +++++++++++++++++ kernel/sched/sched.h | 1 + 2 files changed, 18 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 63cb40253b26..e41feacc45d9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -286,6 +286,13 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->on_list = 1; + /* + * If the tmp_alone_branch cursor was moved, it means a child cfs_rq + * is already on the list ahead of us. + */ + if (rq->tmp_alone_branch != &rq->leaf_cfs_rq_list) + cfs_rq->children_on_list++; + /* * Ensure we either appear before our parent (if already * enqueued) or force our parent to appear after us when it is @@ -311,6 +318,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) * list. */ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; + cfs_rq->tg->parent->cfs_rq[cpu]->children_on_list++; return true; } @@ -359,6 +367,11 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) if (rq->tmp_alone_branch == &cfs_rq->leaf_cfs_rq_list) rq->tmp_alone_branch = cfs_rq->leaf_cfs_rq_list.prev; + if (cfs_rq->tg->parent) { + int cpu = cpu_of(rq); + cfs_rq->tg->parent->cfs_rq[cpu]->children_on_list--; + } + list_del_rcu(&cfs_rq->leaf_cfs_rq_list); cfs_rq->on_list = 0; } @@ -7687,6 +7700,10 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) if (cfs_rq->avg.util_sum) return false; + /* Remove decayed parents once their decayed children are gone. 
*/ + if (cfs_rq->children_on_list) + return false; + return true; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 32978a8de8ce..4f8acbab0fb2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -557,6 +557,7 @@ struct cfs_rq { * This list is used during load balance. */ int on_list; + int children_on_list; struct list_head leaf_cfs_rq_list; struct task_group *tg; /* group that "owns" this runqueue */ -- 2.20.1