The commit 36b238d57172 ("psi: Optimize switching tasks inside shared
cgroups") only updates cgroups whose state actually changes during a
task switch, but only in the task preempt case, not in the task sleep case. We actually don't need to clear and set the TSK_ONCPU state for the common cgroups of the next and prev tasks in the sleep case either, which can save many psi_group_change() calls, especially when most activity comes from one leaf cgroup. Signed-off-by: Muchun Song <[email protected]> Signed-off-by: Chengming Zhou <[email protected]> --- kernel/sched/psi.c | 27 +++++++++++++++++---------- kernel/sched/stats.h | 17 +++-------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 6e46d9eb279b..6061e87089ac 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -836,20 +836,27 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, } } - /* - * If this is a voluntary sleep, dequeue will have taken care - * of the outgoing TSK_ONCPU alongside TSK_RUNNING already. We - * only need to deal with it during preemption. - */ - if (sleep) - return; - if (prev->pid) { - psi_flags_change(prev, TSK_ONCPU, 0); + int clear = 0, set = 0; + + if (sleep) { + clear |= TSK_RUNNING; + if (prev->in_iowait) + set |= TSK_IOWAIT; + } + + psi_flags_change(prev, clear | TSK_ONCPU, set); iter = NULL; while ((group = iterate_groups(prev, &iter)) && group != common) - psi_group_change(group, cpu, TSK_ONCPU, 0, true); + psi_group_change(group, cpu, clear | TSK_ONCPU, set, true); + + if (sleep) { + while (group) { + psi_group_change(group, cpu, clear, set, true); + group = iterate_groups(prev, &iter); + } + } } } diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 9e4e67a94731..2d92c8467678 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -84,28 +84,17 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup) static inline void psi_dequeue(struct task_struct *p, bool sleep) { - int clear = TSK_RUNNING, set = 0; - if (static_branch_likely(&psi_disabled)) return; if (!sleep) { + int clear = TSK_RUNNING; + if (p->in_memstall) clear |= TSK_MEMSTALL; 
- } else { - /* - * When a task sleeps, schedule() dequeues it before - * switching to the next one. Merge the clearing of - * TSK_RUNNING and TSK_ONCPU to save an unnecessary - * psi_task_change() call in psi_sched_switch(). - */ - clear |= TSK_ONCPU; - if (p->in_iowait) - set |= TSK_IOWAIT; + psi_task_change(p, clear, 0); } - - psi_task_change(p, clear, set); } static inline void psi_ttwu_dequeue(struct task_struct *p) -- 2.11.0

