On Thu, Feb 07, 2019 at 04:16:06PM +0530, Viresh Kumar wrote: > @@ -6081,10 +6082,14 @@ static int select_idle_core(struct task_struct *p, > struct sched_domain *sd, int > for_each_cpu_wrap(core, cpus, target) { > bool idle = true; > > - for_each_cpu(cpu, cpu_smt_mask(core)) { > - cpumask_clear_cpu(cpu, cpus); > - if (!available_idle_cpu(cpu)) > + smt = cpu_smt_mask(core); > + cpumask_andnot(cpus, cpus, smt);
So where the previous code was like 1-2 stores, you just added 16. (assuming 64-bit and NR_CPUS=1024: cpumask_andnot rewrites all 1024/64 = 16 words of the mask, where clearing individual bits only touches the words for the SMT siblings) And we still do the iteration anyway: > + for_each_cpu(cpu, smt) { > + if (!available_idle_cpu(cpu)) { > idle = false; > + break; > + } > } An actual improvement would've been: diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 38d4669aa2ef..2d352d6d15c7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6082,7 +6082,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int bool idle = true; for_each_cpu(cpu, cpu_smt_mask(core)) { - cpumask_clear_cpu(cpu, cpus); + __cpumask_clear_cpu(cpu, cpus); if (!available_idle_cpu(cpu)) idle = false; }