On Thu, Feb 07, 2019 at 04:16:06PM +0530, Viresh Kumar wrote:
> @@ -6081,10 +6082,14 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
>       for_each_cpu_wrap(core, cpus, target) {
>               bool idle = true;
>  
> -             for_each_cpu(cpu, cpu_smt_mask(core)) {
> -                     cpumask_clear_cpu(cpu, cpus);
> -                     if (!available_idle_cpu(cpu))
> +             smt = cpu_smt_mask(core);
> +             cpumask_andnot(cpus, cpus, smt);

So where the previous code was like 1-2 stores, you just added 16.

(assuming 64-bit longs and NR_CPUS=1024, i.e. 1024/64 = 16 words per cpumask)
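
For reference, a rough sketch of what that andnot boils down to
(paraphrasing __bitmap_andnot() from lib/bitmap.c; names approximate):

	/*
	 * One unconditional store per word of the destination,
	 * no matter how few bits of 'src2' are actually set.
	 */
	for (k = 0; k < BITS_TO_LONGS(nbits); k++)
		dst[k] = src1[k] & ~src2[k];

whereas cpumask_clear_cpu() only dirties the word holding that one
bit, so a 2-way SMT core costs 1-2 stores total.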

And we still do the iteration anyway:

> +             for_each_cpu(cpu, smt) {
> +                     if (!available_idle_cpu(cpu)) {
>                               idle = false;
> +                             break;
> +                     }
>               }

An actual improvement would've been:

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 38d4669aa2ef..2d352d6d15c7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6082,7 +6082,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
                bool idle = true;
 
                for_each_cpu(cpu, cpu_smt_mask(core)) {
-                       cpumask_clear_cpu(cpu, cpus);
+                       __cpumask_clear_cpu(cpu, cpus);
                        if (!available_idle_cpu(cpu))
                                idle = false;
                }
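
That keeps the single-word stores and additionally drops the atomic;
paraphrasing include/linux/cpumask.h:

	static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
	{
		/* atomic RMW; LOCK-prefixed on x86 */
		clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
	}

	static inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
	{
		/* plain load/store, no atomicity */
		__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
	}

The non-atomic variant should be fine here, since 'cpus' is (if I'm
reading it right) the per-CPU select_idle_mask scratch mask that
nothing else writes concurrently.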
