Current select_idle_core tries to find a fully idle core and if it fails
select_idle_cpu next returns any idle cpu in the llc domain. This is not optimal
for architectures with many (more than 2) hyperthreads in a core. This patch
changes select_idle_core to find the core with least number of busy
hyperthreads and return an idle cpu in that core.

Signed-off-by: Subhra Mazumdar <[email protected]>
---
 kernel/sched/fair.c      |  113 +++++++++-------------------------------------
 kernel/sched/idle_task.c |    1 -
 kernel/sched/sched.h     |   10 ----
 3 files changed, 21 insertions(+), 103 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d711093..eb2c33c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5679,111 +5679,49 @@ static int cpumask_next_wrap(int n, const struct 
cpumask *mask, int start, int *
 
 #ifdef CONFIG_SCHED_SMT
 
-static inline void set_idle_cores(int cpu, int val)
-{
-       struct sched_domain_shared *sds;
-
-       sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-       if (sds)
-               WRITE_ONCE(sds->has_idle_cores, val);
-}
-
-static inline bool test_idle_cores(int cpu, bool def)
-{
-       struct sched_domain_shared *sds;
-
-       sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-       if (sds)
-               return READ_ONCE(sds->has_idle_cores);
-
-       return def;
-}
-
 /*
- * Scans the local SMT mask to see if the entire core is idle, and records this
- * information in sd_llc_shared->has_idle_cores.
- *
- * Since SMT siblings share all cache levels, inspecting this limited remote
- * state should be fairly cheap.
- */
-void __update_idle_core(struct rq *rq)
-{
-       int core = cpu_of(rq);
-       int cpu;
-
-       rcu_read_lock();
-       if (test_idle_cores(core, true))
-               goto unlock;
-
-       for_each_cpu(cpu, cpu_smt_mask(core)) {
-               if (cpu == core)
-                       continue;
-
-               if (!idle_cpu(cpu))
-                       goto unlock;
-       }
-
-       set_idle_cores(core, 1);
-unlock:
-       rcu_read_unlock();
-}
-
-/*
- * Scan the entire LLC domain for idle cores; this dynamically switches off if
- * there are no idle cores left in the system; tracked through
- * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
+ * Scan the entire LLC domain for idle cores; Find the core with minimum number
+ * of busy strands and return a idle strand in that core
  */
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, 
int target)
 {
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-       int core, cpu, wrap;
+       int core, cpu, wrap, min_util = INT_MAX, min_cpu = -1;
 
        if (!static_branch_likely(&sched_smt_present))
                return -1;
 
-       if (!test_idle_cores(target, false))
-               return -1;
-
        cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
 
        for_each_cpu_wrap(core, cpus, target, wrap) {
                bool idle = true;
+               int util = 0;
+               int cp = -1;
 
                for_each_cpu(cpu, cpu_smt_mask(core)) {
                        cpumask_clear_cpu(cpu, cpus);
-                       if (!idle_cpu(cpu))
+                       if (!idle_cpu(cpu)) {
                                idle = false;
+                               util++;
+                       } else if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
+                               cp = cpu;
+                       }
                }
 
                if (idle)
                        return core;
-       }
 
-       /*
-        * Failed to find an idle core; stop looking for one.
-        */
-       set_idle_cores(target, 0);
+               if (util < min_util && cp != -1) {
+                       min_util = util;
+                       min_cpu = cp;
+               }
+       }
 
-       return -1;
+       return min_cpu;
 }
 
-/*
- * Scan the local SMT mask for idle CPUs.
- */
-static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int 
target)
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int 
target)
 {
-       int cpu;
-
-       if (!static_branch_likely(&sched_smt_present))
-               return -1;
-
-       for_each_cpu(cpu, cpu_smt_mask(target)) {
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-                       continue;
-               if (idle_cpu(cpu))
-                       return cpu;
-       }
-
        return -1;
 }
 
@@ -5794,13 +5732,6 @@ static inline int select_idle_core(struct task_struct 
*p, struct sched_domain *s
        return -1;
 }
 
-static inline int select_idle_smt(struct task_struct *p, struct sched_domain 
*sd, int target)
-{
-       return -1;
-}
-
-#endif /* CONFIG_SCHED_SMT */
-
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -5830,8 +5761,8 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
        time = local_clock();
 
        for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-                       continue;
+                       if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+                               continue;
                if (idle_cpu(cpu))
                        break;
        }
@@ -5844,6 +5775,8 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
        return cpu;
 }
 
+#endif /* CONFIG_SCHED_SMT */
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
@@ -5873,10 +5806,6 @@ static int select_idle_sibling(struct task_struct *p, 
int prev, int target)
        if ((unsigned)i < nr_cpumask_bits)
                return i;
 
-       i = select_idle_smt(p, sd, target);
-       if ((unsigned)i < nr_cpumask_bits)
-               return i;
-
        return target;
 }
 
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 0c00172..a3d5a7c 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -27,7 +27,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct 
task_struct *p, int fl
 pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags 
*rf)
 {
        put_prev_task(rq, prev);
-       update_idle_core(rq);
        schedstat_inc(rq->sched_goidle);
        return rq->idle;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6dda2aa..96ef012 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -772,16 +772,6 @@ static inline int cpu_of(struct rq *rq)
 
 extern struct static_key_false sched_smt_present;
 
-extern void __update_idle_core(struct rq *rq);
-
-static inline void update_idle_core(struct rq *rq)
-{
-       if (static_branch_unlikely(&sched_smt_present))
-               __update_idle_core(rq);
-}
-
-#else
-static inline void update_idle_core(struct rq *rq) { }
 #endif
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-- 
1.7.1

Reply via email to