find_idlest_group currently returns NULL when the local group is
idlest. The caller then continues the find_idlest_group search at a
lower level of the current CPU's sched_domain hierarchy. However,
find_idlest_group_cpu is not consulted and, crucially, @new_cpu is
not updated. This means the search is pointless and we return
@prev_cpu from select_task_rq_fair.
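
To make the failure mode concrete, here is a rough user-space toy
model of the pre-patch find_idlest_cpu loop. The structures and the
toy_* helpers below are invented for illustration; only the control
flow mirrors the kernel code:

#include <stdio.h>

struct sched_group { int unused; };
struct sched_domain { struct sched_domain *child; };

/* Pre-patch contract: return NULL when the local group is idlest. */
static struct sched_group *toy_find_idlest_group(struct sched_domain *sd)
{
        (void)sd;
        return NULL;            /* pretend the local group always wins */
}

static int toy_find_idlest_cpu(struct sched_domain *sd, int cpu, int prev_cpu)
{
        int new_cpu = prev_cpu;

        (void)cpu;      /* the waker's CPU is never used on this path */

        while (sd) {
                struct sched_group *group = toy_find_idlest_group(sd);

                if (!group) {
                        /*
                         * Local group is idlest: descend a level, but
                         * new_cpu is never written on this path...
                         */
                        sd = sd->child;
                        continue;
                }
                /* ...find_idlest_group_cpu() would update new_cpu here. */
        }

        /* If every level picked the local group, we fall out with... */
        return new_cpu;         /* ...@prev_cpu, not @cpu */
}

int main(void)
{
        struct sched_domain mc  = { .child = NULL };
        struct sched_domain die = { .child = &mc };

        /* Waker on CPU 2, task previously ran on CPU 5: prints 5. */
        printf("selected CPU: %d\n", toy_find_idlest_cpu(&die, 2, 5));
        return 0;
}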

This patch makes find_idlest_group return the idlest group (never
NULL), so that the caller unconditionally goes on to consult
find_idlest_group_cpu and update @new_cpu.

 * * *

An alternative, simpler fix would be to initialize @new_cpu to @cpu
instead of @prev_cpu at the beginning of the new find_idlest_cpu.
However, that would not address the fact that find_idlest_group_cpu
goes unused: unless find_idlest_group picks a non-local group, we
would still miss out on its bias toward a shallower-idle CPU.

If that alternative solution were preferred, some code could also be
removed, in recognition of the fact that when find_idlest_group_cpu
was called, @cpu would never be a member of @group (@group would be a
non-local group, and since @sd is derived from @cpu, @cpu would always
be in @sd's local group).
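
For reference, that alternative would boil down to a one-line change
along these lines (illustrative only, not part of this patch; the
declaration shown is assumed rather than quoted from the tree):

        --- a/kernel/sched/fair.c
        +++ b/kernel/sched/fair.c
        @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
        -       int new_cpu = prev_cpu;
        +       int new_cpu = cpu;

In the toy model above, this corresponds to initializing new_cpu to
cpu in toy_find_idlest_cpu: find_idlest_group_cpu would still go
unconsulted whenever the local group wins, but we would at least fall
back to the waker's CPU instead of @prev_cpu.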

Signed-off-by: Brendan Jackman <brendan.jack...@arm.com>
Cc: Dietmar Eggemann <dietmar.eggem...@arm.com>
Cc: Vincent Guittot <vincent.guit...@linaro.org>
Cc: Josef Bacik <jo...@toxicpanda.com>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Morten Rasmussen <morten.rasmus...@arm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
---
 kernel/sched/fair.c | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 26080917ff8d..93e2746d3c30 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5384,11 +5384,10 @@ static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
  * Assumes p is allowed on at least one CPU in sd.
  */
 static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p,
-                 int this_cpu, int sd_flag)
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int sd_flag)
 {
-       struct sched_group *idlest = NULL, *group = sd->groups;
-       struct sched_group *most_spare_sg = NULL;
+       struct sched_group *group = sd->groups, *local_group = sd->groups;
+       struct sched_group *idlest = NULL, *most_spare_sg = NULL;
        unsigned long min_runnable_load = ULONG_MAX;
        unsigned long this_runnable_load = ULONG_MAX;
        unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX;
@@ -5404,7 +5403,6 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
        do {
                unsigned long load, avg_load, runnable_load;
                unsigned long spare_cap, max_spare_cap;
-               int local_group;
                int i;

                /* Skip over this group if it has no CPUs allowed */
@@ -5412,9 +5410,6 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                                        &p->cpus_allowed))
                        continue;

-               local_group = cpumask_test_cpu(this_cpu,
-                                              sched_group_span(group));
-
                /*
                 * Tally up the load of all CPUs in the group and find
                 * the group containing the CPU with most spare capacity.
@@ -5425,7 +5420,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,

                for_each_cpu(i, sched_group_span(group)) {
                        /* Bias balancing toward cpus of our domain */
-                       if (local_group)
+                       if (group == local_group)
                                load = source_load(i, load_idx);
                        else
                                load = target_load(i, load_idx);
@@ -5446,7 +5441,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) /
                                        group->sgc->capacity;

-               if (local_group) {
+               if (group == local_group) {
                        this_runnable_load = runnable_load;
                        this_avg_load = avg_load;
                        this_spare = max_spare_cap;
@@ -5492,21 +5487,21 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,

        if (this_spare > task_util(p) / 2 &&
            imbalance_scale*this_spare > 100*most_spare)
-               return NULL;
+               return local_group;

        if (most_spare > task_util(p) / 2)
                return most_spare_sg;

 skip_spare:
        if (!idlest)
-               return NULL;
+               return local_group;

        if (min_runnable_load > (this_runnable_load + imbalance))
-               return NULL;
+               return local_group;

        if ((this_runnable_load < (min_runnable_load + imbalance)) &&
             (100*this_avg_load < imbalance_scale*min_avg_load))
-               return NULL;
+               return local_group;

        return idlest;
 }
@@ -5582,11 +5577,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
                        continue;
                }

-               group = find_idlest_group(sd, p, cpu, sd_flag);
-               if (!group) {
-                       sd = sd->child;
-                       continue;
-               }
+               group = find_idlest_group(sd, p, sd_flag);

                new_cpu = find_idlest_group_cpu(group, p, cpu);
                if (new_cpu == cpu) {
--
2.14.1
