Runnable load was introduced to take into account the case where
blocked load biases the wake up path, which may end up selecting an
overloaded CPU with a large number of runnable tasks instead of an
underutilized CPU with a huge blocked load.

The wake up path now starts by looking for idle CPUs before comparing
runnable load, and it's worth aligning the wake up path with the
load_balance.

Signed-off-by: Vincent Guittot <vincent.guit...@linaro.org>
---
 kernel/sched/fair.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 670856d..6203e71 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1475,7 +1475,12 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
               group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
 }
 
-static unsigned long cpu_runnable_load(struct rq *rq);
+static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
+
+static unsigned long cpu_runnable_load(struct rq *rq)
+{
+       return cfs_rq_runnable_load_avg(&rq->cfs);
+}
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
@@ -5380,11 +5385,6 @@ static int sched_idle_cpu(int cpu)
                        rq->nr_running);
 }
 
-static unsigned long cpu_runnable_load(struct rq *rq)
-{
-       return cfs_rq_runnable_load_avg(&rq->cfs);
-}
-
 static unsigned long cpu_load(struct rq *rq)
 {
        return cfs_rq_load_avg(&rq->cfs);
@@ -5485,7 +5485,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
        s64 this_eff_load, prev_eff_load;
        unsigned long task_load;
 
-       this_eff_load = cpu_runnable_load(cpu_rq(this_cpu));
+       this_eff_load = cpu_load(cpu_rq(this_cpu));
 
        if (sync) {
                unsigned long current_load = task_h_load(current);
@@ -5503,7 +5503,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
                this_eff_load *= 100;
        this_eff_load *= capacity_of(prev_cpu);
 
-       prev_eff_load = cpu_runnable_load(cpu_rq(prev_cpu));
+       prev_eff_load = cpu_load(cpu_rq(prev_cpu));
        prev_eff_load -= task_load;
        if (sched_feat(WA_BIAS))
                prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
@@ -5591,7 +5591,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                max_spare_cap = 0;
 
                for_each_cpu(i, sched_group_span(group)) {
-                       load = cpu_runnable_load(cpu_rq(i));
+                       load = cpu_load(cpu_rq(i));
                        runnable_load += load;
 
                        avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
@@ -5732,7 +5732,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                                continue;
                        }
 
-                       load = cpu_runnable_load(cpu_rq(i));
+                       load = cpu_load(cpu_rq(i));
                        if (load < min_load) {
                                min_load = load;
                                least_loaded_cpu = i;
-- 
2.7.4

Reply via email to