select_idle_cpu() accounts average search cost for the purposes of
conducting a limited proportional search if SIS_PROP is enabled. The issue
is that select_idle_cpu() does not account for the cost if a candidate
is found and select_idle_smt() is ignored.

This patch moves the accounting of avg_cost to cover the cpu/smt search
costs. select_idle_core() costs could be accounted for too, but it has its
own throttling mechanism based on tracking whether idle cores are
expected to exist.

This patch is a bisection hazard because SIS_PROP and how it balances
avg_cost vs avg_idle was probably guided by the fact that avg_cost was
not always accounted for.

Signed-off-by: Mel Gorman <mgor...@techsingularity.net>
---
 kernel/sched/fair.c | 82 +++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1d8f5c4b4936..185fc6e28f8e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6006,6 +6006,29 @@ static inline int find_idlest_cpu(struct sched_domain 
*sd, struct task_struct *p
        return new_cpu;
 }
 
+static int sis_search_depth(struct sched_domain *sd, struct sched_domain 
*this_sd)
+{
+       u64 avg_cost, avg_idle, span_avg;
+       int nr = INT_MAX;
+
+       if (sched_feat(SIS_PROP)) {
+               /*
+                * Due to large variance we need a large fuzz factor; hackbench 
in
+                * particularly is sensitive here.
+                */
+               avg_idle = this_rq()->avg_idle / 512;
+               avg_cost = this_sd->avg_scan_cost + 1;
+
+               span_avg = sd->span_weight * avg_idle;
+               if (span_avg > 4*avg_cost)
+                       nr = div_u64(span_avg, avg_cost);
+               else
+                       nr = 4;
+       }
+
+       return nr;
+}
+
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6151,35 +6174,11 @@ static inline int select_idle_smt(struct task_struct 
*p, struct sched_domain *sd
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
  * average idle time for this rq (as found in rq->avg_idle).
  */
-static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int 
target)
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd,
+                                                       int target, int nr)
 {
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-       struct sched_domain *this_sd;
-       u64 avg_cost, avg_idle;
-       u64 time;
-       int this = smp_processor_id();
-       int cpu, nr = INT_MAX;
-
-       this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
-       if (!this_sd)
-               return -1;
-
-       /*
-        * Due to large variance we need a large fuzz factor; hackbench in
-        * particularly is sensitive here.
-        */
-       avg_idle = this_rq()->avg_idle / 512;
-       avg_cost = this_sd->avg_scan_cost + 1;
-
-       if (sched_feat(SIS_PROP)) {
-               u64 span_avg = sd->span_weight * avg_idle;
-               if (span_avg > 4*avg_cost)
-                       nr = div_u64(span_avg, avg_cost);
-               else
-                       nr = 4;
-       }
-
-       time = cpu_clock(this);
+       int cpu;
 
        cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
        __cpumask_clear_cpu(target, cpus);
@@ -6192,9 +6191,6 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
                        break;
        }
 
-       time = cpu_clock(this) - time;
-       update_avg(&this_sd->avg_scan_cost, time);
-
        return cpu;
 }
 
@@ -6245,9 +6241,10 @@ static inline bool asym_fits_capacity(int task_util, int 
cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
-       struct sched_domain *sd;
+       struct sched_domain *sd, *this_sd;
        unsigned long task_util;
-       int i, recent_used_cpu;
+       int i, recent_used_cpu, depth;
+       u64 time;
 
        schedstat_inc(this_rq()->sis_search);
 
@@ -6337,21 +6334,34 @@ static int select_idle_sibling(struct task_struct *p, 
int prev, int target)
        if (!sd)
                return target;
 
+       this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+       if (!this_sd)
+               return target;
+
+       depth = sis_search_depth(sd, this_sd);
+
        schedstat_inc(this_rq()->sis_domain_search);
        i = select_idle_core(p, sd, target);
        if ((unsigned)i < nr_cpumask_bits)
                return i;
 
-       i = select_idle_cpu(p, sd, target);
+       time = cpu_clock(smp_processor_id());
+       i = select_idle_cpu(p, sd, target, depth);
        if ((unsigned)i < nr_cpumask_bits)
-               return i;
+               goto acct_cost;
 
        i = select_idle_smt(p, sd, target);
        if ((unsigned)i < nr_cpumask_bits)
-               return i;
+               goto acct_cost;
 
        schedstat_inc(this_rq()->sis_failed);
-       return target;
+       i = target;
+
+acct_cost:
+       time = cpu_clock(smp_processor_id()) - time;
+       update_avg(&this_sd->avg_scan_cost, time);
+
+       return i;
 }
 
 /**
-- 
2.26.2

Reply via email to