Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-20 Thread Gautham R Shenoy


On Wed, Jan 20, 2021 at 09:54:20AM +, Mel Gorman wrote:
> On Wed, Jan 20, 2021 at 10:21:47AM +0100, Vincent Guittot wrote:
> > On Wed, 20 Jan 2021 at 10:12, Mel Gorman  
> > wrote:
> > >
> > > On Wed, Jan 20, 2021 at 02:00:18PM +0530, Gautham R Shenoy wrote:
> > > > > @@ -6157,18 +6169,31 @@ static int select_idle_cpu(struct task_struct 
> > > > > *p, struct sched_domain *sd, int t
> > > > > }
> > > > >
> > > > > for_each_cpu_wrap(cpu, cpus, target) {
> > > > > -   if (!--nr)
> > > > > -   return -1;
> > > > > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > > > > -   break;
> > > > > +   if (smt) {
> > > > > +   i = select_idle_core(p, cpu, cpus, &idle_cpu);
> > > > > +   if ((unsigned int)i < nr_cpumask_bits)
> > > > > +   return i;
> > > > > +
> > > > > +   } else {
> > > > > +   if (!--nr)
> > > > > +   return -1;
> > > > > +   i = __select_idle_cpu(cpu);
> > > > > +   if ((unsigned int)i < nr_cpumask_bits) {
> > > > > +   idle_cpu = i;
> > > > > +   break;
> > > > > +   }
> > > > > +   }
> > > > > }
> > > > >
> > > > > -   if (sched_feat(SIS_PROP)) {
> > > > > +   if (smt)
> > > > > +   set_idle_cores(this, false);
> > > >
> > > > Shouldn't we set_idle_cores(false) only if this was the last idle
> > > > core in the LLC ?
> > > >
> > >
> > > That would involve rechecking the cpumask bits that have not been
> > > scanned to see if any of them are an idle core. As the existence of idle
> > > cores can change very rapidly, it's not worth the cost.
> > 
> > But don't we reach this point only if we scanned all CPUs and didn't
> > find an idle core ?

Indeed, I missed the part where we return as soon as we find an idle
core in the for_each_cpu_wrap() loop above. So by the time we get here
there are no idle cores left, and clearing "has_idle_cores" is correct.
Sorry for the noise!


> 
> Yes, but my understanding of Gautham's suggestion was to check if an
> idle core found was the last idle core available and set has_idle_cores
> to false in that case.

That would have been nice, but since we do not keep a count of idle
cores, it is probably not worth the effort as you note below.
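Purely to illustrate what tracking a count would involve (a hypothetical
sketch; struct sched_domain_shared has no such counter and nothing like
this is being proposed here):

	/*
	 * Hypothetical: sds->nr_idle_cores does not exist today. Keeping it
	 * accurate would mean running something like this on every core
	 * busy/idle transition in the LLC.
	 */
	static void dec_idle_cores(int cpu)
	{
		struct sched_domain_shared *sds;

		sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
		if (sds && atomic_dec_and_test(&sds->nr_idle_cores))
			WRITE_ONCE(sds->has_idle_cores, 0);
	}

That extra atomic in the idle/busy paths is exactly the kind of cost the
single has_idle_cores flag avoids.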

> I think this would be relatively expensive and
> possibly futile as returning the last idle core for this wakeup does not
> mean there will be no idle core on the next wakeup as other cores may go
> idle between wakeups.


> 
> -- 
> Mel Gorman
> SUSE Labs

--
Thanks and Regards
gautham.


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-20 Thread Mel Gorman
On Wed, Jan 20, 2021 at 10:21:47AM +0100, Vincent Guittot wrote:
> On Wed, 20 Jan 2021 at 10:12, Mel Gorman  wrote:
> >
> > On Wed, Jan 20, 2021 at 02:00:18PM +0530, Gautham R Shenoy wrote:
> > > > @@ -6157,18 +6169,31 @@ static int select_idle_cpu(struct task_struct 
> > > > *p, struct sched_domain *sd, int t
> > > > }
> > > >
> > > > for_each_cpu_wrap(cpu, cpus, target) {
> > > > -   if (!--nr)
> > > > -   return -1;
> > > > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > > > -   break;
> > > > +   if (smt) {
> > > > +   i = select_idle_core(p, cpu, cpus, &idle_cpu);
> > > > +   if ((unsigned int)i < nr_cpumask_bits)
> > > > +   return i;
> > > > +
> > > > +   } else {
> > > > +   if (!--nr)
> > > > +   return -1;
> > > > +   i = __select_idle_cpu(cpu);
> > > > +   if ((unsigned int)i < nr_cpumask_bits) {
> > > > +   idle_cpu = i;
> > > > +   break;
> > > > +   }
> > > > +   }
> > > > }
> > > >
> > > > -   if (sched_feat(SIS_PROP)) {
> > > > +   if (smt)
> > > > +   set_idle_cores(this, false);
> > >
> > > Shouldn't we set_idle_cores(false) only if this was the last idle
> > > core in the LLC ?
> > >
> >
> > That would involve rechecking the cpumask bits that have not been
> > scanned to see if any of them are an idle core. As the existence of idle
> > cores can change very rapidly, it's not worth the cost.
> 
> But don't we reach this point only if we scanned all CPUs and didn't
> find an idle core ?

Yes, but my understanding of Gautham's suggestion was to check if an
idle core found was the last idle core available and set has_idle_cores
to false in that case. I think this would be relatively expensive and
possibly futile as returning the last idle core for this wakeup does not
mean there will be no idle core on the next wakeup as other cores may go
idle between wakeups.

-- 
Mel Gorman
SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-20 Thread Vincent Guittot
On Wed, 20 Jan 2021 at 10:54, Mel Gorman  wrote:
>
> On Wed, Jan 20, 2021 at 10:21:47AM +0100, Vincent Guittot wrote:
> > On Wed, 20 Jan 2021 at 10:12, Mel Gorman  
> > wrote:
> > >
> > > On Wed, Jan 20, 2021 at 02:00:18PM +0530, Gautham R Shenoy wrote:
> > > > > @@ -6157,18 +6169,31 @@ static int select_idle_cpu(struct task_struct 
> > > > > *p, struct sched_domain *sd, int t
> > > > > }
> > > > >
> > > > > for_each_cpu_wrap(cpu, cpus, target) {
> > > > > -   if (!--nr)
> > > > > -   return -1;
> > > > > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > > > > -   break;
> > > > > +   if (smt) {
> > > > > +   i = select_idle_core(p, cpu, cpus, &idle_cpu);
> > > > > +   if ((unsigned int)i < nr_cpumask_bits)
> > > > > +   return i;
> > > > > +
> > > > > +   } else {
> > > > > +   if (!--nr)
> > > > > +   return -1;
> > > > > +   i = __select_idle_cpu(cpu);
> > > > > +   if ((unsigned int)i < nr_cpumask_bits) {
> > > > > +   idle_cpu = i;
> > > > > +   break;
> > > > > +   }
> > > > > +   }
> > > > > }
> > > > >
> > > > > -   if (sched_feat(SIS_PROP)) {
> > > > > +   if (smt)
> > > > > +   set_idle_cores(this, false);
> > > >
> > > > Shouldn't we set_idle_cores(false) only if this was the last idle
> > > > core in the LLC ?
> > > >
> > >
> > > That would involve rechecking the cpumask bits that have not been
> > > scanned to see if any of them are an idle core. As the existence of idle
> > > cores can change very rapidly, it's not worth the cost.
> >
> > But don't we reach this point only if we scanned all CPUs and didn't
> > find an idle core ?
>
> Yes, but my understanding of Gautham's suggestion was to check if an
> idle core found was the last idle core available and set has_idle_cores

OK, I get it now.

> to false in that case. I think this would be relatively expensive and
> possibly futile as returning the last idle core for this wakeup does not
> mean there will be no idle core on the next wakeup as other cores may go
> idle between wakeups.

I agree, this isn't worth the added complexity.

>
> --
> Mel Gorman
> SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-20 Thread Vincent Guittot
On Wed, 20 Jan 2021 at 10:12, Mel Gorman  wrote:
>
> On Wed, Jan 20, 2021 at 02:00:18PM +0530, Gautham R Shenoy wrote:
> > > @@ -6157,18 +6169,31 @@ static int select_idle_cpu(struct task_struct *p, 
> > > struct sched_domain *sd, int t
> > > }
> > >
> > > for_each_cpu_wrap(cpu, cpus, target) {
> > > -   if (!--nr)
> > > -   return -1;
> > > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > > -   break;
> > > +   if (smt) {
> > > +   i = select_idle_core(p, cpu, cpus, &idle_cpu);
> > > +   if ((unsigned int)i < nr_cpumask_bits)
> > > +   return i;
> > > +
> > > +   } else {
> > > +   if (!--nr)
> > > +   return -1;
> > > +   i = __select_idle_cpu(cpu);
> > > +   if ((unsigned int)i < nr_cpumask_bits) {
> > > +   idle_cpu = i;
> > > +   break;
> > > +   }
> > > +   }
> > > }
> > >
> > > -   if (sched_feat(SIS_PROP)) {
> > > +   if (smt)
> > > +   set_idle_cores(this, false);
> >
> > Shouldn't we set_idle_cores(false) only if this was the last idle
> > core in the LLC ?
> >
>
> That would involve rechecking the cpumask bits that have not been
> scanned to see if any of them are an idle core. As the existence of idle
> cores can change very rapidly, it's not worth the cost.

But don't we reach this point only if we scanned all CPUs and didn't
find an idle core?
If there is an idle core, it returns early.
And in the smt == true case, nr is set to INT_MAX and we loop over all
CPUs/cores.
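For clarity, a condensed sketch of the control flow in question,
re-assembled from the hunk quoted above with comments added (not the
literal patch text):

	for_each_cpu_wrap(cpu, cpus, target) {
		if (smt) {
			/* early exit as soon as an idle core is found */
			i = select_idle_core(p, cpu, cpus, &idle_cpu);
			if ((unsigned int)i < nr_cpumask_bits)
				return i;
		} else {
			/* SIS_PROP budget; nr is left at INT_MAX when smt */
			if (!--nr)
				return -1;
			i = __select_idle_cpu(cpu);
			if ((unsigned int)i < nr_cpumask_bits) {
				idle_cpu = i;
				break;
			}
		}
	}

	/* reached only after every core was scanned and none was idle */
	if (smt)
		set_idle_cores(this, false);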

>
> --
> Mel Gorman
> SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-20 Thread Mel Gorman
On Wed, Jan 20, 2021 at 02:00:18PM +0530, Gautham R Shenoy wrote:
> > @@ -6157,18 +6169,31 @@ static int select_idle_cpu(struct task_struct *p, 
> > struct sched_domain *sd, int t
> > }
> > 
> > for_each_cpu_wrap(cpu, cpus, target) {
> > -   if (!--nr)
> > -   return -1;
> > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > -   break;
> > +   if (smt) {
> > +   i = select_idle_core(p, cpu, cpus, &idle_cpu);
> > +   if ((unsigned int)i < nr_cpumask_bits)
> > +   return i;
> > +
> > +   } else {
> > +   if (!--nr)
> > +   return -1;
> > +   i = __select_idle_cpu(cpu);
> > +   if ((unsigned int)i < nr_cpumask_bits) {
> > +   idle_cpu = i;
> > +   break;
> > +   }
> > +   }
> > }
> > 
> > -   if (sched_feat(SIS_PROP)) {
> > +   if (smt)
> > +   set_idle_cores(this, false);
> 
> Shouldn't we set_idle_cores(false) only if this was the last idle
> core in the LLC ? 
> 

That would involve rechecking the cpumask bits that have not been
scanned to see if any of them are an idle core. As the existence of idle
cores can change very rapidly, it's not worth the cost.

-- 
Mel Gorman
SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-20 Thread Gautham R Shenoy
Hello Mel, Peter,

On Tue, Jan 19, 2021 at 11:22:11AM +, Mel Gorman wrote:
> From: Peter Zijlstra (Intel) 
> 
> Both select_idle_core() and select_idle_cpu() do a loop over the same
> cpumask. Observe that by clearing the already visited CPUs, we can
> fold the iteration and iterate a core at a time.
> 
> All we need to do is remember any non-idle CPU we encountered while
> scanning for an idle core. This way we'll only iterate every CPU once.
> 
> Signed-off-by: Peter Zijlstra (Intel) 
> Signed-off-by: Mel Gorman 
> ---
>  kernel/sched/fair.c | 101 ++--
>  1 file changed, 61 insertions(+), 40 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 12e08da90024..822851b39b65 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c

[..snip..]


> @@ -6157,18 +6169,31 @@ static int select_idle_cpu(struct task_struct *p, 
> struct sched_domain *sd, int t
>   }
> 
>   for_each_cpu_wrap(cpu, cpus, target) {
> - if (!--nr)
> - return -1;
> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> - break;
> + if (smt) {
> + i = select_idle_core(p, cpu, cpus, &idle_cpu);
> + if ((unsigned int)i < nr_cpumask_bits)
> + return i;
> +
> + } else {
> + if (!--nr)
> + return -1;
> + i = __select_idle_cpu(cpu);
> + if ((unsigned int)i < nr_cpumask_bits) {
> + idle_cpu = i;
> + break;
> + }
> + }
>   }
> 
> - if (sched_feat(SIS_PROP)) {
> + if (smt)
> + set_idle_cores(this, false);

Shouldn't we set_idle_cores(false) only if this was the last idle
core in the LLC ? 

--
Thanks and Regards
gautham.


[PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-19 Thread Mel Gorman
From: Peter Zijlstra (Intel) 

Both select_idle_core() and select_idle_cpu() do a loop over the same
cpumask. Observe that by clearing the already visited CPUs, we can
fold the iteration and iterate a core at a time.

All we need to do is remember any non-idle CPU we encountered while
scanning for an idle core. This way we'll only iterate every CPU once.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Mel Gorman 
---
 kernel/sched/fair.c | 101 ++--
 1 file changed, 61 insertions(+), 40 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 12e08da90024..822851b39b65 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain 
*sd, struct task_struct *p
return new_cpu;
 }
 
+static inline int __select_idle_cpu(int cpu)
+{
+   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+   return cpu;
+
+   return -1;
+}
+
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6066,40 +6074,34 @@ void __update_idle_core(struct rq *rq)
  * there are no idle cores left in the system; tracked through
  * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
  */
-static int select_idle_core(struct task_struct *p, struct sched_domain *sd, 
int target)
+static int select_idle_core(struct task_struct *p, int core, struct cpumask 
*cpus, int *idle_cpu)
 {
-   struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-   int core, cpu;
+   bool idle = true;
+   int cpu;
 
if (!static_branch_likely(&sched_smt_present))
-   return -1;
-
-   if (!test_idle_cores(target, false))
-   return -1;
-
-   cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+   return __select_idle_cpu(core);
 
-   for_each_cpu_wrap(core, cpus, target) {
-   bool idle = true;
-
-   for_each_cpu(cpu, cpu_smt_mask(core)) {
-   if (!available_idle_cpu(cpu)) {
-   idle = false;
-   break;
+   for_each_cpu(cpu, cpu_smt_mask(core)) {
+   if (!available_idle_cpu(cpu)) {
+   idle = false;
+   if (*idle_cpu == -1) {
+   if (sched_idle_cpu(cpu) && 
cpumask_test_cpu(cpu, p->cpus_ptr)) {
+   *idle_cpu = cpu;
+   break;
+   }
+   continue;
}
+   break;
}
-
-   if (idle)
-   return core;
-
-   cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
+   if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+   *idle_cpu = cpu;
}
 
-   /*
-* Failed to find an idle core; stop looking for one.
-*/
-   set_idle_cores(target, 0);
+   if (idle)
+   return core;
 
+   cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
return -1;
 }
 
@@ -6107,9 +6109,18 @@ static int select_idle_core(struct task_struct *p, 
struct sched_domain *sd, int
 
 #define sched_smt_weight   1
 
-static inline int select_idle_core(struct task_struct *p, struct sched_domain 
*sd, int target)
+static inline void set_idle_cores(int cpu, int val)
 {
-   return -1;
+}
+
+static inline bool test_idle_cores(int cpu, bool def)
+{
+   return def;
+}
+
+static inline int select_idle_core(struct task_struct *p, int core, struct 
cpumask *cpus, int *idle_cpu)
+{
+   return __select_idle_cpu(core);
 }
 
 #endif /* CONFIG_SCHED_SMT */
@@ -6124,10 +6135,11 @@ static inline int select_idle_core(struct task_struct 
*p, struct sched_domain *s
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int 
target)
 {
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+   int i, cpu, idle_cpu = -1, nr = INT_MAX;
+   bool smt = test_idle_cores(target, false);
+   int this = smp_processor_id();
struct sched_domain *this_sd;
u64 time;
-   int this = smp_processor_id();
-   int cpu, nr = INT_MAX;
 
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
@@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
 
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
-   if (sched_feat(SIS_PROP)) {
+   if (sched_feat(SIS_PROP) && !smt) {
u64 avg_cost, avg_idle, span_avg;
 
/*
@@ -6157,18 +6169,31 @@ static int select_idle_cpu(struct task_struct *p, 
struct sched_domain *sd, int t
}
 
for_each_cpu_wrap(cpu, cpus, target) {
-   if (!--nr)
-   return -1;
- 

Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-18 Thread Mel Gorman
On Mon, Jan 18, 2021 at 08:55:03PM +0800, Li, Aubrey wrote:
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 12e08da90024..6c0f841e9e75 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct 
> > sched_domain *sd, struct task_struct *p
> > return new_cpu;
> >  }
> >  
> > +static inline int __select_idle_cpu(struct task_struct *p, int core, 
> > struct cpumask *cpus)
> 
> Sorry if I missed anything, but why are p and cpus needed here?
> 

They are not needed. The original code was matching the calling pattern
for select_idle_core() which needs p and cpus to check if sibling CPUs
are allowed.

> > @@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, 
> > struct sched_domain *sd, int t
> >  
> > cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> >  
> > -   if (sched_feat(SIS_PROP)) {
> > +   if (sched_feat(SIS_PROP) && !smt) {
>
> Is it possible the system does have an idle core, but I still don't want to
> scan the entire LLC domain?
> 

This version is matching historical behaviour. To limit the scan for cores,
select_idle_core() would need to obey SIS_PROP and that patch was dropped
as it introduced regressions. It would only be considered once SIS_PROP
had better metrics for limiting the depth of the search.
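For reference, the SIS_PROP block being referred to looks roughly like
this after the series (reconstructed as a sketch, not quoted verbatim
from the patch):

	if (sched_feat(SIS_PROP) && !smt) {
		u64 avg_cost, avg_idle, span_avg;

		/*
		 * Due to large variance we need a large fuzz factor;
		 * hackbench in particular is sensitive here.
		 */
		avg_idle = this_rq()->avg_idle / 512;
		avg_cost = this_sd->avg_scan_cost + 1;

		span_avg = sd->span_weight * avg_idle;
		if (span_avg > 4 * avg_cost)
			nr = div_u64(span_avg, avg_cost);
		else
			nr = 4;

		time = cpu_clock(this);
	}

Limiting the core scan would mean letting select_idle_core() consume
this nr budget as well, which is the part that was dropped.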

> > u64 avg_cost, avg_idle, span_avg;
> >  
> > /*
> > @@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, 
> > struct sched_domain *sd, int t
> > for_each_cpu_wrap(cpu, cpus, target) {
> > if (!--nr)
> > return -1;
> 
> It looks like nr only makes sense when smt == false now; can it be moved into
> the else branch below?
> 

It can. I expect the saving to be marginal and it will need to move back
when/if select_idle_core() obeys SIS_PROP.

> > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > -   break;
> > +   if (smt) {
> > +   i = select_idle_core(p, cpu, cpus, &idle_cpu);
> > +   if ((unsigned int)i < nr_cpumask_bits)
> > +   return i;
> 
> What if the last idle core is selected here? Should we call set_idle_cores(false)
> before returning?
> 

We'd have to check what bits were still set in the cpus mask and
determine if they represent an idle core. I severely doubt it would be
worth the cost given that the availability of idle cores can change at
any instant.
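Spelling that out (hypothetical code using the same helpers as
select_idle_cpu(); it is not being proposed), the recheck would be
another core-by-core walk over whatever is left in the mask:

	bool still_has_idle_core = false;

	/* "cpus" still holds the cores that were not cleared during the scan */
	for_each_cpu_wrap(core, cpus, target) {
		bool core_idle = true;
		int sibling;

		for_each_cpu(sibling, cpu_smt_mask(core)) {
			if (!available_idle_cpu(sibling)) {
				core_idle = false;
				break;
			}
		}
		if (core_idle) {
			still_has_idle_core = true;
			break;
		}
	}

	if (!still_has_idle_core)
		set_idle_cores(this, false);

And even if that walk came back clean, another core could go idle
immediately afterwards, which is the point about the information
changing at any instant.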

-- 
Mel Gorman
SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-18 Thread Li, Aubrey
On 2021/1/15 18:08, Mel Gorman wrote:
> From: Peter Zijlstra (Intel) 
> 
> Both select_idle_core() and select_idle_cpu() do a loop over the same
> cpumask. Observe that by clearing the already visited CPUs, we can
> fold the iteration and iterate a core at a time.
> 
> All we need to do is remember any non-idle CPU we encountered while
> scanning for an idle core. This way we'll only iterate every CPU once.
> 
> Signed-off-by: Peter Zijlstra (Intel) 
> Signed-off-by: Mel Gorman 
> ---
>  kernel/sched/fair.c | 97 +++--
>  1 file changed, 59 insertions(+), 38 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 12e08da90024..6c0f841e9e75 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain 
> *sd, struct task_struct *p
>   return new_cpu;
>  }
>  
> +static inline int __select_idle_cpu(struct task_struct *p, int core, struct 
> cpumask *cpus)

Sorry if I missed anything, but why are p and cpus needed here?

> +{
> + if (available_idle_cpu(core) || sched_idle_cpu(core))
> + return core;
> +
> + return -1;
> +}
> +
>  #ifdef CONFIG_SCHED_SMT
>  DEFINE_STATIC_KEY_FALSE(sched_smt_present);
>  EXPORT_SYMBOL_GPL(sched_smt_present);
> @@ -6066,40 +6074,34 @@ void __update_idle_core(struct rq *rq)
>   * there are no idle cores left in the system; tracked through
>   * sd_llc->shared->has_idle_cores and enabled through update_idle_core() 
> above.
>   */
> -static int select_idle_core(struct task_struct *p, struct sched_domain *sd, 
> int target)
> +static int select_idle_core(struct task_struct *p, int core, struct cpumask 
> *cpus, int *idle_cpu)
>  {
> - struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> - int core, cpu;
> + bool idle = true;
> + int cpu;
>  
> if (!static_branch_likely(&sched_smt_present))
> - return -1;
> -
> - if (!test_idle_cores(target, false))
> - return -1;
> -
> - cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> + return __select_idle_cpu(p, core, cpus);
>  
> - for_each_cpu_wrap(core, cpus, target) {
> - bool idle = true;
> -
> - for_each_cpu(cpu, cpu_smt_mask(core)) {
> - if (!available_idle_cpu(cpu)) {
> - idle = false;
> - break;
> + for_each_cpu(cpu, cpu_smt_mask(core)) {
> + if (!available_idle_cpu(cpu)) {
> + idle = false;
> + if (*idle_cpu == -1) {
> + if (sched_idle_cpu(cpu) && 
> cpumask_test_cpu(cpu, p->cpus_ptr)) {
> + *idle_cpu = cpu;
> + break;
> + }
> + continue;
>   }
> + break;
>   }
> -
> - if (idle)
> - return core;
> -
> - cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
> + if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
> + *idle_cpu = cpu;
>   }
>  
> - /*
> -  * Failed to find an idle core; stop looking for one.
> -  */
> - set_idle_cores(target, 0);
> + if (idle)
> + return core;
>  
> + cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
>   return -1;
>  }
>  
> @@ -6107,9 +6109,18 @@ static int select_idle_core(struct task_struct *p, 
> struct sched_domain *sd, int
>  
>  #define sched_smt_weight 1
>  
> -static inline int select_idle_core(struct task_struct *p, struct 
> sched_domain *sd, int target)
> +static inline void set_idle_cores(int cpu, int val)
>  {
> - return -1;
> +}
> +
> +static inline bool test_idle_cores(int cpu, bool def)
> +{
> + return def;
> +}
> +
> +static inline int select_idle_core(struct task_struct *p, int core, struct 
> cpumask *cpus, int *idle_cpu)
> +{
> + return __select_idle_cpu(p, core, cpus);
>  }
>  
>  #endif /* CONFIG_SCHED_SMT */
> @@ -6124,10 +6135,11 @@ static inline int select_idle_core(struct task_struct 
> *p, struct sched_domain *s
>  static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, 
> int target)
>  {
>   struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> + int i, cpu, idle_cpu = -1, nr = INT_MAX;
> + bool smt = test_idle_cores(target, false);
> + int this = smp_processor_id();
>   struct sched_domain *this_sd;
>   u64 time;
> - int this = smp_processor_id();
> - int cpu, nr = INT_MAX;
>  
> this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
>   if (!this_sd)
> @@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, 
> struct sched_domain *sd, int t
>  
>   cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
>  
> - if (sched_feat(SIS_PROP)) {
> +

[PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-15 Thread Mel Gorman
From: Peter Zijlstra (Intel) 

Both select_idle_core() and select_idle_cpu() do a loop over the same
cpumask. Observe that by clearing the already visited CPUs, we can
fold the iteration and iterate a core at a time.

All we need to do is remember any non-idle CPU we encountered while
scanning for an idle core. This way we'll only iterate every CPU once.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Mel Gorman 
---
 kernel/sched/fair.c | 97 +++--
 1 file changed, 59 insertions(+), 38 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 12e08da90024..6c0f841e9e75 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain 
*sd, struct task_struct *p
return new_cpu;
 }
 
+static inline int __select_idle_cpu(struct task_struct *p, int core, struct 
cpumask *cpus)
+{
+   if (available_idle_cpu(core) || sched_idle_cpu(core))
+   return core;
+
+   return -1;
+}
+
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6066,40 +6074,34 @@ void __update_idle_core(struct rq *rq)
  * there are no idle cores left in the system; tracked through
  * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
  */
-static int select_idle_core(struct task_struct *p, struct sched_domain *sd, 
int target)
+static int select_idle_core(struct task_struct *p, int core, struct cpumask 
*cpus, int *idle_cpu)
 {
-   struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-   int core, cpu;
+   bool idle = true;
+   int cpu;
 
if (!static_branch_likely(&sched_smt_present))
-   return -1;
-
-   if (!test_idle_cores(target, false))
-   return -1;
-
-   cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+   return __select_idle_cpu(p, core, cpus);
 
-   for_each_cpu_wrap(core, cpus, target) {
-   bool idle = true;
-
-   for_each_cpu(cpu, cpu_smt_mask(core)) {
-   if (!available_idle_cpu(cpu)) {
-   idle = false;
-   break;
+   for_each_cpu(cpu, cpu_smt_mask(core)) {
+   if (!available_idle_cpu(cpu)) {
+   idle = false;
+   if (*idle_cpu == -1) {
+   if (sched_idle_cpu(cpu) && 
cpumask_test_cpu(cpu, p->cpus_ptr)) {
+   *idle_cpu = cpu;
+   break;
+   }
+   continue;
}
+   break;
}
-
-   if (idle)
-   return core;
-
-   cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
+   if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+   *idle_cpu = cpu;
}
 
-   /*
-* Failed to find an idle core; stop looking for one.
-*/
-   set_idle_cores(target, 0);
+   if (idle)
+   return core;
 
+   cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
return -1;
 }
 
@@ -6107,9 +6109,18 @@ static int select_idle_core(struct task_struct *p, 
struct sched_domain *sd, int
 
 #define sched_smt_weight   1
 
-static inline int select_idle_core(struct task_struct *p, struct sched_domain 
*sd, int target)
+static inline void set_idle_cores(int cpu, int val)
 {
-   return -1;
+}
+
+static inline bool test_idle_cores(int cpu, bool def)
+{
+   return def;
+}
+
+static inline int select_idle_core(struct task_struct *p, int core, struct 
cpumask *cpus, int *idle_cpu)
+{
+   return __select_idle_cpu(p, core, cpus);
 }
 
 #endif /* CONFIG_SCHED_SMT */
@@ -6124,10 +6135,11 @@ static inline int select_idle_core(struct task_struct 
*p, struct sched_domain *s
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int 
target)
 {
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+   int i, cpu, idle_cpu = -1, nr = INT_MAX;
+   bool smt = test_idle_cores(target, false);
+   int this = smp_processor_id();
struct sched_domain *this_sd;
u64 time;
-   int this = smp_processor_id();
-   int cpu, nr = INT_MAX;
 
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
@@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
 
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
-   if (sched_feat(SIS_PROP)) {
+   if (sched_feat(SIS_PROP) && !smt) {
u64 avg_cost, avg_idle, span_avg;
 
/*
@@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, 
struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, cpus, target) {

Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-14 Thread Mel Gorman
On Thu, Jan 14, 2021 at 04:44:46PM +0100, Vincent Guittot wrote:
> > domain. There is no need to make it specific to the core account and we
> > are already doing the full scan. Throttling that would be a separate patch.
> >
> > > This patch 5 should focus on merging select_idle_core and
> > > select_idle_cpu so we keep (almost) the same behavior but each CPU is
> > > checked only once.
> > >
> >
> > Which I think it's already doing. Main glitch really is that
> > __select_idle_cpu() shouldn't be taking *idle_cpu as it does not consume
> > the information.
> 
> I don't really like the if (smt) else in the for_each_cpu_wrap(cpu,
> cpus, target) loop because it just looks like we fail to merge the idle
> core and idle cpu search loops at the end.
> 

While it's not the best, I did at one point have a series that fully
unified this function and it wasn't pretty.

> But there is probably not much we can do without changing how the idle
> core search is accounted in avg_scan_cost.
> 

Indeed. Maybe in the future it'll make more sense to consolidate it
further, but between the depth search and possibly using SIS_PROP for
core searches, we've got bigger fish to fry.

Current delta between this series and what is being tested is simply

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7bfa73de6a8d..ada8faac2e4d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7446,7 +7446,6 @@ int sched_cpu_activate(unsigned int cpu)
 #ifdef CONFIG_SCHED_SMT
do {
int weight = cpumask_weight(cpu_smt_mask(cpu));
-   extern int sched_smt_weight;
 
if (weight > sched_smt_weight)
sched_smt_weight = weight;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 84f02abb29e3..6c0f841e9e75 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6006,7 +6006,7 @@ static inline int find_idlest_cpu(struct sched_domain 
*sd, struct task_struct *p
return new_cpu;
 }
 
-static inline int __select_idle_cpu(struct task_struct *p, int core, struct 
cpumask *cpus, int *idle_cpu)
+static inline int __select_idle_cpu(struct task_struct *p, int core, struct 
cpumask *cpus)
 {
if (available_idle_cpu(core) || sched_idle_cpu(core))
return core;
@@ -6080,7 +6080,7 @@ static int select_idle_core(struct task_struct *p, int 
core, struct cpumask *cpu
int cpu;
 
if (!static_branch_likely(&sched_smt_present))
-   return __select_idle_cpu(p, core, cpus, idle_cpu);
+   return __select_idle_cpu(p, core, cpus);
 
for_each_cpu(cpu, cpu_smt_mask(core)) {
if (!available_idle_cpu(cpu)) {
@@ -6120,7 +6120,7 @@ static inline bool test_idle_cores(int cpu, bool def)
 
 static inline int select_idle_core(struct task_struct *p, int core, struct 
cpumask *cpus, int *idle_cpu)
 {
-   return __select_idle_cpu(p, core, cpus, idle_cpu);
+   return __select_idle_cpu(p, core, cpus);
 }
 
 #endif /* CONFIG_SCHED_SMT */
@@ -6177,7 +6177,7 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
return i;
 
} else {
> -   i = __select_idle_cpu(p, cpu, cpus, &idle_cpu);
+   i = __select_idle_cpu(p, cpu, cpus);
if ((unsigned int)i < nr_cpumask_bits) {
idle_cpu = i;
break;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 12ada79d40f3..29aabe98dd1d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1107,6 +1107,8 @@ static inline void update_idle_core(struct rq *rq)
__update_idle_core(rq);
 }
 
+extern int sched_smt_weight;
+
 #else
 static inline void update_idle_core(struct rq *rq) { }
 #endif
-- 
Mel Gorman
SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-14 Thread Vincent Guittot
On Thu, 14 Jan 2021 at 14:53, Mel Gorman  wrote:
>
> On Thu, Jan 14, 2021 at 02:25:32PM +0100, Vincent Guittot wrote:
> > On Thu, 14 Jan 2021 at 10:35, Mel Gorman  
> > wrote:
> > >
> > > On Wed, Jan 13, 2021 at 06:03:00PM +0100, Vincent Guittot wrote:
> > > > > @@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct 
> > > > > *p, struct sched_domain *sd, int t
> > > > > for_each_cpu_wrap(cpu, cpus, target) {
> > > > > if (!--nr)
> > > > > return -1;
> > > > > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > > > > -   break;
> > > > > +   if (smt) {
> > > >
> > > > If we want to stay on something similar to the previous behavior, we
> > > > want to check on all cores if test_idle_cores is true so nr should be
> > > > set to number of cores
> > > >
> > >
> > > I don't think we necessarily want to do that. has_idle_cores is an
> > > effective throttling mechanism but it's not perfect. If the full domain
> > > is always scanned for a core then there can be excessive scanning in
> >
> > But that's what the code is currently doing. Can this change be done
> > in another patch so we can check the impact of each change more
> > easily?
>
> Ok, when looking at this again instead of just the mail, the flow is;
>
> int i, cpu, idle_cpu = -1, nr = INT_MAX;
> ...
> if (sched_feat(SIS_PROP) && !smt) {
> /* recalculate nr */
> }
>
> The !smt check should mean that core scanning is still scanning the entire

Yes, good point. I missed this change.

> domain. There is no need to make it specific to the core account and we
> are already doing the full scan. Throttling that would be a separate patch.
>
> > This patch 5 should focus on merging select_idle_core and
> > select_idle_cpu so we keep (almost) the same behavior but each CPU is
> > checked only once.
> >
>
> Which I think it's already doing. Main glitch really is that
> __select_idle_cpu() shouldn't be taking *idle_cpu as it does not consume
> the information.

I don't really like the if (smt) else in the for_each_cpu_wrap(cpu,
cpus, target) loop because it just looks like we fail to merge the idle
core and idle cpu search loops at the end.

But there is probably not much we can do without changing how the idle
core search is accounted in avg_scan_cost.


>
> > > workloads like hackbench which tends to have has_idle_cores return false
> > > positives. It becomes important once average busy CPUs is over half of
> > > the domain for SMT2.
> > >
> > > At least with the patch if that change was made, we still would not scan
> > > twice going over the same runqueues so it would still be an improvement
> >
> > Yeah, for me that's the main goal of this patchset, with the calculation
> > of avg_scan_cost being done only when SIS_PROP is true and the removal
> > of SIS_AVG.
> >
> > any changes in the number of cpu/core to loop on is sensitive to
> > regression and should be done in a separate patch IMHO
> >
>
> Understood.
>
> --
> Mel Gorman
> SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-14 Thread Mel Gorman
On Thu, Jan 14, 2021 at 02:25:32PM +0100, Vincent Guittot wrote:
> On Thu, 14 Jan 2021 at 10:35, Mel Gorman  wrote:
> >
> > On Wed, Jan 13, 2021 at 06:03:00PM +0100, Vincent Guittot wrote:
> > > > @@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct 
> > > > *p, struct sched_domain *sd, int t
> > > > for_each_cpu_wrap(cpu, cpus, target) {
> > > > if (!--nr)
> > > > return -1;
> > > > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > > > -   break;
> > > > +   if (smt) {
> > >
> > > If we want to stay on something similar to the previous behavior, we
> > > want to check on all cores if test_idle_cores is true so nr should be
> > > set to number of cores
> > >
> >
> > I don't think we necessarily want to do that. has_idle_cores is an
> > effective throttling mechanism but it's not perfect. If the full domain
> > is always scanned for a core then there can be excessive scanning in
> 
> But that's what the code is currently doing. Can this change be done
> in another patch so we can check the impact of each change more
> easily?

Ok, when looking at this again instead of just the mail, the flow is;

int i, cpu, idle_cpu = -1, nr = INT_MAX;
...
if (sched_feat(SIS_PROP) && !smt) {
/* recalculate nr */
}

The !smt check should mean that core scanning is still scanning the entire
domain. There is no need to make it specific to the core account and we
are already doing the full scan. Throttling that would be a separate patch.

> This patch 5 should focus on merging select_idle_core and
> select_idle_cpu so we keep (almost) the same behavior but each CPU is
> checked only once.
> 

Which I think it's already doing. Main glitch really is that
__select_idle_cpu() shouldn't be taking *idle_cpu as it does not consume
the information.
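In other words, the helper can simply lose the unused arguments, which
is what the 19 January posting of the patch ends up with:

	static inline int __select_idle_cpu(int cpu)
	{
		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
			return cpu;

		return -1;
	}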

> > workloads like hackbench which tends to have has_idle_cores return false
> > positives. It becomes important once average busy CPUs is over half of
> > the domain for SMT2.
> >
> > At least with the patch if that change was made, we still would not scan
> > twice going over the same runqueues so it would still be an improvement
> 
> Yeah, for me that's the main goal of this patchset, with the calculation
> of avg_scan_cost being done only when SIS_PROP is true and the removal
> of SIS_AVG.
> 
> any changes in the number of cpu/core to loop on is sensitive to
> regression and should be done in a separate patch IMHO
> 

Understood.

-- 
Mel Gorman
SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-14 Thread Vincent Guittot
On Thu, 14 Jan 2021 at 10:35, Mel Gorman  wrote:
>
> On Wed, Jan 13, 2021 at 06:03:00PM +0100, Vincent Guittot wrote:
> > > @@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, 
> > > struct sched_domain *sd, int t
> > > for_each_cpu_wrap(cpu, cpus, target) {
> > > if (!--nr)
> > > return -1;
> > > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > > -   break;
> > > +   if (smt) {
> >
> > If we want to stay on something similar to the previous behavior, we
> > want to check on all cores if test_idle_cores is true so nr should be
> > set to number of cores
> >
>
> I don't think we necessarily want to do that. has_idle_cores is an
> effective throttling mechanism but it's not perfect. If the full domain
> is always scanned for a core then there can be excessive scanning in

But that's what the code is currently doing. Can this change be done
in another patch so we can check the impact of each change more
easily?
This patch 5 should focus on merging select_idle_core and
select_idle_cpu so we keep (almost) the same behavior but each CPU is
checked only once.

> workloads like hackbench which tends to have has_idle_cores return false
> positives. It becomes important once average busy CPUs is over half of
> the domain for SMT2.
>
> At least with the patch if that change was made, we still would not scan
> twice going over the same runqueues so it would still be an improvement

Yeah, for me that's the main goal of this patchset, with the calculation
of avg_scan_cost being done only when SIS_PROP is true and the removal
of SIS_AVG.

any changes in the number of cpu/core to loop on is sensitive to
regression and should be done in a separate patch IMHO

> but it would be nice to avoid excessive deep scanning when there are a
> lot of busy CPUs but individual tasks are rapidly idling.
>
> However, in the event regressions are found, changing to your suggested
> behaviour would be an obvious starting point.
>
> --
> Mel Gorman
> SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-14 Thread Mel Gorman
On Wed, Jan 13, 2021 at 06:03:00PM +0100, Vincent Guittot wrote:
> > @@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, 
> > struct sched_domain *sd, int t
> > for_each_cpu_wrap(cpu, cpus, target) {
> > if (!--nr)
> > return -1;
> > -   if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > -   break;
> > +   if (smt) {
> 
> If we want to stay on something similar to the previous behavior, we
> want to check on all cores if test_idle_cores is true so nr should be
> set to number of cores
> 

I don't think we necessarily want to do that. has_idle_cores is an
effective throttling mechanism but it's not perfect. If the full domain
is always scanned for a core then there can be excessive scanning in
workloads like hackbench which tends to have has_idle_cores return false
positives. It becomes important once average busy CPUs is over half of
the domain for SMT2.

At least with the patch if that change was made, we still would not scan
twice going over the same runqueues so it would still be an improvement
but it would be nice to avoid excessive deep scanning when there are a
lot of busy CPUs but individual tasks are rapidly idling.

However, in the event regressions are found, changing to your suggested
behaviour would be an obvious starting point.
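For concreteness, the suggested variant would amount to something along
these lines in select_idle_cpu() (a hypothetical sketch reusing
sched_smt_weight from earlier in the series, not code from this patch):

	/*
	 * Hypothetical: instead of leaving nr at INT_MAX for the core scan,
	 * bound it to one pass over the cores in the LLC.
	 */
	if (smt)
		nr = sd->span_weight / sched_smt_weight;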

-- 
Mel Gorman
SUSE Labs


Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-13 Thread Vincent Guittot
On Mon, 11 Jan 2021 at 16:50, Mel Gorman  wrote:
>
> Both select_idle_core() and select_idle_cpu() do a loop over the same
> cpumask. Observe that by clearing the already visited CPUs, we can
> fold the iteration and iterate a core at a time.
>
> All we need to do is remember any non-idle CPU we encountered while
> scanning for an idle core. This way we'll only iterate every CPU once.
>
> Signed-off-by: Peter Zijlstra (Intel) 
> Signed-off-by: Mel Gorman 
> ---
>  kernel/sched/fair.c | 97 +++--
>  1 file changed, 59 insertions(+), 38 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 12e08da90024..84f02abb29e3 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain 
> *sd, struct task_struct *p
> return new_cpu;
>  }
>
> +static inline int __select_idle_cpu(struct task_struct *p, int core, struct 
> cpumask *cpus, int *idle_cpu)
> +{
> +   if (available_idle_cpu(core) || sched_idle_cpu(core))
> +   return core;
> +
> +   return -1;
> +}
> +
>  #ifdef CONFIG_SCHED_SMT
>  DEFINE_STATIC_KEY_FALSE(sched_smt_present);
>  EXPORT_SYMBOL_GPL(sched_smt_present);
> @@ -6066,40 +6074,34 @@ void __update_idle_core(struct rq *rq)
>   * there are no idle cores left in the system; tracked through
>   * sd_llc->shared->has_idle_cores and enabled through update_idle_core() 
> above.
>   */
> -static int select_idle_core(struct task_struct *p, struct sched_domain *sd, 
> int target)
> +static int select_idle_core(struct task_struct *p, int core, struct cpumask 
> *cpus, int *idle_cpu)
>  {
> -   struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> -   int core, cpu;
> +   bool idle = true;
> +   int cpu;
>
> if (!static_branch_likely(&sched_smt_present))
> -   return -1;
> -
> -   if (!test_idle_cores(target, false))
> -   return -1;
> -
> -   cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> +   return __select_idle_cpu(p, core, cpus, idle_cpu);
>
> -   for_each_cpu_wrap(core, cpus, target) {
> -   bool idle = true;
> -
> -   for_each_cpu(cpu, cpu_smt_mask(core)) {
> -   if (!available_idle_cpu(cpu)) {
> -   idle = false;
> -   break;
> +   for_each_cpu(cpu, cpu_smt_mask(core)) {
> +   if (!available_idle_cpu(cpu)) {
> +   idle = false;
> +   if (*idle_cpu == -1) {
> +   if (sched_idle_cpu(cpu) && 
> cpumask_test_cpu(cpu, p->cpus_ptr)) {
> +   *idle_cpu = cpu;
> +   break;
> +   }
> +   continue;
> }
> +   break;
> }
> -
> -   if (idle)
> -   return core;
> -
> -   cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
> +   if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
> +   *idle_cpu = cpu;
> }
>
> -   /*
> -* Failed to find an idle core; stop looking for one.
> -*/
> -   set_idle_cores(target, 0);
> +   if (idle)
> +   return core;
>
> +   cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
> return -1;
>  }
>
> @@ -6107,9 +6109,18 @@ static int select_idle_core(struct task_struct *p, 
> struct sched_domain *sd, int
>
>  #define sched_smt_weight   1
>
> -static inline int select_idle_core(struct task_struct *p, struct 
> sched_domain *sd, int target)
> +static inline void set_idle_cores(int cpu, int val)
>  {
> -   return -1;
> +}
> +
> +static inline bool test_idle_cores(int cpu, bool def)
> +{
> +   return def;
> +}
> +
> +static inline int select_idle_core(struct task_struct *p, int core, struct 
> cpumask *cpus, int *idle_cpu)
> +{
> +   return __select_idle_cpu(p, core, cpus, idle_cpu);
>  }
>
>  #endif /* CONFIG_SCHED_SMT */
> @@ -6124,10 +6135,11 @@ static inline int select_idle_core(struct task_struct 
> *p, struct sched_domain *s
>  static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, 
> int target)
>  {
> struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> +   int i, cpu, idle_cpu = -1, nr = INT_MAX;
> +   bool smt = test_idle_cores(target, false);
> +   int this = smp_processor_id();
> struct sched_domain *this_sd;
> u64 time;
> -   int this = smp_processor_id();
> -   int cpu, nr = INT_MAX;
>
> this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
> if (!this_sd)
> @@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, 
> struct sched_domain *sd, int t
>
> cpumask_and(cpus, sched_domain_span(sd), 

[PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

2021-01-11 Thread Mel Gorman
Both select_idle_core() and select_idle_cpu() do a loop over the same
cpumask. Observe that by clearing the already visited CPUs, we can
fold the iteration and iterate a core at a time.

All we need to do is remember any non-idle CPU we encountered while
scanning for an idle core. This way we'll only iterate every CPU once.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Mel Gorman 
---
 kernel/sched/fair.c | 97 +++--
 1 file changed, 59 insertions(+), 38 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 12e08da90024..84f02abb29e3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain 
*sd, struct task_struct *p
return new_cpu;
 }
 
+static inline int __select_idle_cpu(struct task_struct *p, int core, struct 
cpumask *cpus, int *idle_cpu)
+{
+   if (available_idle_cpu(core) || sched_idle_cpu(core))
+   return core;
+
+   return -1;
+}
+
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6066,40 +6074,34 @@ void __update_idle_core(struct rq *rq)
  * there are no idle cores left in the system; tracked through
  * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
  */
-static int select_idle_core(struct task_struct *p, struct sched_domain *sd, 
int target)
+static int select_idle_core(struct task_struct *p, int core, struct cpumask 
*cpus, int *idle_cpu)
 {
-   struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-   int core, cpu;
+   bool idle = true;
+   int cpu;
 
if (!static_branch_likely(&sched_smt_present))
-   return -1;
-
-   if (!test_idle_cores(target, false))
-   return -1;
-
-   cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+   return __select_idle_cpu(p, core, cpus, idle_cpu);
 
-   for_each_cpu_wrap(core, cpus, target) {
-   bool idle = true;
-
-   for_each_cpu(cpu, cpu_smt_mask(core)) {
-   if (!available_idle_cpu(cpu)) {
-   idle = false;
-   break;
+   for_each_cpu(cpu, cpu_smt_mask(core)) {
+   if (!available_idle_cpu(cpu)) {
+   idle = false;
+   if (*idle_cpu == -1) {
+   if (sched_idle_cpu(cpu) && 
cpumask_test_cpu(cpu, p->cpus_ptr)) {
+   *idle_cpu = cpu;
+   break;
+   }
+   continue;
}
+   break;
}
-
-   if (idle)
-   return core;
-
-   cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
+   if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+   *idle_cpu = cpu;
}
 
-   /*
-* Failed to find an idle core; stop looking for one.
-*/
-   set_idle_cores(target, 0);
+   if (idle)
+   return core;
 
+   cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
return -1;
 }
 
@@ -6107,9 +6109,18 @@ static int select_idle_core(struct task_struct *p, 
struct sched_domain *sd, int
 
 #define sched_smt_weight   1
 
-static inline int select_idle_core(struct task_struct *p, struct sched_domain 
*sd, int target)
+static inline void set_idle_cores(int cpu, int val)
 {
-   return -1;
+}
+
+static inline bool test_idle_cores(int cpu, bool def)
+{
+   return def;
+}
+
+static inline int select_idle_core(struct task_struct *p, int core, struct 
cpumask *cpus, int *idle_cpu)
+{
+   return __select_idle_cpu(p, core, cpus, idle_cpu);
 }
 
 #endif /* CONFIG_SCHED_SMT */
@@ -6124,10 +6135,11 @@ static inline int select_idle_core(struct task_struct 
*p, struct sched_domain *s
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int 
target)
 {
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+   int i, cpu, idle_cpu = -1, nr = INT_MAX;
+   bool smt = test_idle_cores(target, false);
+   int this = smp_processor_id();
struct sched_domain *this_sd;
u64 time;
-   int this = smp_processor_id();
-   int cpu, nr = INT_MAX;
 
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
@@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
 
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
-   if (sched_feat(SIS_PROP)) {
+   if (sched_feat(SIS_PROP) && !smt) {
u64 avg_cost, avg_idle, span_avg;
 
/*
@@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, 
struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, cpus, target) {