Re: [PATCHv4 01/12] sched: Add static_key for asymmetric cpu capacity optimizations

2018-08-02 Thread Morten Rasmussen
On Tue, Jul 31, 2018 at 12:59:16PM +0200, Peter Zijlstra wrote:
> 
> Combined with that SD_ASYM.. rework I ended up with the below.
> 
> Holler if you want it changed :-)

Looks good to me.

Thanks,
Morten


Re: [PATCHv4 01/12] sched: Add static_key for asymmetric cpu capacity optimizations

2018-08-02 Thread Morten Rasmussen
On Tue, Jul 31, 2018 at 12:59:16PM +0200, Peter Zijlstra wrote:
> 
> Combined with that SD_ASYM.. rework I ended up with the below.
> 
> Holler if you want it changed :-)

Looks good to me.

Thanks,
Morten


Re: [PATCHv4 01/12] sched: Add static_key for asymmetric cpu capacity optimizations

2018-07-31 Thread Peter Zijlstra


Combined with that SD_ASYM.. rework I ended up with the below.

Holler if you want it changed :-)


---


Subject: sched: Add static_key for asymmetric cpu capacity optimizations
From: Morten Rasmussen 
Date: Wed, 4 Jul 2018 11:17:39 +0100

The existing asymmetric cpu capacity code should cause minimal overhead
for others. Putting it behind a static_key, like it has been done for SMT
optimizations, would make it easier to extend and improve without
causing harm to others moving forward.

cc: Ingo Molnar 
cc: Peter Zijlstra 

Cc: valentin.schnei...@arm.com
Cc: mi...@redhat.com
Cc: vincent.guit...@linaro.org
Cc: dietmar.eggem...@arm.com
Cc: gaku.inami...@renesas.com
Signed-off-by: Morten Rasmussen 
Signed-off-by: Peter Zijlstra (Intel) 
Link: 
http://lkml.kernel.org/r/1530699470-29808-2-git-send-email-morten.rasmus...@arm.com
---
 kernel/sched/fair.c |3 +++
 kernel/sched/sched.h|1 +
 kernel/sched/topology.c |9 ++++++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6186,6 +6186,9 @@ static int wake_cap(struct task_struct *
 {
long min_cap, max_cap;
 
+   if (!static_branch_unlikely(&sched_asym_cpucapacity))
+   return 0;
+
min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
 
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1185,6 +1185,7 @@ DECLARE_PER_CPU(int, sd_llc_id);
 DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DECLARE_PER_CPU(struct sched_domain *, sd_numa);
 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
+extern struct static_key_false sched_asym_cpucapacity;
 
 struct sched_group_capacity {
atomic_t ref;
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -398,6 +398,7 @@ DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
 DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -1708,6 +1709,7 @@ build_sched_domains(const struct cpumask
struct rq *rq = NULL;
int i, ret = -ENOMEM;
struct sched_domain_topology_level *tl_asym;
+   bool has_asym = false;
 
alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
if (alloc_state != sa_rootdomain)
@@ -1723,8 +1725,10 @@ build_sched_domains(const struct cpumask
for_each_sd_topology(tl) {
int dflags = 0;
 
-   if (tl == tl_asym)
+   if (tl == tl_asym) {
dflags |= SD_ASYM_CPUCAPACITY;
+   has_asym = true;
+   }
 
sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, 
i);
 
@@ -1776,6 +1780,9 @@ build_sched_domains(const struct cpumask
}
rcu_read_unlock();
 
+   if (has_asym)
+   static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
+
if (rq && sched_debug_enabled) {
pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);


Re: [PATCHv4 01/12] sched: Add static_key for asymmetric cpu capacity optimizations

2018-07-31 Thread Peter Zijlstra


Combined with that SD_ASYM.. rework I ended up with the below.

Holler if you want it changed :-)


---


Subject: sched: Add static_key for asymmetric cpu capacity optimizations
From: Morten Rasmussen 
Date: Wed, 4 Jul 2018 11:17:39 +0100

The existing asymmetric cpu capacity code should cause minimal overhead
for others. Putting it behind a static_key, like it has been done for SMT
optimizations, would make it easier to extend and improve without
causing harm to others moving forward.

cc: Ingo Molnar 
cc: Peter Zijlstra 

Cc: valentin.schnei...@arm.com
Cc: mi...@redhat.com
Cc: vincent.guit...@linaro.org
Cc: dietmar.eggem...@arm.com
Cc: gaku.inami...@renesas.com
Signed-off-by: Morten Rasmussen 
Signed-off-by: Peter Zijlstra (Intel) 
Link: 
http://lkml.kernel.org/r/1530699470-29808-2-git-send-email-morten.rasmus...@arm.com
---
 kernel/sched/fair.c |3 +++
 kernel/sched/sched.h|1 +
 kernel/sched/topology.c |9 ++++++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6186,6 +6186,9 @@ static int wake_cap(struct task_struct *
 {
long min_cap, max_cap;
 
+   if (!static_branch_unlikely(&sched_asym_cpucapacity))
+   return 0;
+
min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
 
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1185,6 +1185,7 @@ DECLARE_PER_CPU(int, sd_llc_id);
 DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DECLARE_PER_CPU(struct sched_domain *, sd_numa);
 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
+extern struct static_key_false sched_asym_cpucapacity;
 
 struct sched_group_capacity {
atomic_t ref;
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -398,6 +398,7 @@ DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
 DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -1708,6 +1709,7 @@ build_sched_domains(const struct cpumask
struct rq *rq = NULL;
int i, ret = -ENOMEM;
struct sched_domain_topology_level *tl_asym;
+   bool has_asym = false;
 
alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
if (alloc_state != sa_rootdomain)
@@ -1723,8 +1725,10 @@ build_sched_domains(const struct cpumask
for_each_sd_topology(tl) {
int dflags = 0;
 
-   if (tl == tl_asym)
+   if (tl == tl_asym) {
dflags |= SD_ASYM_CPUCAPACITY;
+   has_asym = true;
+   }
 
sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, 
i);
 
@@ -1776,6 +1780,9 @@ build_sched_domains(const struct cpumask
}
rcu_read_unlock();
 
+   if (has_asym)
+   static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
+
if (rq && sched_debug_enabled) {
pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);