Re: [PATCH 06/11] powerpc/smp: Generalize 2nd sched domain

2020-07-20 Thread Gautham R Shenoy
On Mon, Jul 20, 2020 at 11:49:11AM +0530, Srikar Dronamraju wrote:
> * Gautham R Shenoy  [2020-07-17 12:07:55]:
> 
> > On Tue, Jul 14, 2020 at 10:06:19AM +0530, Srikar Dronamraju wrote:
> > > > Currently the "CACHE" domain happens to be the 2nd sched domain as per
> > > > powerpc_topology. This domain will collapse if the cpumask of the l2-cache
> > > > is the same as the SMT domain. However, we could generalize this domain
> > > > such that it could mean either a "CACHE" domain or a "BIGCORE" domain.
> > > 
> > > While setting up the "CACHE" domain, check if shared_cache is already
> > > set.
> > > 
> > > Cc: linuxppc-dev 
> > > Cc: Michael Ellerman 
> > > Cc: Nick Piggin 
> > > Cc: Oliver OHalloran 
> > > Cc: Nathan Lynch 
> > > Cc: Michael Neuling 
> > > Cc: Anton Blanchard 
> > > Cc: Gautham R Shenoy 
> > > Cc: Vaidyanathan Srinivasan 
> > > Signed-off-by: Srikar Dronamraju 
> > > ---
> > > @@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int 
> > > cpu)
> > >  }
> > >  #endif
> > > 
> > > +static const struct cpumask *cpu_bigcore_mask(int cpu)
> > > +{
> > > + return cpu_core_mask(cpu);
> > 
> > It should be cpu_smt_mask() if we want the redundant big-core to be
> > degenerated in favour of the SMT level on P8, no? Because
> > cpu_core_mask refers to all the CPUs that are in the same chip.
> > 
> 
> Right, but it can't be cpu_smt_mask since cpu_smt_mask is only available
> when CONFIG_SCHED_SMT is enabled. I was looking at using sibling_map, but we
> have to be careful for power9 / PowerNV mode. Guess that should be fine.

Ok.

> 
> > > +}
> > > +
> > >  static struct sched_domain_topology_level powerpc_topology[] = {
> > >  #ifdef CONFIG_SCHED_SMT
> > >   { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> > >  #endif
> > > - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> > > + { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) },
> > >   { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> > >   { NULL, },
> > >  };
> > > @@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu)
> > >  void start_secondary(void *unused)
> > >  {
> > >   unsigned int cpu = smp_processor_id();
> > > - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
> > > 
> > >   mmgrab(_mm);
> > >   current->active_mm = _mm;
> > > @@ -1345,14 +1351,20 @@ void start_secondary(void *unused)
> > >   /* Update topology CPU masks */
> > >   add_cpu_to_masks(cpu);
> > > 
> > > - if (has_big_cores)
> > > - sibling_mask = cpu_smallcore_mask;
> > >   /*
> > >* Check for any shared caches. Note that this must be done on a
> > >* per-core basis because one core in the pair might be disabled.
> > >*/
> > > - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
> > > - shared_caches = true;
> > > + if (!shared_caches) {
> > > + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
> > > + struct cpumask *mask = cpu_l2_cache_mask(cpu);
> > > +
> > > + if (has_big_cores)
> > > + sibling_mask = cpu_smallcore_mask;
> > > +
> > > + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
> > > + shared_caches = true;
> > 
> > Shouldn't we use cpumask_subset() here ?
> 
> Wouldn't cpumask_subset() return 1 if both are the same?

When are caches shared? When sibling_mask(cpu) is a
strict subset of cpu_l2_cache_mask(cpu). cpumask_weight() only
checks whether the number of CPUs in cpu_l2_cache_mask(cpu) is greater
than the number in sibling_mask(cpu), not whether the constituent CPUs
of the former form a strict superset of the latter.

We are better off using
if (!cpumask_equal(sibling_mask(cpu), mask) &&
cpumask_subset(sibling_mask(cpu), mask))

which is accurate.



> We don't want to have shared_caches set if both the masks are equal.


> 
> > 
> > > + }
> > > 
> > >   set_numa_node(numa_cpu_lookup_table[cpu]);
> > >   set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
> > > @@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus)
> > >   smp_ops->bringup_done();
> > > 
> > >   dump_numa_cpu_topology();
> > > + if (shared_caches) {
> > > + pr_info("Using shared cache scheduler topology\n");
> > > + powerpc_topology[bigcore_idx].mask = shared_cache_mask;
> > > +#ifdef CONFIG_SCHED_DEBUG
> > > + powerpc_topology[bigcore_idx].name = "CACHE";
> > > +#endif
> > > + powerpc_topology[bigcore_idx].sd_flags = 
> > > powerpc_shared_cache_flags;
> > > + }
> > 
> > 
> > I would much rather that we have all the topology-fixups done in one
> > function.
> > 
> > fixup_topology(void) {
> >  if (has_big_core)
> > powerpc_topology[smt_idx].mask = smallcore_smt_mask;
> > 
> > if (shared_caches) {
> >const char *name = "CACHE";
> >powerpc_topology[bigcore_idx].mask = shared_cache_mask;
> >strlcpy(powerpc_topology[bigcore_idx].name, name,
> > strlen(name));
> >powerpc_topology[bigcore_idx].sd_flags = 

Re: [PATCH 06/11] powerpc/smp: Generalize 2nd sched domain

2020-07-20 Thread Srikar Dronamraju
* Gautham R Shenoy  [2020-07-17 12:07:55]:

> On Tue, Jul 14, 2020 at 10:06:19AM +0530, Srikar Dronamraju wrote:
> > Currently the "CACHE" domain happens to be the 2nd sched domain as per
> > powerpc_topology. This domain will collapse if the cpumask of the l2-cache
> > is the same as the SMT domain. However, we could generalize this domain
> > such that it could mean either a "CACHE" domain or a "BIGCORE" domain.
> > 
> > While setting up the "CACHE" domain, check if shared_cache is already
> > set.
> > 
> > Cc: linuxppc-dev 
> > Cc: Michael Ellerman 
> > Cc: Nick Piggin 
> > Cc: Oliver OHalloran 
> > Cc: Nathan Lynch 
> > Cc: Michael Neuling 
> > Cc: Anton Blanchard 
> > Cc: Gautham R Shenoy 
> > Cc: Vaidyanathan Srinivasan 
> > Signed-off-by: Srikar Dronamraju 
> > ---
> > @@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int 
> > cpu)
> >  }
> >  #endif
> > 
> > +static const struct cpumask *cpu_bigcore_mask(int cpu)
> > +{
> > +   return cpu_core_mask(cpu);
> 
> It should be cpu_smt_mask() if we want the redundant big-core to be
> degenerated in favour of the SMT level on P8, no? Because
> cpu_core_mask refers to all the CPUs that are in the same chip.
> 

Right, but it can't be cpu_smt_mask since cpu_smt_mask is only available
when CONFIG_SCHED_SMT is enabled. I was looking at using sibling_map, but we
have to be careful for power9 / PowerNV mode. Guess that should be fine.

> > +}
> > +
> >  static struct sched_domain_topology_level powerpc_topology[] = {
> >  #ifdef CONFIG_SCHED_SMT
> > { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
> >  #endif
> > -   { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> > +   { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) },
> > { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> > { NULL, },
> >  };
> > @@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu)
> >  void start_secondary(void *unused)
> >  {
> > unsigned int cpu = smp_processor_id();
> > -   struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
> > 
> > mmgrab(_mm);
> > current->active_mm = _mm;
> > @@ -1345,14 +1351,20 @@ void start_secondary(void *unused)
> > /* Update topology CPU masks */
> > add_cpu_to_masks(cpu);
> > 
> > -   if (has_big_cores)
> > -   sibling_mask = cpu_smallcore_mask;
> > /*
> >  * Check for any shared caches. Note that this must be done on a
> >  * per-core basis because one core in the pair might be disabled.
> >  */
> > -   if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
> > -   shared_caches = true;
> > +   if (!shared_caches) {
> > +   struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
> > +   struct cpumask *mask = cpu_l2_cache_mask(cpu);
> > +
> > +   if (has_big_cores)
> > +   sibling_mask = cpu_smallcore_mask;
> > +
> > +   if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
> > +   shared_caches = true;
> 
> Shouldn't we use cpumask_subset() here ?

Wouldn't cpumask_subset() return 1 if both are the same?
We don't want to have shared_caches set if both the masks are equal.

>   
> > +   }
> > 
> > set_numa_node(numa_cpu_lookup_table[cpu]);
> > set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
> > @@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus)
> > smp_ops->bringup_done();
> > 
> > dump_numa_cpu_topology();
> > +   if (shared_caches) {
> > +   pr_info("Using shared cache scheduler topology\n");
> > +   powerpc_topology[bigcore_idx].mask = shared_cache_mask;
> > +#ifdef CONFIG_SCHED_DEBUG
> > +   powerpc_topology[bigcore_idx].name = "CACHE";
> > +#endif
> > +   powerpc_topology[bigcore_idx].sd_flags = 
> > powerpc_shared_cache_flags;
> > +   }
> 
> 
> I would much rather that we have all the topology-fixups done in one
> function.
> 
> fixup_topology(void) {
>  if (has_big_core)
> powerpc_topology[smt_idx].mask = smallcore_smt_mask;
> 
> if (shared_caches) {
>const char *name = "CACHE";
>powerpc_topology[bigcore_idx].mask = shared_cache_mask;
>strlcpy(powerpc_topology[bigcore_idx].name, name,
>   strlen(name));
>powerpc_topology[bigcore_idx].sd_flags = powerpc_shared_cache_flags;
> }
> 
> /* Any other changes to the topology structure here */

We could do this.

> 
> And also as an optimization, get rid of degenerate structures here
> itself so that we don't pay additional penalty while building the
> sched-domains each time.
> 

Yes, this is definitely planned, but for slightly later.

Thanks for the review and comments.

-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 06/11] powerpc/smp: Generalize 2nd sched domain

2020-07-17 Thread Gautham R Shenoy
On Tue, Jul 14, 2020 at 10:06:19AM +0530, Srikar Dronamraju wrote:
> Currently the "CACHE" domain happens to be the 2nd sched domain as per
> powerpc_topology. This domain will collapse if the cpumask of the l2-cache
> is the same as the SMT domain. However, we could generalize this domain
> such that it could mean either a "CACHE" domain or a "BIGCORE" domain.
> 
> While setting up the "CACHE" domain, check if shared_cache is already
> set.
> 
> Cc: linuxppc-dev 
> Cc: Michael Ellerman 
> Cc: Nick Piggin 
> Cc: Oliver OHalloran 
> Cc: Nathan Lynch 
> Cc: Michael Neuling 
> Cc: Anton Blanchard 
> Cc: Gautham R Shenoy 
> Cc: Vaidyanathan Srinivasan 
> Signed-off-by: Srikar Dronamraju 
> ---
>  arch/powerpc/kernel/smp.c | 48 +++
>  1 file changed, 34 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 875f57e41355..f8faf75135af 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -85,6 +85,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
>  EXPORT_PER_CPU_SYMBOL(cpu_core_map);
>  EXPORT_SYMBOL_GPL(has_big_cores);
> 
> +enum {
> +#ifdef CONFIG_SCHED_SMT
> + smt_idx,
> +#endif
> + bigcore_idx,
> + die_idx,
> +};
> +
>  #define MAX_THREAD_LIST_SIZE 8
>  #define THREAD_GROUP_SHARE_L1   1
>  struct thread_groups {
> @@ -851,13 +859,7 @@ static int powerpc_shared_cache_flags(void)
>   */
>  static const struct cpumask *shared_cache_mask(int cpu)
>  {
> - if (shared_caches)
> - return cpu_l2_cache_mask(cpu);
> -
> - if (has_big_cores)
> - return cpu_smallcore_mask(cpu);
> -
> - return cpu_smt_mask(cpu);
> + return per_cpu(cpu_l2_cache_map, cpu);
>  }
> 
>  #ifdef CONFIG_SCHED_SMT
> @@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int cpu)
>  }
>  #endif
> 
> +static const struct cpumask *cpu_bigcore_mask(int cpu)
> +{
> + return cpu_core_mask(cpu);

It should be cpu_smt_mask() if we want the redundant big-core to be
degenerated in favour of the SMT level on P8, no? Because
cpu_core_mask refers to all the CPUs that are in the same chip.


> +}
> +
>  static struct sched_domain_topology_level powerpc_topology[] = {
>  #ifdef CONFIG_SCHED_SMT
>   { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
>  #endif
> - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
> + { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) },
>   { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>   { NULL, },
>  };
> @@ -895,7 +902,7 @@ static int init_big_cores(void)
> 
>  #ifdef CONFIG_SCHED_SMT
>   pr_info("Big cores detected. Using small core scheduling\n");
> - powerpc_topology[0].mask = smallcore_smt_mask;
> + powerpc_topology[smt_idx].mask = smallcore_smt_mask;
>  #endif
> 
>   return 0;
> @@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu)
>  void start_secondary(void *unused)
>  {
>   unsigned int cpu = smp_processor_id();
> - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
> 
>   mmgrab(_mm);
>   current->active_mm = _mm;
> @@ -1345,14 +1351,20 @@ void start_secondary(void *unused)
>   /* Update topology CPU masks */
>   add_cpu_to_masks(cpu);
> 
> - if (has_big_cores)
> - sibling_mask = cpu_smallcore_mask;
>   /*
>* Check for any shared caches. Note that this must be done on a
>* per-core basis because one core in the pair might be disabled.
>*/
> - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
> - shared_caches = true;
> + if (!shared_caches) {
> + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
> + struct cpumask *mask = cpu_l2_cache_mask(cpu);
> +
> + if (has_big_cores)
> + sibling_mask = cpu_smallcore_mask;
> +
> + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
> + shared_caches = true;

Shouldn't we use cpumask_subset() here ?


> + }
> 
>   set_numa_node(numa_cpu_lookup_table[cpu]);
>   set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
> @@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus)
>   smp_ops->bringup_done();
> 
>   dump_numa_cpu_topology();
> + if (shared_caches) {
> + pr_info("Using shared cache scheduler topology\n");
> + powerpc_topology[bigcore_idx].mask = shared_cache_mask;
> +#ifdef CONFIG_SCHED_DEBUG
> + powerpc_topology[bigcore_idx].name = "CACHE";
> +#endif
> + powerpc_topology[bigcore_idx].sd_flags = 
> powerpc_shared_cache_flags;
> + }


I would much rather that we have all the topology-fixups done in one
function.

fixup_topology(void) {
 if (has_big_core)
powerpc_topology[smt_idx].mask = smallcore_smt_mask;

if (shared_caches) {
   const char *name = "CACHE";
   powerpc_topology[bigcore_idx].mask = 

[PATCH 06/11] powerpc/smp: Generalize 2nd sched domain

2020-07-13 Thread Srikar Dronamraju
Currently the "CACHE" domain happens to be the 2nd sched domain as per
powerpc_topology. This domain will collapse if the cpumask of the l2-cache
is the same as the SMT domain. However, we could generalize this domain
such that it could mean either a "CACHE" domain or a "BIGCORE" domain.

While setting up the "CACHE" domain, check if shared_cache is already
set.

Cc: linuxppc-dev 
Cc: Michael Ellerman 
Cc: Nick Piggin 
Cc: Oliver OHalloran 
Cc: Nathan Lynch 
Cc: Michael Neuling 
Cc: Anton Blanchard 
Cc: Gautham R Shenoy 
Cc: Vaidyanathan Srinivasan 
Signed-off-by: Srikar Dronamraju 
---
 arch/powerpc/kernel/smp.c | 48 +++
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 875f57e41355..f8faf75135af 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -85,6 +85,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 EXPORT_SYMBOL_GPL(has_big_cores);
 
+enum {
+#ifdef CONFIG_SCHED_SMT
+   smt_idx,
+#endif
+   bigcore_idx,
+   die_idx,
+};
+
 #define MAX_THREAD_LIST_SIZE   8
 #define THREAD_GROUP_SHARE_L1   1
 struct thread_groups {
@@ -851,13 +859,7 @@ static int powerpc_shared_cache_flags(void)
  */
 static const struct cpumask *shared_cache_mask(int cpu)
 {
-   if (shared_caches)
-   return cpu_l2_cache_mask(cpu);
-
-   if (has_big_cores)
-   return cpu_smallcore_mask(cpu);
-
-   return cpu_smt_mask(cpu);
+   return per_cpu(cpu_l2_cache_map, cpu);
 }
 
 #ifdef CONFIG_SCHED_SMT
@@ -867,11 +869,16 @@ static const struct cpumask *smallcore_smt_mask(int cpu)
 }
 #endif
 
+static const struct cpumask *cpu_bigcore_mask(int cpu)
+{
+   return cpu_core_mask(cpu);
+}
+
 static struct sched_domain_topology_level powerpc_topology[] = {
 #ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
 #endif
-   { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+   { cpu_bigcore_mask, SD_INIT_NAME(BIGCORE) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
 };
@@ -895,7 +902,7 @@ static int init_big_cores(void)
 
 #ifdef CONFIG_SCHED_SMT
pr_info("Big cores detected. Using small core scheduling\n");
-   powerpc_topology[0].mask = smallcore_smt_mask;
+   powerpc_topology[smt_idx].mask = smallcore_smt_mask;
 #endif
 
return 0;
@@ -1319,7 +1326,6 @@ static void add_cpu_to_masks(int cpu)
 void start_secondary(void *unused)
 {
unsigned int cpu = smp_processor_id();
-   struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
 
mmgrab(_mm);
current->active_mm = _mm;
@@ -1345,14 +1351,20 @@ void start_secondary(void *unused)
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
 
-   if (has_big_cores)
-   sibling_mask = cpu_smallcore_mask;
/*
 * Check for any shared caches. Note that this must be done on a
 * per-core basis because one core in the pair might be disabled.
 */
-   if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
-   shared_caches = true;
+   if (!shared_caches) {
+   struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
+   struct cpumask *mask = cpu_l2_cache_mask(cpu);
+
+   if (has_big_cores)
+   sibling_mask = cpu_smallcore_mask;
+
+   if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
+   shared_caches = true;
+   }
 
set_numa_node(numa_cpu_lookup_table[cpu]);
set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
@@ -1390,6 +1402,14 @@ void __init smp_cpus_done(unsigned int max_cpus)
smp_ops->bringup_done();
 
dump_numa_cpu_topology();
+   if (shared_caches) {
+   pr_info("Using shared cache scheduler topology\n");
+   powerpc_topology[bigcore_idx].mask = shared_cache_mask;
+#ifdef CONFIG_SCHED_DEBUG
+   powerpc_topology[bigcore_idx].name = "CACHE";
+#endif
+   powerpc_topology[bigcore_idx].sd_flags = 
powerpc_shared_cache_flags;
+   }
 
set_sched_topology(powerpc_topology);
 }
-- 
2.17.1