On 01/08/2018 11:19 AM, Michael Bringmann wrote:
> Add code to parse the new property "ibm,thread-groups" when it is
> present.  The content of this property explicitly defines the number
> of threads per core as well as the PowerPC 'threads_core_mask'.
> The design provides a common device-tree for both P9 normal core and
> P9 fused core systems.  The new property has been observed to be
> available on P9 pHyp systems, but it is not always present on
> OpenPower BMC systems.
> 
> The property updates the kernel to know which CPUs/threads of each
> core are actually present, and then use the map when adding cores
> to the system at boot, or during hotplug operations.
> 
> * Previously, the information about the number of threads per core
>   was inferred solely from the "ibm,ppc-interrupt-server#s" property
>   in the system device tree.
> * Also, prior to this property, the mask of threads per CPU was
>   inferred to be a strict linear series from 0..(nthreads-1).
> * After reading the "ibm,thread-groups" property, we can determine
>   the number of threads per core to be the 'bitmask weight' of the
>   CPU thread mask.
> * Also after reading the property, we can determine which of the
>   possible threads we are allowed to online for each CPU.  It is no
>   longer a simple linear sequence, but may be discontinuous e.g.
>   activate threads 1,2,3,5,6,7 on a core instead of 0-5 sequentially.
> 
> Implementation of the "ibm,thread-groups" property is spread across
> a few files in the powerpc specific code:
> 
> * prom.c: Parse the property and create 'ppc_thread_group_mask'.
>           Use the mask in operation of early_init_dt_scan_cpus().
> * setup-common.c: Import 'ppc_thread_group_mask' and use the value
>           in the operation of cpu_init_thread_core_maps(), and
>           smp_setup_cpu_maps.
> * hotplug-cpu.c: Use 'ppc_thread_group_mask' in several locations
>           where the code previously expected to iterate over a
>           linear series of active threads (0..nthreads-1).
> 
> Note that the "ibm,thread-groups" property also carries 'thread-group'
> semantics, i.e. it can define one or more subgroups of the available
> threads, with each group of threads intended for a specific class of
> task.  Translating these thread group semantics into Linux kernel
> features is TBD.

One thing I don't see addressed in the comments or in the code is
migration support. I think we need to update the thread group mask
post-migration to reflect the threads per core on the new system.

-Nathan

> 
> Signed-off-by: Michael Bringmann <m...@linux.vnet.ibm.com>
> ---
> Changes in V2:
>   -- Add more information and examples to the patch description.
>   -- Rename 'pseries_thread_group_mask' to 'ppc_thread_group_mask'
>   -- Remove unnecessary debug message complaining about absence of
>      property.
>   -- Reduce indent complexity of early_init_dt_scan_cpus().
> ---
>  arch/powerpc/include/asm/cputhreads.h        |    2 +
>  arch/powerpc/kernel/prom.c                   |   74 
> ++++++++++++++++++++++++++
>  arch/powerpc/kernel/setup-common.c           |   30 +++++++----
>  arch/powerpc/platforms/pseries/hotplug-cpu.c |   13 ++++-
>  4 files changed, 107 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/cputhreads.h 
> b/arch/powerpc/include/asm/cputhreads.h
> index d71a909..8e444d4 100644
> --- a/arch/powerpc/include/asm/cputhreads.h
> +++ b/arch/powerpc/include/asm/cputhreads.h
> @@ -31,6 +31,8 @@
>  #define threads_core_mask    (*get_cpu_mask(0))
>  #endif
> 
> +extern cpumask_t ppc_thread_group_mask;
> +
>  /* cpu_thread_mask_to_cores - Return a cpumask of one per cores
>   *                            hit by the argument
>   *
> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> index b15bae2..0a49231 100644
> --- a/arch/powerpc/kernel/prom.c
> +++ b/arch/powerpc/kernel/prom.c
> @@ -68,6 +68,9 @@
>  #define DBG(fmt...)
>  #endif
> 
> +cpumask_t ppc_thread_group_mask;
> +EXPORT_SYMBOL(ppc_thread_group_mask);
> +
>  #ifdef CONFIG_PPC64
>  int __initdata iommu_is_off;
>  int __initdata iommu_force_on;
> @@ -303,6 +306,71 @@ static void __init check_cpu_feature_properties(unsigned 
> long node)
>       }
>  }
> 
> +static void __init early_init_setup_thread_group_mask(unsigned long node,
> +                                             cpumask_t *thread_group_mask)
> +{
> +     const __be32 *thrgrp;
> +     int len, rc = 0;
> +     u32 cc_type = 0, no_split = 0, thr_per_split = 0;
> +     int j, k;
> +
> +     cpumask_clear(thread_group_mask);
> +
> +     thrgrp = of_get_flat_dt_prop(node, "ibm,thread-groups", &len);
> +     if (!thrgrp)
> +             return;
> +
> +     /* Process the thread groups for the Core thread mask */
> +     /* Characteristic type per table */
> +     cc_type = of_read_number(thrgrp++, 1);
> +
> +     /*
> +      * 1 : Group shares common L1, translation cache, and
> +      *     instruction data flow
> +      * >1 : Reserved
> +      */
> +     if (cc_type != 1) {
> +             rc = -EINVAL;
> +             goto endit;
> +     }
> +
> +     /* No. splits */
> +     no_split = of_read_number(thrgrp++, 1);
> +     if (no_split == 0) {
> +             rc = -EINVAL;
> +             goto endit;
> +     }
> +
> +     /* Threads per split */
> +     thr_per_split = of_read_number(thrgrp++, 1);
> +     if (thr_per_split == 0) {
> +             rc = -EINVAL;
> +             goto endit;
> +     }
> +
> +     DBG("INFO: Node %d; ibm,thread-group "
> +             "(cc_t=%d, no_spl=%d, thr_p_spl=%d)\n",
> +             (int)node, (int)cc_type, (int)no_split,
> +             (int)thr_per_split);
> +
> +     for (j = 0; j < no_split; j++) {
> +             for (k = 0; k < thr_per_split; k++) {
> +                     u32 t = of_read_number(thrgrp++, 1);
> +
> +                     cpumask_set_cpu(t, thread_group_mask);
> +                     DBG("INFO: Node %d; enable thread %d\n",
> +                             (int)node, (int)t);
> +             }
> +     }
> +
> +endit:
> +     if (rc) {
> +             DBG("WARNING: Node %d; error processing "
> +                 "ibm,thread-group property\n", (int)node);
> +             cpumask_setall(thread_group_mask);
> +     }
> +}
> +
>  static int __init early_init_dt_scan_cpus(unsigned long node,
>                                         const char *uname, int depth,
>                                         void *data)
> @@ -326,11 +394,17 @@ static int __init early_init_dt_scan_cpus(unsigned long 
> node,
> 
>       nthreads = len / sizeof(int);
> 
> +     /* Figure out the thread subset */
> +     early_init_setup_thread_group_mask(node, &ppc_thread_group_mask);
> +
>       /*
>        * Now see if any of these threads match our boot cpu.
>        * NOTE: This must match the parsing done in smp_setup_cpu_maps.
>        */
>       for (i = 0; i < nthreads; i++) {
> +             if (!cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask))
> +                     continue;
> +
>               /*
>                * version 2 of the kexec param format adds the phys cpuid of
>                * booted proc.
> diff --git a/arch/powerpc/kernel/setup-common.c 
> b/arch/powerpc/kernel/setup-common.c
> index 2075322..53cadcd 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -427,13 +427,16 @@ void __init check_for_initrd(void)
>  EXPORT_SYMBOL_GPL(threads_shift);
>  EXPORT_SYMBOL_GPL(threads_core_mask);
> 
> -static void __init cpu_init_thread_core_maps(int tpc)
> +static void __init cpu_init_thread_core_maps(int tpc,
> +                             cpumask_t *thread_group_mask)
>  {
> +     cpumask_t work_mask;
>       int i;
> 
>       threads_per_core = tpc;
>       threads_per_subcore = tpc;
>       cpumask_clear(&threads_core_mask);
> +     cpumask_clear(&work_mask);
> 
>       /* This implementation only supports power of 2 number of threads
>        * for simplicity and performance
> @@ -442,14 +445,14 @@ static void __init cpu_init_thread_core_maps(int tpc)
>       BUG_ON(tpc != (1 << threads_shift));
> 
>       for (i = 0; i < tpc; i++)
> -             cpumask_set_cpu(i, &threads_core_mask);
> +             cpumask_set_cpu(i, &work_mask);
> +     cpumask_and(&threads_core_mask, &work_mask, thread_group_mask);
> 
>       printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
>              tpc, tpc > 1 ? "s" : "");
>       printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
>  }
> 
> -
>  /**
>   * setup_cpu_maps - initialize the following cpu maps:
>   *                  cpu_possible_mask
> @@ -503,17 +506,24 @@ void __init smp_setup_cpu_maps(void)
>               for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
>                       bool avail;
> 
> -                     DBG("    thread %d -> cpu %d (hard id %d)\n",
> -                         j, cpu, be32_to_cpu(intserv[j]));
> -
>                       avail = of_device_is_available(dn);
>                       if (!avail)
>                               avail = !of_property_match_string(dn,
>                                               "enable-method", "spin-table");
> 
> -                     set_cpu_present(cpu, avail);
> -                     set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
> -                     set_cpu_possible(cpu, true);
> +                     DBG("    thread %d -> cpu %d (hard id %d)\n",
> +                         j, cpu, be32_to_cpu(intserv[j]));
> +
> +                     if (cpumask_test_cpu(cpu % nthreads,
> +                                             &ppc_thread_group_mask)) {
> +                             set_cpu_present(cpu, avail);
> +                             set_hard_smp_processor_id(cpu,
> +                                             be32_to_cpu(intserv[j]));
> +                             set_cpu_possible(cpu, true);
> +                     } else {
> +                             set_cpu_present(cpu, false);
> +                             set_cpu_possible(cpu, false);
> +                     }
>                       cpu++;
>               }
>       }
> @@ -572,7 +582,7 @@ void __init smp_setup_cpu_maps(void)
>        * every CPU in the system. If that is not the case, then some code
>        * here will have to be reworked
>        */
> -     cpu_init_thread_core_maps(nthreads);
> +     cpu_init_thread_core_maps(nthreads, &ppc_thread_group_mask);
> 
>       /* Now that possible cpus are set, set nr_cpu_ids for later use */
>       setup_nr_cpu_ids();
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c 
> b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index a7d14aa7..4125eaa 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -36,6 +36,7 @@
>  #include <asm/xics.h>
>  #include <asm/xive.h>
>  #include <asm/plpar_wrappers.h>
> +#include <asm/cputhreads.h>
> 
>  #include "pseries.h"
>  #include "offline_states.h"
> @@ -258,8 +259,10 @@ static int pseries_add_processor(struct device_node *np)
>       zalloc_cpumask_var(&tmp, GFP_KERNEL);
> 
>       nthreads = len / sizeof(u32);
> -     for (i = 0; i < nthreads; i++)
> -             cpumask_set_cpu(i, tmp);
> +     for (i = 0; i < nthreads; i++) {
> +             if (cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask))
> +                     cpumask_set_cpu(i, tmp);
> +     }
> 
>       cpu_maps_update_begin();
> 
> @@ -324,6 +327,8 @@ static void pseries_remove_processor(struct device_node 
> *np)
> 
>       cpu_maps_update_begin();
>       for (i = 0; i < nthreads; i++) {
> +             if (!cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask))
> +                     continue;
>               thread = be32_to_cpu(intserv[i]);
>               for_each_present_cpu(cpu) {
>                       if (get_hard_smp_processor_id(cpu) != thread)
> @@ -356,6 +361,8 @@ static int dlpar_online_cpu(struct device_node *dn)
> 
>       cpu_maps_update_begin();
>       for (i = 0; i < nthreads; i++) {
> +             if (!cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask))
> +                     continue;
>               thread = be32_to_cpu(intserv[i]);
>               for_each_present_cpu(cpu) {
>                       if (get_hard_smp_processor_id(cpu) != thread)
> @@ -522,6 +529,8 @@ static int dlpar_offline_cpu(struct device_node *dn)
> 
>       cpu_maps_update_begin();
>       for (i = 0; i < nthreads; i++) {
> +             if (!cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask))
> +                     continue;
>               thread = be32_to_cpu(intserv[i]);
>               for_each_present_cpu(cpu) {
>                       if (get_hard_smp_processor_id(cpu) != thread)
> 

Reply via email to