On 01/08/2018 11:19 AM, Michael Bringmann wrote: > Add code to parse the new property "ibm,thread-groups" when it is > present. The content of this property explicitly defines the number > of threads per core as well as the PowerPC 'threads_core_mask'. > The design provides a common device-tree for both P9 normal core and > P9 fused core systems. The new property has been observed to be > available on P9 pHyp systems, but it is not always present on > OpenPower BMC systems. > > The property updates the kernel to know which CPUs/threads of each > core are actually present, and then use the map when adding cores > to the system at boot, or during hotplug operations. > > * Previously, the information about the number of threads per core > was inferred solely from the "ibm,ppc-interrupt-server#s" property > in the system device tree. > * Also prior to this property, the mask of threads per CPU was > inferred to be a strict linear series from 0..(nthreads-1). > * After reading the "ibm,thread-groups" property, we can determine > the number of threads per core to be the 'bitmask weight' of the > CPU thread mask. > * Also after reading the property, we can determine which of the > possible threads we are allowed to online for each CPU. It is no > longer a simple linear sequence, but may be discontinuous e.g. > activate threads 1,2,3,5,6,7 on a core instead of 0-5 sequentially. > > Implementation of the "ibm,thread-groups" property is spread across > a few files in the powerpc specific code: > > * prom.c: Parse the property and create 'ppc_thread_group_mask'. > Use the mask in operation of early_init_dt_scan_cpus(). > * setup-common.c: Import 'ppc_thread_group_mask' and use the value > in the operation of cpu_init_thread_core_maps(), and > smp_setup_cpu_maps(). > * hotplug-cpu.c: Use 'ppc_thread_group_mask' in several locations > where the code previously expected to iterate over a > linear series of active threads (0..nthreads-1). 
> > Note that the "ibm,thread-groups" property also includes semantics > of 'thread-group', i.e., defining one or more subgroups of the available > threads, each group of threads to be used for a specific class of > task. Translating thread group semantics into Linux kernel features > is TBD.
One thing I don't see addressed in the comments or in the code is migration support. I think we need to update the thread group mask post-migration to reflect the threads per core on the new system. -Nathan > > Signed-off-by: Michael Bringmann <m...@linux.vnet.ibm.com> > --- > Changes in V2: > -- Add more information and examples to the patch description. > -- Rename 'pseries_thread_group_mask' to 'ppc_thread_group_mask' > -- Remove unnecessary debug message complaining about absence of > property. > -- Reduce indent complexity of early_init_dt_scan_cpus(). > --- > arch/powerpc/include/asm/cputhreads.h | 2 + > arch/powerpc/kernel/prom.c | 74 > ++++++++++++++++++++++++++ > arch/powerpc/kernel/setup-common.c | 30 +++++++---- > arch/powerpc/platforms/pseries/hotplug-cpu.c | 13 ++++- > 4 files changed, 107 insertions(+), 12 deletions(-) > > diff --git a/arch/powerpc/include/asm/cputhreads.h > b/arch/powerpc/include/asm/cputhreads.h > index d71a909..8e444d4 100644 > --- a/arch/powerpc/include/asm/cputhreads.h > +++ b/arch/powerpc/include/asm/cputhreads.h > @@ -31,6 +31,8 @@ > #define threads_core_mask (*get_cpu_mask(0)) > #endif > > +extern cpumask_t ppc_thread_group_mask; > + > /* cpu_thread_mask_to_cores - Return a cpumask of one per cores > * hit by the argument > * > diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c > index b15bae2..0a49231 100644 > --- a/arch/powerpc/kernel/prom.c > +++ b/arch/powerpc/kernel/prom.c > @@ -68,6 +68,9 @@ > #define DBG(fmt...) 
> #endif > > +cpumask_t ppc_thread_group_mask; > +EXPORT_SYMBOL(ppc_thread_group_mask); > + > #ifdef CONFIG_PPC64 > int __initdata iommu_is_off; > int __initdata iommu_force_on; > @@ -303,6 +306,71 @@ static void __init check_cpu_feature_properties(unsigned > long node) > } > } > > +static void __init early_init_setup_thread_group_mask(unsigned long node, > + cpumask_t *thread_group_mask) > +{ > + const __be32 *thrgrp; > + int len, rc = 0; > + u32 cc_type = 0, no_split = 0, thr_per_split = 0; > + int j, k; > + > + cpumask_clear(thread_group_mask); > + > + thrgrp = of_get_flat_dt_prop(node, "ibm,thread-groups", &len); > + if (!thrgrp) > + return; > + > + /* Process the thread groups for the Core thread mask */ > + /* Characteristic type per table */ > + cc_type = of_read_number(thrgrp++, 1); > + > + /* > + * 1 : Group shares common L1, translation cache, and > + * instruction data flow > + * >1 : Reserved > + */ > + if (cc_type != 1) { > + rc = -EINVAL; > + goto endit; > + } > + > + /* No. 
splits */ > + no_split = of_read_number(thrgrp++, 1); > + if (no_split == 0) { > + rc = -EINVAL; > + goto endit; > + } > + > + /* Threads per split */ > + thr_per_split = of_read_number(thrgrp++, 1); > + if (thr_per_split == 0) { > + rc = -EINVAL; > + goto endit; > + } > + > + DBG("INFO: Node %d; ibm,thread-group " > + "(cc_t=%d, no_spl=%d, thr_p_spl=%d)\n", > + (int)node, (int)cc_type, (int)no_split, > + (int)thr_per_split); > + > + for (j = 0; j < no_split; j++) { > + for (k = 0; k < thr_per_split; k++) { > + u32 t = of_read_number(thrgrp++, 1); > + > + cpumask_set_cpu(t, thread_group_mask); > + DBG("INFO: Node %d; enable thread %d\n", > + (int)node, (int)t); > + } > + } > + > +endit: > + if (rc) { > + DBG("WARNING: Node %d; error processing " > + "ibm,thread-group property\n", (int)node); > + cpumask_setall(thread_group_mask); > + } > +} > + > static int __init early_init_dt_scan_cpus(unsigned long node, > const char *uname, int depth, > void *data) > @@ -326,11 +394,17 @@ static int __init early_init_dt_scan_cpus(unsigned long > node, > > nthreads = len / sizeof(int); > > + /* Figure out the thread subset */ > + early_init_setup_thread_group_mask(node, &ppc_thread_group_mask); > + > /* > * Now see if any of these threads match our boot cpu. > * NOTE: This must match the parsing done in smp_setup_cpu_maps. > */ > for (i = 0; i < nthreads; i++) { > + if (!cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask)) > + continue; > + > /* > * version 2 of the kexec param format adds the phys cpuid of > * booted proc. 
> diff --git a/arch/powerpc/kernel/setup-common.c > b/arch/powerpc/kernel/setup-common.c > index 2075322..53cadcd 100644 > --- a/arch/powerpc/kernel/setup-common.c > +++ b/arch/powerpc/kernel/setup-common.c > @@ -427,13 +427,16 @@ void __init check_for_initrd(void) > EXPORT_SYMBOL_GPL(threads_shift); > EXPORT_SYMBOL_GPL(threads_core_mask); > > -static void __init cpu_init_thread_core_maps(int tpc) > +static void __init cpu_init_thread_core_maps(int tpc, > + cpumask_t *thread_group_mask) > { > + cpumask_t work_mask; > int i; > > threads_per_core = tpc; > threads_per_subcore = tpc; > cpumask_clear(&threads_core_mask); > + cpumask_clear(&work_mask); > > /* This implementation only supports power of 2 number of threads > * for simplicity and performance > @@ -442,14 +445,14 @@ static void __init cpu_init_thread_core_maps(int tpc) > BUG_ON(tpc != (1 << threads_shift)); > > for (i = 0; i < tpc; i++) > - cpumask_set_cpu(i, &threads_core_mask); > + cpumask_set_cpu(i, &work_mask); > + cpumask_and(&threads_core_mask, &work_mask, thread_group_mask); > > printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", > tpc, tpc > 1 ? 
"s" : ""); > printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift); > } > > - > /** > * setup_cpu_maps - initialize the following cpu maps: > * cpu_possible_mask > @@ -503,17 +506,24 @@ void __init smp_setup_cpu_maps(void) > for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { > bool avail; > > - DBG(" thread %d -> cpu %d (hard id %d)\n", > - j, cpu, be32_to_cpu(intserv[j])); > - > avail = of_device_is_available(dn); > if (!avail) > avail = !of_property_match_string(dn, > "enable-method", "spin-table"); > > - set_cpu_present(cpu, avail); > - set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); > - set_cpu_possible(cpu, true); > + DBG(" thread %d -> cpu %d (hard id %d)\n", > + j, cpu, be32_to_cpu(intserv[j])); > + > + if (cpumask_test_cpu(cpu % nthreads, > + &ppc_thread_group_mask)) { > + set_cpu_present(cpu, avail); > + set_hard_smp_processor_id(cpu, > + be32_to_cpu(intserv[j])); > + set_cpu_possible(cpu, true); > + } else { > + set_cpu_present(cpu, false); > + set_cpu_possible(cpu, false); > + } > cpu++; > } > } > @@ -572,7 +582,7 @@ void __init smp_setup_cpu_maps(void) > * every CPU in the system. 
If that is not the case, then some code > * here will have to be reworked > */ > - cpu_init_thread_core_maps(nthreads); > + cpu_init_thread_core_maps(nthreads, &ppc_thread_group_mask); > > /* Now that possible cpus are set, set nr_cpu_ids for later use */ > setup_nr_cpu_ids(); > diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c > b/arch/powerpc/platforms/pseries/hotplug-cpu.c > index a7d14aa7..4125eaa 100644 > --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c > +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c > @@ -36,6 +36,7 @@ > #include <asm/xics.h> > #include <asm/xive.h> > #include <asm/plpar_wrappers.h> > +#include <asm/cputhreads.h> > > #include "pseries.h" > #include "offline_states.h" > @@ -258,8 +259,10 @@ static int pseries_add_processor(struct device_node *np) > zalloc_cpumask_var(&tmp, GFP_KERNEL); > > nthreads = len / sizeof(u32); > - for (i = 0; i < nthreads; i++) > - cpumask_set_cpu(i, tmp); > + for (i = 0; i < nthreads; i++) { > + if (cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask)) > + cpumask_set_cpu(i, tmp); > + } > > cpu_maps_update_begin(); > > @@ -324,6 +327,8 @@ static void pseries_remove_processor(struct device_node > *np) > > cpu_maps_update_begin(); > for (i = 0; i < nthreads; i++) { > + if (!cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask)) > + continue; > thread = be32_to_cpu(intserv[i]); > for_each_present_cpu(cpu) { > if (get_hard_smp_processor_id(cpu) != thread) > @@ -356,6 +361,8 @@ static int dlpar_online_cpu(struct device_node *dn) > > cpu_maps_update_begin(); > for (i = 0; i < nthreads; i++) { > + if (!cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask)) > + continue; > thread = be32_to_cpu(intserv[i]); > for_each_present_cpu(cpu) { > if (get_hard_smp_processor_id(cpu) != thread) > @@ -522,6 +529,8 @@ static int dlpar_offline_cpu(struct device_node *dn) > > cpu_maps_update_begin(); > for (i = 0; i < nthreads; i++) { > + if (!cpumask_test_cpu(i % nthreads, &ppc_thread_group_mask)) > + continue; > thread 
= be32_to_cpu(intserv[i]); > for_each_present_cpu(cpu) { > if (get_hard_smp_processor_id(cpu) != thread) >