* Parth Shah <pa...@linux.ibm.com> [2021-07-28 23:26:05]:

> From: "Gautham R. Shenoy" <e...@linux.vnet.ibm.com>
> 
> Currently the cacheinfo code on powerpc indexes the "cache" objects
> (modelling the L1/L2/L3 caches) where the key is the device-tree node
> corresponding to that cache. On some of the POWER server platforms,
> thread-groups within the core share different sets of caches (e.g. on
> SMT8 POWER9 systems, threads 0,2,4,6 of a core share one L1 cache and
> threads 1,3,5,7 of the same core share another L1 cache). On such
> platforms there is a single device-tree node corresponding to that
> cache, and the cache-configuration within the threads of the core is
> indicated via the "ibm,thread-groups" device-tree property.
> 
> Since the current code is not aware of the "ibm,thread-groups"
> property, on the aforementioned systems the cacheinfo code still
> treats all the threads in the core as sharing the cache, because of
> the single device-tree node (in the earlier example, the cacheinfo
> code would say that CPUs 0-7 share the L1 cache).
> 
> In this patch, we make the powerpc cacheinfo code aware of the
> "ibm,thread-groups" property. We index the "cache" objects by the
> key-pair (device-tree node, thread-group id). For any CPU X, for a
> given level of cache, the thread-group id is defined to be the first
> CPU in the "ibm,thread-groups" cache-group containing CPU X. For
> levels of cache which are not represented in the "ibm,thread-groups"
> property, the thread-group id is -1.
> 
> Signed-off-by: Gautham R. Shenoy <e...@linux.vnet.ibm.com>
> [parth: Remove "static" keyword for the definition of
> "thread_group_l1_cache_map" and "thread_group_l2_cache_map" to get
> rid of the compile error.]
> Signed-off-by: Parth Shah <pa...@linux.ibm.com>
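
To restate the indexing scheme with a concrete example (values are
illustrative, based on the SMT8 case in the changelog, and assume
has_big_cores is set): with threads {0,2,4,6} sharing one L1 and
threads {1,3,5,7} the other, the per-cpu masks derived from
"ibm,thread-groups" at boot would look like:

	/*
	 * Hypothetical SMT8 big-core:
	 *
	 *   thread_group_l1_cache_map of CPUs 0,2,4,6 = {0,2,4,6}
	 *     -> thread-group id 0 (first CPU in the mask)
	 *   thread_group_l1_cache_map of CPUs 1,3,5,7 = {1,3,5,7}
	 *     -> thread-group id 1
	 */

So the single L1 device-tree node is now indexed under two keys,
(node, 0) and (node, 1), and the two resulting L1 cache objects end
up with disjoint shared_cpu_maps.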
Looks good to me.

Reviewed-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>

> ---
>  arch/powerpc/include/asm/smp.h  |  3 ++
>  arch/powerpc/kernel/cacheinfo.c | 80 ++++++++++++++++++++++++---------
>  arch/powerpc/kernel/smp.c       |  4 +-
>  3 files changed, 63 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
> index 03b3d010cbab..1259040cc3a4 100644
> --- a/arch/powerpc/include/asm/smp.h
> +++ b/arch/powerpc/include/asm/smp.h
> @@ -33,6 +33,9 @@ extern bool coregroup_enabled;
>  extern int cpu_to_chip_id(int cpu);
>  extern int *chip_id_lookup_table;
> 
> +DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
> +DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> +
>  #ifdef CONFIG_SMP
> 
>  struct smp_ops_t {
> diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
> index 6f903e9aa20b..5a6925d87424 100644
> --- a/arch/powerpc/kernel/cacheinfo.c
> +++ b/arch/powerpc/kernel/cacheinfo.c
> @@ -120,6 +120,7 @@ struct cache {
>  	struct cpumask shared_cpu_map; /* online CPUs using this cache */
>  	int type;                      /* split cache disambiguation */
>  	int level;                     /* level not explicit in device tree */
> +	int group_id;                  /* id of the group of threads that share
> +					  this cache */
>  	struct list_head list;         /* global list of cache objects */
>  	struct cache *next_local;      /* next cache of >= level */
>  };
> @@ -142,22 +143,24 @@ static const char *cache_type_string(const struct cache *cache)
>  }
> 
>  static void cache_init(struct cache *cache, int type, int level,
> -		       struct device_node *ofnode)
> +		       struct device_node *ofnode, int group_id)
>  {
>  	cache->type = type;
>  	cache->level = level;
>  	cache->ofnode = of_node_get(ofnode);
> +	cache->group_id = group_id;
>  	INIT_LIST_HEAD(&cache->list);
>  	list_add(&cache->list, &cache_list);
>  }
> 
> -static struct cache *new_cache(int type, int level, struct device_node *ofnode)
> +static struct cache *new_cache(int type, int level,
> +			       struct device_node *ofnode, int group_id)
>  {
>  	struct cache *cache;
> 
>  	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
>  	if (cache)
> -		cache_init(cache, type, level, ofnode);
> +		cache_init(cache, type, level, ofnode, group_id);
> 
>  	return cache;
>  }
> 
> @@ -309,20 +312,24 @@ static struct cache *cache_find_first_sibling(struct cache *cache)
>  		return cache;
> 
>  	list_for_each_entry(iter, &cache_list, list)
> -		if (iter->ofnode == cache->ofnode && iter->next_local == cache)
> +		if (iter->ofnode == cache->ofnode &&
> +		    iter->group_id == cache->group_id &&
> +		    iter->next_local == cache)
>  			return iter;
> 
>  	return cache;
>  }
> 
> -/* return the first cache on a local list matching node */
> -static struct cache *cache_lookup_by_node(const struct device_node *node)
> +/* return the first cache on a local list matching node and thread-group id */
> +static struct cache *cache_lookup_by_node_group(const struct device_node *node,
> +						int group_id)
>  {
>  	struct cache *cache = NULL;
>  	struct cache *iter;
> 
>  	list_for_each_entry(iter, &cache_list, list) {
> -		if (iter->ofnode != node)
> +		if (iter->ofnode != node ||
> +		    iter->group_id != group_id)
>  			continue;
>  		cache = cache_find_first_sibling(iter);
>  		break;
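
The (ofnode, group_id) keying reads cleanly, and extending
cache_find_first_sibling() to also match group_id keeps the sibling
chains of the two groups separate. With this change the same
device-tree node can legitimately appear on cache_list more than
once; a quick sketch of what I expect on the SMT8 example above
(l1_node is a hypothetical pointer to the single L1 node):

	struct cache *a, *b;

	a = cache_lookup_by_node_group(l1_node, 0); /* L1 of CPUs 0,2,4,6 */
	b = cache_lookup_by_node_group(l1_node, 1); /* L1 of CPUs 1,3,5,7 */
	/* a != b, even though a->ofnode == b->ofnode */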
> @@ -352,14 +359,15 @@ static int cache_is_unified_d(const struct device_node *np)
>  		CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
>  }
> 
> -static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
> +static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id,
> +						  int level)
>  {
>  	pr_debug("creating L%d ucache for %pOFP\n", level, node);
> 
> -	return new_cache(cache_is_unified_d(node), level, node);
> +	return new_cache(cache_is_unified_d(node), level, node, group_id);
>  }
> 
> -static struct cache *cache_do_one_devnode_split(struct device_node *node,
> +static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id,
>  						int level)
>  {
>  	struct cache *dcache, *icache;
> @@ -367,8 +375,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
>  	pr_debug("creating L%d dcache and icache for %pOFP\n", level, node);
> 
> -	dcache = new_cache(CACHE_TYPE_DATA, level, node);
> -	icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
> +	dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
> +	icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
> 
>  	if (!dcache || !icache)
>  		goto err;
> @@ -382,31 +390,32 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
>  	return NULL;
>  }
> 
> -static struct cache *cache_do_one_devnode(struct device_node *node, int level)
> +static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level)
>  {
>  	struct cache *cache;
> 
>  	if (cache_node_is_unified(node))
> -		cache = cache_do_one_devnode_unified(node, level);
> +		cache = cache_do_one_devnode_unified(node, group_id, level);
>  	else
> -		cache = cache_do_one_devnode_split(node, level);
> 
>  	return cache;
>  }
> 
>  static struct cache *cache_lookup_or_instantiate(struct device_node *node,
> +						 int group_id,
>  						 int level)
>  {
>  	struct cache *cache;
> 
> -	cache = cache_lookup_by_node(node);
> +	cache = cache_lookup_by_node_group(node, group_id);
> 
>  	WARN_ONCE(cache && cache->level != level,
>  		  "cache level mismatch on lookup (got %d, expected %d)\n",
>  		  cache->level, level);
> 
>  	if (!cache)
> -		cache = cache_do_one_devnode(node, level);
> +		cache = cache_do_one_devnode(node, group_id, level);
> 
>  	return cache;
>  }
> @@ -443,7 +452,27 @@ static void do_subsidiary_caches_debugcheck(struct cache *cache)
>  			  of_node_get_device_type(cache->ofnode));
>  }
> 
> -static void do_subsidiary_caches(struct cache *cache)
> +/*
> + * If sub-groups of threads in a core containing @cpu_id share the
> + * L@level-cache (information obtained via "ibm,thread-groups"
> + * device-tree property), then we identify the group by the first
> + * thread-sibling in the group. We define this to be the group-id.
> + *
> + * In the absence of any thread-group information for L@level-cache,
> + * this function returns -1.
> + */
> +static int get_group_id(unsigned int cpu_id, int level)
> +{
> +	if (has_big_cores && level == 1)
> +		return cpumask_first(per_cpu(thread_group_l1_cache_map,
> +					     cpu_id));
> +	else if (thread_group_shares_l2 && level == 2)
> +		return cpumask_first(per_cpu(thread_group_l2_cache_map,
> +					     cpu_id));
> +	return -1;
> +}
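
get_group_id() is the crux of the patch, so just to record the
behaviour I expect on the SMT8 example (values illustrative, assuming
has_big_cores is set and thread_group_shares_l2 is not):

	get_group_id(4, 1);	/* 0: first CPU of the {0,2,4,6} L1 group */
	get_group_id(7, 1);	/* 1: first CPU of the {1,3,5,7} L1 group */
	get_group_id(4, 2);	/* -1: no thread-group info for L2 here */
	get_group_id(4, 3);	/* -1: levels beyond L2 are not in the map */

which matches the definition in the changelog.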
> +
> +static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
>  {
>  	struct device_node *subcache_node;
>  	int level = cache->level;
> @@ -452,9 +481,11 @@ static void do_subsidiary_caches(struct cache *cache)
> 
>  	while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
>  		struct cache *subcache;
> +		int group_id;
> 
>  		level++;
> -		subcache = cache_lookup_or_instantiate(subcache_node, level);
> +		group_id = get_group_id(cpu_id, level);
> +		subcache = cache_lookup_or_instantiate(subcache_node, group_id, level);
>  		of_node_put(subcache_node);
>  		if (!subcache)
>  			break;
> @@ -468,6 +499,7 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
>  {
>  	struct device_node *cpu_node;
>  	struct cache *cpu_cache = NULL;
> +	int group_id;
> 
>  	pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
> 
> @@ -476,11 +508,13 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
>  	if (!cpu_node)
>  		goto out;
> 
> -	cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
> +	group_id = get_group_id(cpu_id, 1);
> +
> +	cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
>  	if (!cpu_cache)
>  		goto out;
> 
> -	do_subsidiary_caches(cpu_cache);
> +	do_subsidiary_caches(cpu_cache, cpu_id);
> 
>  	cache_cpu_set(cpu_cache, cpu_id);
>  out:
> @@ -848,13 +882,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
>  {
>  	struct device_node *cpu_node;
>  	struct cache *cache;
> +	int group_id;
> 
>  	cpu_node = of_get_cpu_node(cpu_id, NULL);
>  	WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
>  	if (!cpu_node)
>  		return NULL;
> 
> -	cache = cache_lookup_by_node(cpu_node);
> +	group_id = get_group_id(cpu_id, 1);
> +	cache = cache_lookup_by_node_group(cpu_node, group_id);
>  	of_node_put(cpu_node);
> 
>  	return cache;
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 447b78a87c8f..a7fcac44a8e2 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata;
>   * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to
>   * the set its siblings that share the L1-cache.
>   */
> -static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
> +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
> 
>  /*
>   * On some big-cores system, thread_group_l2_cache_map for each CPU
>   * corresponds to the set its siblings within the core that share the
>   * L2-cache.
>   */
> -static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> 
>  /* SMP operations for this machine */
>  struct smp_ops_t *smp_ops;
> -- 
> 2.26.3

-- 
Thanks and Regards
Srikar Dronamraju