On 3/2/21 5:14 PM, Christoph Lameter wrote:
> On Mon, 10 Aug 2020, Xunlei Pang wrote:
> 
>>
>> diff --git a/mm/slab.h b/mm/slab.h
>> index c85e2fa..a709a70 100644
>> --- a/mm/slab.h
>> +++ b/mm/slab.h
>> @@ -616,7 +616,7 @@ struct kmem_cache_node {
>>  #ifdef CONFIG_SLUB
>>      unsigned long nr_partial;
>>      struct list_head partial;
>> -    atomic_long_t partial_free_objs;
>> +    atomic_long_t __percpu *partial_free_objs;
> 
> A percpu counter is never atomic. Just use unsigned long and use this_cpu
> operations for this thing. That should cut down further on the overhead.
> 
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -1775,11 +1775,21 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
>>  /*
>>   * Management of partially allocated slabs.
>>   */
>> +static inline long get_partial_free(struct kmem_cache_node *n)
>> +{
>> +    long nr = 0;
>> +    int cpu;
>> +
>> +    for_each_possible_cpu(cpu)
>> +            nr += atomic_long_read(per_cpu_ptr(n->partial_free_objs, cpu));
> 
> this_cpu_read(*n->partial_free_objs)
> 
>>  static inline void
>>  __update_partial_free(struct kmem_cache_node *n, long delta)
>>  {
>> -    atomic_long_add(delta, &n->partial_free_objs);
>> +    atomic_long_add(delta, this_cpu_ptr(n->partial_free_objs));
> 
> this_cpu_add()
> 
> and so on.
> 

Thanks. I have changed both of them to use "unsigned long", and I will send
out v3 once our internal performance regression tests pass.

Reply via email to