Teach kfree_rcu_sheaf() how to handle the !allow_spin case. Try to get an empty sheaf from pcs->spare or the barn even when spinning is not allowed. Unlike __pcs_replace_full_main(), try harder to allocate an empty sheaf because the fallback path will be more expensive than kfree_nolock().
When trylock fails or the kernel observes non-NULL pcs->rcu_free after lock acquisition, free the sheaf instead of putting it to the barn. This is rare and not worth complicating the code. Since call_rcu() cannot be called in an unknown context, kfree_rcu_sheaf() fails when the rcu sheaf becomes full. Signed-off-by: Harry Yoo (Oracle) <[email protected]> --- mm/slab.h | 2 +- mm/slab_common.c | 2 +- mm/slub.c | 39 ++++++++++++++++++++++++++++++--------- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 509f330654b8..b1bd33a16544 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -429,7 +429,7 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s) return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT)); } -bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj); +bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin); void flush_all_rcu_sheaves(void); void flush_rcu_sheaves_on_cache(struct kmem_cache *s); diff --git a/mm/slab_common.c b/mm/slab_common.c index b6426d7ceec9..bc1a8ec938d9 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1605,7 +1605,7 @@ static bool kfree_rcu_sheaf(void *obj) s = slab->slab_cache; if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())) - return __kfree_rcu_sheaf(s, obj); + return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true); return false; } diff --git a/mm/slub.c b/mm/slub.c index 87ca154ccd80..b0d38d515386 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2815,7 +2815,8 @@ static inline struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, return __alloc_empty_sheaf(s, gfp, alloc_flags, s->sheaf_capacity); } -static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) +static void __free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf, + bool allow_spin) { /* * If the sheaf was created with SLAB_ALLOC_NO_RECURSE flag then its @@ -2827,11 +2828,20 @@ static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) mark_obj_codetag_empty(sheaf); VM_WARN_ON_ONCE(sheaf->size > 0); - kfree(sheaf); + + if (likely(allow_spin)) + kfree(sheaf); + else + kfree_nolock(sheaf); stat(s, SHEAF_FREE); } +static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) +{ + __free_empty_sheaf(s, sheaf, /* allow_spin = */ true); +} + static unsigned int refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min, unsigned int max); @@ -3132,7 +3142,6 @@ static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, * intended action due to a race or cpu migration. Thus they do not check the * empty or full sheaf limits for simplicity. */ - static void barn_put_empty_sheaf(struct node_barn *barn, struct slab_sheaf *sheaf) { unsigned long flags; @@ -6065,7 +6074,7 @@ static void rcu_free_sheaf(struct rcu_head *head) */ static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG); -bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) +bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin) { struct slub_percpu_sheaves *pcs; struct slab_sheaf *rcu_sheaf; @@ -6081,9 +6090,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) pcs = this_cpu_ptr(s->cpu_sheaves); if (unlikely(!pcs->rcu_free)) { - struct slab_sheaf *empty; struct node_barn *barn; + unsigned int alloc_flags = SLAB_ALLOC_DEFAULT; + gfp_t gfp = GFP_NOWAIT; /* Bootstrap or debug cache, fall back */ if (unlikely(!cache_has_sheaves(s))) { @@ -6103,7 +6113,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) goto fail; } - empty = barn_get_empty_sheaf(barn, true); + empty = barn_get_empty_sheaf(barn, allow_spin); if (empty) { pcs->rcu_free = empty; @@ -6112,20 +6122,25 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) local_unlock(&s->cpu_sheaves->lock); - empty = alloc_empty_sheaf(s, GFP_NOWAIT, SLAB_ALLOC_DEFAULT); + if (unlikely(!allow_spin)) { + alloc_flags = SLAB_ALLOC_TRYLOCK; + gfp = 0; + } + + empty = alloc_empty_sheaf(s, gfp, alloc_flags); if (!empty) goto fail; if (!local_trylock(&s->cpu_sheaves->lock)) { - barn_put_empty_sheaf(barn, empty); + __free_empty_sheaf(s, empty, allow_spin); goto fail; } pcs = this_cpu_ptr(s->cpu_sheaves); if (unlikely(pcs->rcu_free)) - barn_put_empty_sheaf(barn, empty); + __free_empty_sheaf(s, empty, allow_spin); else pcs->rcu_free = empty; } @@ -6143,6 +6158,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) if (likely(rcu_sheaf->size < s->sheaf_capacity)) { rcu_sheaf = NULL; } else { + if (unlikely(!allow_spin)) { + /* call_rcu() cannot be called in an unknown context */ + rcu_sheaf->size--; + local_unlock(&s->cpu_sheaves->lock); + goto fail; + } pcs->rcu_free = NULL; rcu_sheaf->node = numa_node_id(); } -- 2.53.0

