As suggested by Vlastimil Babka, kfree_rcu_sheaf() can be used
on PREEMPT_RT if we always assume that spinning is not allowed there.
This is because local_trylock and spinlock_t are safe to use via their
trylock variants as long as the kernel does not spin and the context is
neither NMI nor hardirq.

Now that __kfree_rcu_sheaf() knows how to handle allow_spin = false,
relax the limitation and try the sheaves path on PREEMPT_RT as well.

Keep the lockdep map on non-RT kernels. However, do not use the lockdep
map on PREEMPT_RT to avoid suppressing valid lockdep warnings.

Link: https://lore.kernel.org/linux-mm/[email protected]
Suggested-by: Vlastimil Babka (SUSE) <[email protected]>
Signed-off-by: Harry Yoo (Oracle) <[email protected]>
---
 mm/slab_common.c | 11 +++++++++--
 mm/slub.c        | 17 ++++++++++-------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 55546b8385ff..807924a94fb0 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1595,6 +1595,13 @@ static bool kfree_rcu_sheaf(void *obj)
 {
        struct kmem_cache *s;
        struct slab *slab;
+       bool allow_spin;
+
+       /*
+        * It is not safe to spin on PREEMPT_RT because the kernel might be
+        * holding a raw spinlock and slab acquires sleeping locks.
+        */
+       allow_spin = !IS_ENABLED(CONFIG_PREEMPT_RT);
 
        if (is_vmalloc_addr(obj))
                return false;
@@ -1605,7 +1612,7 @@ static bool kfree_rcu_sheaf(void *obj)
 
        s = slab->slab_cache;
        if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
-               return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true);
+               return __kfree_rcu_sheaf(s, obj, allow_spin);
 
        return false;
 }
@@ -1954,7 +1961,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
        if (!head)
                might_sleep();
 
-       if (!IS_ENABLED(CONFIG_PREEMPT_RT) && kfree_rcu_sheaf(ptr))
+       if (kfree_rcu_sheaf(ptr))
                return;
 
        // Queue the object but don't yet schedule the batch.
diff --git a/mm/slub.c b/mm/slub.c
index ba593c1c53d5..4850629774b2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6082,12 +6082,13 @@ static void rcu_free_sheaf(struct rcu_head *head)
  * kvfree_call_rcu() can be called while holding a raw_spinlock_t. Since
  * __kfree_rcu_sheaf() may acquire a spinlock_t (sleeping lock on PREEMPT_RT),
  * this would violate lock nesting rules. Therefore, kvfree_call_rcu() avoids
- * this problem by bypassing the sheaves layer entirely on PREEMPT_RT.
+ * this problem by passing allow_spin = false on PREEMPT_RT.
  *
  * However, lockdep still complains that it is invalid to acquire spinlock_t
  * while holding raw_spinlock_t, even on !PREEMPT_RT where spinlock_t is a
  * spinning lock. Tell lockdep that acquiring spinlock_t is valid here
- * by temporarily raising the wait-type to LD_WAIT_CONFIG.
+ * by temporarily raising the wait-type to LD_WAIT_CONFIG. Skip the lockdep map
+ * on PREEMPT_RT to avoid suppressing valid lockdep warnings.
  */
 static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG);
 
@@ -6096,10 +6097,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin)
        struct slub_percpu_sheaves *pcs;
        struct slab_sheaf *rcu_sheaf;
 
-       if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
-               return false;
+       VM_WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT) && allow_spin);
 
-       lock_map_acquire_try(&kfree_rcu_sheaf_map);
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               lock_map_acquire_try(&kfree_rcu_sheaf_map);
 
        if (!local_trylock(&s->cpu_sheaves->lock))
                goto fail;
@@ -6199,12 +6200,14 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin)
        local_unlock(&s->cpu_sheaves->lock);
 
        stat(s, FREE_RCU_SHEAF);
-       lock_map_release(&kfree_rcu_sheaf_map);
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               lock_map_release(&kfree_rcu_sheaf_map);
        return true;
 
 fail:
        stat(s, FREE_RCU_SHEAF_FAIL);
-       lock_map_release(&kfree_rcu_sheaf_map);
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               lock_map_release(&kfree_rcu_sheaf_map);
        return false;
 }
 

-- 
2.53.0


Reply via email to