Set up the maple_node_cache with percpu sheaves of size 32 to hopefully
improve its performance. Change the single-node RCU freeing in
ma_free_rcu() to use kfree_rcu() instead of the custom callback, which
allows the rcu_free sheaf batching to be used. Note that there are other
users of mt_free_rcu() where larger parts of the maple tree are submitted
to call_rcu() as a whole, and those cannot use the rcu_free sheaf. But it's
still possible for maple nodes freed this way to be reused via the barn,
even if only some CPUs are allowed to process RCU callbacks.

Signed-off-by: Vlastimil Babka <vba...@suse.cz>
Reviewed-by: Suren Baghdasaryan <sur...@google.com>
---
 lib/maple_tree.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index d0bea23fa4bc9fdd0ca4803a108d3c943f6a0c73..812ba155f3577d1b6ecc779ce9e4e7ded3085d8b 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -208,7 +208,7 @@ static void mt_free_rcu(struct rcu_head *head)
 static void ma_free_rcu(struct maple_node *node)
 {
        WARN_ON(node->parent != ma_parent_ptr(node));
-       call_rcu(&node->rcu, mt_free_rcu);
+       kfree_rcu(node, rcu);
 }
 
 static void mas_set_height(struct ma_state *mas)
@@ -6254,9 +6254,14 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
 
 void __init maple_tree_init(void)
 {
+       struct kmem_cache_args args = {
+               .align  = sizeof(struct maple_node),
+               .sheaf_capacity = 32,
+       };
+
        maple_node_cache = kmem_cache_create("maple_node",
-                       sizeof(struct maple_node), sizeof(struct maple_node),
-                       SLAB_PANIC, NULL);
+                       sizeof(struct maple_node), &args,
+                       SLAB_PANIC);
 }
 
 /**

-- 
2.49.0


Reply via email to