From: Rao Shoaib <rao.sho...@oracle.com>

kfree_rcu() should use the new kfree_bulk() interface for freeing
RCU-protected structures, since batched freeing is more efficient.

Signed-off-by: Rao Shoaib <rao.sho...@oracle.com>
---
 include/linux/mm.h      |   5 ++
 include/linux/rcutiny.h |   8 ++-
 kernel/sysctl.c         |  40 ++++++++++++
 mm/slab.h               |  23 +++++++
 mm/slab_common.c        | 164 +++++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 238 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ad06d42..fb1e54c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2673,5 +2673,10 @@ void __init setup_nr_node_ids(void);
 static inline void setup_nr_node_ids(void) {}
 #endif
 
+extern int sysctl_kfree_rcu_drain_limit;
+extern int sysctl_kfree_rcu_poll_limit;
+extern int sysctl_kfree_rcu_empty_limit;
+extern int sysctl_kfree_rcu_caching_allowed;
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ce9beec..b9e9025 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -84,10 +84,16 @@ static inline void synchronize_sched_expedited(void)
        synchronize_sched();
 }
 
+static inline void call_rcu_lazy(struct rcu_head *head,
+                                rcu_callback_t func)
+{
+       call_rcu(head, func);
+}
+
 static inline void kfree_call_rcu(struct rcu_head *head,
                                  rcu_callback_t func)
 {
-       call_rcu(head, func);
+       call_rcu_lazy(head, func);
 }
 
 #define rcu_note_context_switch(preempt) \
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f98f28c..ab70c99 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1650,6 +1650,46 @@ static struct ctl_table vm_table[] = {
                .extra2         = (void *)&mmap_rnd_compat_bits_max,
        },
 #endif
+       {
+               .procname       = "kfree_rcu_drain_limit",
+               .data           = &sysctl_kfree_rcu_drain_limit,
+               .maxlen         = sizeof(sysctl_kfree_rcu_drain_limit),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
+               .extra2         = &one_hundred,
+       },
+
+       {
+               .procname       = "kfree_rcu_poll_limit",
+               .data           = &sysctl_kfree_rcu_poll_limit,
+               .maxlen         = sizeof(sysctl_kfree_rcu_poll_limit),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
+               .extra2         = &one_hundred,
+       },
+
+       {
+               .procname       = "kfree_rcu_empty_limit",
+               .data           = &sysctl_kfree_rcu_empty_limit,
+               .maxlen         = sizeof(sysctl_kfree_rcu_empty_limit),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &four,
+       },
+
+       {
+               .procname       = "kfree_rcu_caching_allowed",
+               .data           = &sysctl_kfree_rcu_caching_allowed,
+               .maxlen         = sizeof(sysctl_kfree_rcu_caching_allowed),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+
        { }
 };
 
diff --git a/mm/slab.h b/mm/slab.h
index 5181323..a332ea6 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -80,6 +80,29 @@ extern const struct kmalloc_info_struct {
        unsigned long size;
 } kmalloc_info[];
 
+#define        RCU_MAX_ACCUMULATE_SIZE 25
+
+struct rcu_bulk_free_container {
+       struct  rcu_head rbfc_rcu;
+       int     rbfc_entries;
+       void    *rbfc_data[RCU_MAX_ACCUMULATE_SIZE];
+       struct  rcu_bulk_free *rbfc_rbf;
+};
+
+struct rcu_bulk_free {
+       struct  rcu_head rbf_rcu; /* used to schedule monitor process */
+       spinlock_t      rbf_lock;
+       struct          rcu_bulk_free_container *rbf_container;
+       struct          rcu_bulk_free_container *rbf_cached_container;
+       struct          rcu_head *rbf_list_head;
+       int             rbf_list_size;
+       int             rbf_cpu;
+       int             rbf_empty;
+       int             rbf_polled;
+       bool            rbf_init;
+       bool            rbf_monitor;
+};
+
 #ifndef CONFIG_SLOB
 /* Kmalloc array related functions */
 void setup_kmalloc_cache_index_table(void);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2ea9866..6e8afff 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -20,6 +20,7 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <linux/memcontrol.h>
+#include <linux/types.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kmem.h>
@@ -1525,13 +1526,174 @@ void kzfree(const void *p)
 }
 EXPORT_SYMBOL(kzfree);
 
+static DEFINE_PER_CPU(struct rcu_bulk_free, cpu_rbf);
+
+/* drain if at least this many objects are pending */
+int sysctl_kfree_rcu_drain_limit __read_mostly = 10;
+
+/* max polls before draining even if below drain_limit */
+int sysctl_kfree_rcu_poll_limit __read_mostly = 5;
+
+/* number of empty polls before the monitor stops rescheduling */
+int sysctl_kfree_rcu_empty_limit __read_mostly = 2;
+
+int sysctl_kfree_rcu_caching_allowed __read_mostly = 1;
+
+/* RCU callback function. Bulk-frees the accumulated memory */
+static void __rcu_bulk_free_impl(struct rcu_head *rbfc_rcu)
+{
+       struct rcu_bulk_free *rbf = NULL;
+       struct rcu_bulk_free_container *rbfc = container_of(rbfc_rcu,
+           struct rcu_bulk_free_container, rbfc_rcu);
+
+       kfree_bulk(rbfc->rbfc_entries, rbfc->rbfc_data);
+
+       rbf = rbfc->rbfc_rbf;
+       if (!sysctl_kfree_rcu_caching_allowed ||
+           cmpxchg(&rbf->rbf_cached_container, NULL, rbfc)) {
+               kfree(rbfc);
+       }
+}
+
+/* Process the fallback list of rcu structures,
+ * used when a container could not be allocated
+ */
+static void __rcu_bulk_schedule_list(struct rcu_bulk_free *rbf)
+{
+       int i;
+
+       for (i = 0; i < rbf->rbf_list_size; i++) {
+               struct rcu_head *free_head;
+
+               free_head = rbf->rbf_list_head;
+               rbf->rbf_list_head = free_head->next;
+               free_head->next = NULL;
+               call_rcu(free_head, free_head->func);
+       }
+       rbf->rbf_list_size = 0;
+}
+
+/* RCU monitoring function -- submits elements for RCU reclaim */
+static void __rcu_bulk_free_monitor(struct rcu_head *rbf_rcu)
+{
+       struct rcu_bulk_free *rbf = NULL;
+       struct rcu_bulk_free_container *rbfc = NULL;
+
+       rbf = container_of(rbf_rcu, struct rcu_bulk_free, rbf_rcu);
+
+       spin_lock(&rbf->rbf_lock);
+
+       rbfc = rbf->rbf_container;
+
+       rbf->rbf_polled++;
+       if (rbf->rbf_list_size > 0) {
+               if (rbf->rbf_list_size >= sysctl_kfree_rcu_drain_limit ||
+                   rbf->rbf_polled >= sysctl_kfree_rcu_poll_limit) {
+                       rbf->rbf_polled = 0;
+                       __rcu_bulk_schedule_list(rbf);
+               }
+       } else if (rbfc) {
+               if (rbfc->rbfc_entries >= sysctl_kfree_rcu_drain_limit ||
+                   rbf->rbf_polled >= sysctl_kfree_rcu_poll_limit) {
+                       rbf->rbf_polled = 0;
+                       call_rcu(&rbfc->rbfc_rcu, __rcu_bulk_free_impl);
+                       rbf->rbf_container = NULL;
+               }
+       } else if (rbf->rbf_polled >= sysctl_kfree_rcu_empty_limit) {
+               rbf->rbf_monitor = false;
+               rbf->rbf_polled = 0;
+       }
+
+       spin_unlock(&rbf->rbf_lock);
+
+       if (rbf->rbf_monitor)
+               call_rcu(&rbf->rbf_rcu, __rcu_bulk_free_monitor);
+}
+
+/* Main entry point: accumulates an object for deferred bulk freeing */
+static void __rcu_bulk_free(struct rcu_head *head, rcu_callback_t func)
+{
+       unsigned long offset;
+       void *ptr;
+       struct rcu_bulk_free *rbf;
+       struct rcu_bulk_free_container *rbfc = NULL;
+
+       preempt_disable();
+       rbf = this_cpu_ptr(&cpu_rbf);
+
+       if (unlikely(!rbf->rbf_init)) {
+               spin_lock_init(&rbf->rbf_lock);
+               rbf->rbf_cpu = smp_processor_id();
+               rbf->rbf_init = true;
+       }
+
+       /* hold lock to protect against other CPUs */
+       spin_lock_bh(&rbf->rbf_lock);
+
+       rbfc = rbf->rbf_container;
+
+       if (!rbfc) {
+               if (!rbf->rbf_cached_container) {
+                       rbf->rbf_container =
+                           kmalloc(sizeof(struct rcu_bulk_free_container),
+                                   GFP_ATOMIC);
+               } else {
+                       rbf->rbf_container =
+                           READ_ONCE(rbf->rbf_cached_container);
+                       cmpxchg(&rbf->rbf_cached_container,
+                               rbf->rbf_container, NULL);
+               }
+
+               if (unlikely(!rbf->rbf_container)) {
+                       /* Memory allocation failed; fall back to a linked list */
+
+                       head->func = (void *)func;
+                       head->next = rbf->rbf_list_head;
+                       rbf->rbf_list_head = head;
+                       rbf->rbf_list_size++;
+                       if (rbf->rbf_list_size == RCU_MAX_ACCUMULATE_SIZE)
+                               __rcu_bulk_schedule_list(rbf);
+
+                       goto done;
+               }
+
+               rbfc = rbf->rbf_container;
+               rbfc->rbfc_rbf = rbf;
+               rbfc->rbfc_entries = 0;
+
+               if (!rbf->rbf_list_head)
+                       __rcu_bulk_schedule_list(rbf);
+       }
+
+       offset = (unsigned long)func;
+       ptr = (void *)head - offset;
+
+       rbfc->rbfc_data[rbfc->rbfc_entries++] = ptr;
+       if (rbfc->rbfc_entries == RCU_MAX_ACCUMULATE_SIZE) {
+               rbf->rbf_container = NULL;
+               spin_unlock_bh(&rbf->rbf_lock);
+               call_rcu_lazy(&rbfc->rbfc_rcu, __rcu_bulk_free_impl);
+               preempt_enable();
+               return;
+       }
+
+done:
+       if (!rbf->rbf_monitor) {
+               call_rcu_lazy(&rbf->rbf_rcu, __rcu_bulk_free_monitor);
+               rbf->rbf_monitor = true;
+       }
+
+       spin_unlock_bh(&rbf->rbf_lock);
+       preempt_enable();
+}
+
 /*
  * Queue Memory to be freed by RCU after a grace period.
  */
 void kfree_call_rcu(struct rcu_head *head,
                    rcu_callback_t func)
 {
-       call_rcu_lazy(head, func);
+       __rcu_bulk_free(head, func);
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
-- 
2.7.4

Reply via email to