On Thu, 31 Jul 2025 17:24:24 +0800 Menglong Dong <menglong8.d...@gmail.com> wrote:
> For now, all the kernel functions who are hooked by the fprobe will be > added to the hash table "fprobe_ip_table". The key of it is the function > address, and the value of it is "struct fprobe_hlist_node". > > The budget of the hash table is FPROBE_IP_TABLE_SIZE, which is 256. And > this means the overhead of the hash table lookup will grow linearly if > the count of the functions in the fprobe more than 256. When we try to > hook all the kernel functions, the overhead will be huge. > > Therefore, replace the hash table with rhltable to reduce the overhead. > Hi Menglong, Thanks for update, I have just some nitpicks. > Signed-off-by: Menglong Dong <dong...@chinatelecom.cn> > --- > v3: > - some format optimization > - handle the error that returned from rhltable_insert in > insert_fprobe_node > --- > include/linux/fprobe.h | 3 +- > kernel/trace/fprobe.c | 154 +++++++++++++++++++++++------------------ > 2 files changed, 90 insertions(+), 67 deletions(-) > > diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h > index 702099f08929..f5d8982392b9 100644 > --- a/include/linux/fprobe.h > +++ b/include/linux/fprobe.h > @@ -7,6 +7,7 @@ > #include <linux/ftrace.h> > #include <linux/rcupdate.h> > #include <linux/refcount.h> > +#include <linux/rhashtable.h> nit: can you also include this header file in fprobe.c ? > #include <linux/slab.h> > > struct fprobe; > @@ -26,7 +27,7 @@ typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned > long entry_ip, > * @fp: The fprobe which owns this. > */ > struct fprobe_hlist_node { > - struct hlist_node hlist; > + struct rhlist_head hlist; > unsigned long addr; > struct fprobe *fp; > }; > diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c > index ba7ff14f5339..2f1683a26c10 100644 > --- a/kernel/trace/fprobe.c > +++ b/kernel/trace/fprobe.c > @@ -41,47 +41,46 @@ > * - RCU hlist traversal under disabling preempt > */ > static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE]; > -static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE]; > +static struct rhltable fprobe_ip_table; > static DEFINE_MUTEX(fprobe_mutex); > > -/* > - * Find first fprobe in the hlist. It will be iterated twice in the entry > - * probe, once for correcting the total required size, the second time is > - * calling back the user handlers. > - * Thus the hlist in the fprobe_table must be sorted and new probe needs to > - * be added *before* the first fprobe. > - */ > -static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip) > +static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed) > { > - struct fprobe_hlist_node *node; > - struct hlist_head *head; > + return hash_ptr(*(unsigned long **)data, 32); > +} > > - head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)]; > - hlist_for_each_entry_rcu(node, head, hlist, > - lockdep_is_held(&fprobe_mutex)) { > - if (node->addr == ip) > - return node; > - } > - return NULL; > +static int fprobe_node_cmp(struct rhashtable_compare_arg *arg, > + const void *ptr) > +{ > + unsigned long key = *(unsigned long *)arg->key; > + const struct fprobe_hlist_node *n = ptr; > + > + return n->addr != key; > } > -NOKPROBE_SYMBOL(find_first_fprobe_node); > > -/* Node insertion and deletion requires the fprobe_mutex */ > -static void insert_fprobe_node(struct fprobe_hlist_node *node) > +static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed) > { > - unsigned long ip = node->addr; > - struct fprobe_hlist_node *next; > - struct hlist_head *head; > + const struct fprobe_hlist_node *n = data; > + > + return hash_ptr((void *)n->addr, 32); > +} > + > +static const struct rhashtable_params fprobe_rht_params = { > + .head_offset = offsetof(struct fprobe_hlist_node, hlist), > + .key_offset = offsetof(struct fprobe_hlist_node, addr), > + .key_len = sizeof_field(struct fprobe_hlist_node, addr), > + .hashfn = fprobe_node_hashfn, > + .obj_hashfn = fprobe_node_obj_hashfn, > + .obj_cmpfn = fprobe_node_cmp, > + .automatic_shrinking = true, > +}; > > +/* Node insertion and deletion requires the fprobe_mutex */ > +static int insert_fprobe_node(struct fprobe_hlist_node *node) > +{ > lockdep_assert_held(&fprobe_mutex); > > - next = find_first_fprobe_node(ip); > - if (next) { > - hlist_add_before_rcu(&node->hlist, &next->hlist); > - return; > - } > - head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)]; > - hlist_add_head_rcu(&node->hlist, head); > + return rhltable_insert(&fprobe_ip_table, &node->hlist, > fprobe_rht_params); > } > > /* Return true if there are synonims */ > @@ -92,9 +91,11 @@ static bool delete_fprobe_node(struct fprobe_hlist_node > *node) > /* Avoid double deleting */ > if (READ_ONCE(node->fp) != NULL) { > WRITE_ONCE(node->fp, NULL); > - hlist_del_rcu(&node->hlist); > + rhltable_remove(&fprobe_ip_table, &node->hlist, > + fprobe_rht_params); > } > - return !!find_first_fprobe_node(node->addr); > + return !!rhltable_lookup(&fprobe_ip_table, &node->addr, > + fprobe_rht_params); > } > > /* Check existence of the fprobe */ > @@ -249,9 +250,10 @@ static inline int __fprobe_kprobe_handler(unsigned long > ip, unsigned long parent > static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops > *gops, > struct ftrace_regs *fregs) > { > - struct fprobe_hlist_node *node, *first; > unsigned long *fgraph_data = NULL; > unsigned long func = trace->func; > + struct fprobe_hlist_node *node; > + struct rhlist_head *head, *pos; > unsigned long ret_ip; > int reserved_words; > struct fprobe *fp; > @@ -260,14 +262,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, > struct fgraph_ops *gops, > if (WARN_ON_ONCE(!fregs)) > return 0; > > - first = node = find_first_fprobe_node(func); > - if (unlikely(!first)) > - return 0; > - > + rcu_read_lock(); > + head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params); > reserved_words = 0; > - hlist_for_each_entry_from_rcu(node, hlist) { > + rhl_for_each_entry_rcu(node, pos, head, hlist) { > if (node->addr != func) > - break; > + continue; > fp = READ_ONCE(node->fp); > if (!fp || !fp->exit_handler) > continue; > @@ -278,17 +278,19 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, > struct fgraph_ops *gops, > reserved_words += > FPROBE_HEADER_SIZE_IN_LONG + > SIZE_IN_LONG(fp->entry_data_size); > } > - node = first; > + rcu_read_unlock(); > if (reserved_words) { > fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * > sizeof(long)); > if (unlikely(!fgraph_data)) { > - hlist_for_each_entry_from_rcu(node, hlist) { > + rcu_read_lock(); > + rhl_for_each_entry_rcu(node, pos, head, hlist) { > if (node->addr != func) > - break; > + continue; > fp = READ_ONCE(node->fp); > if (fp && !fprobe_disabled(fp)) > fp->nmissed++; > } > + rcu_read_unlock(); > return 0; > } > } > @@ -299,12 +301,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, > struct fgraph_ops *gops, > */ > ret_ip = ftrace_regs_get_return_address(fregs); > used = 0; > - hlist_for_each_entry_from_rcu(node, hlist) { > + rhl_for_each_entry_rcu(node, pos, head, hlist) { > int data_size; > void *data; > > if (node->addr != func) > - break; > + continue; > fp = READ_ONCE(node->fp); > if (!fp || fprobe_disabled(fp)) > continue; > @@ -448,25 +450,21 @@ static int fprobe_addr_list_add(struct fprobe_addr_list > *alist, unsigned long ad > return 0; > } > > -static void fprobe_remove_node_in_module(struct module *mod, struct > hlist_head *head, > - struct fprobe_addr_list *alist) > +static void fprobe_remove_node_in_module(struct module *mod, struct > fprobe_hlist_node *node, > + struct fprobe_addr_list *alist) > { > - struct fprobe_hlist_node *node; > int ret = 0; > > - hlist_for_each_entry_rcu(node, head, hlist, > - lockdep_is_held(&fprobe_mutex)) { > - if (!within_module(node->addr, mod)) > - continue; > - if (delete_fprobe_node(node)) > - continue; > - /* > - * If failed to update alist, just continue to update hlist. > - * Therefore, at list user handler will not hit anymore. > - */ > - if (!ret) > - ret = fprobe_addr_list_add(alist, node->addr); > - } > + if (!within_module(node->addr, mod)) > + return; > + if (delete_fprobe_node(node)) > + return; > + /* > + * If failed to update alist, just continue to update hlist. > + * Therefore, at list user handler will not hit anymore. > + */ > + if (!ret) > + ret = fprobe_addr_list_add(alist, node->addr); > } > > /* Handle module unloading to manage fprobe_ip_table. */ > @@ -474,8 +472,9 @@ static int fprobe_module_callback(struct notifier_block > *nb, > unsigned long val, void *data) > { > struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT}; > + struct fprobe_hlist_node *node; > + struct rhashtable_iter iter; > struct module *mod = data; > - int i; > > if (val != MODULE_STATE_GOING) > return NOTIFY_DONE; > @@ -486,8 +485,16 @@ static int fprobe_module_callback(struct notifier_block > *nb, > return NOTIFY_DONE; > > mutex_lock(&fprobe_mutex); > - for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++) > - fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist); > + rhashtable_walk_enter(&fprobe_ip_table.ht, &iter); nit: Use rhltable_walk_enter() instead. Others looks good to me. Thank you, > + do { > + rhashtable_walk_start(&iter); > + > + while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node)) > + fprobe_remove_node_in_module(mod, node, &alist); > + > + rhashtable_walk_stop(&iter); > + } while (node == ERR_PTR(-EAGAIN)); > + rhashtable_walk_exit(&iter); > > if (alist.index < alist.size && alist.index > 0) > ftrace_set_filter_ips(&fprobe_graph_ops.ops, > @@ -722,8 +729,16 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long > *addrs, int num) > ret = fprobe_graph_add_ips(addrs, num); > if (!ret) { > add_fprobe_hash(fp); > - for (i = 0; i < hlist_array->size; i++) > - insert_fprobe_node(&hlist_array->array[i]); > + for (i = 0; i < hlist_array->size; i++) { > + ret = insert_fprobe_node(&hlist_array->array[i]); > + if (ret) > + break; > + } > + /* fallback on insert error */ > + if (ret) { > + for (i--; i >= 0; i--) > + delete_fprobe_node(&hlist_array->array[i]); > + } > } > mutex_unlock(&fprobe_mutex); > > @@ -819,3 +834,10 @@ int unregister_fprobe(struct fprobe *fp) > return ret; > } > EXPORT_SYMBOL_GPL(unregister_fprobe); > + > +static int __init fprobe_initcall(void) > +{ > + rhltable_init(&fprobe_ip_table, &fprobe_rht_params); > + return 0; > +} > +late_initcall(fprobe_initcall); > -- > 2.50.1 > -- Masami Hiramatsu (Google) <mhira...@kernel.org>