On Wed, 15 Oct 2025 16:32:37 +0800
Menglong Dong <[email protected]> wrote:
> For now, fgraph is used for the fprobe, even if we only need to trace
> the entry. However, the performance of ftrace is better than that of
> fgraph, and we can use ftrace_ops for this case.
> 
> With this change, the performance of kprobe-multi increases from 54M/s
> to 69M/s. Before this commit:
> 
>   $ ./benchs/run_bench_trigger.sh kprobe-multi
>   kprobe-multi   :   54.663 ± 0.493M/s
> 
> After this commit:
> 
>   $ ./benchs/run_bench_trigger.sh kprobe-multi
>   kprobe-multi   :   69.447 ± 0.143M/s
> 
> Mitigations were disabled during the benchmark runs above.
> 

Looks good to me. Thanks!

> Signed-off-by: Menglong Dong <[email protected]>
> ---
> v4:
> - fall back to fgraph if FTRACE_OPS_FL_SAVE_REGS is not supported
> 
> v3:
> - add a comment for the rcu_read_lock() in fprobe_ftrace_entry()
> 
> v2:
> - add some documentation for fprobe_fgraph_entry as Masami suggested
> - merge the rename of fprobe_entry into the current patch
> - use ftrace_test_recursion_trylock() in fprobe_ftrace_entry()
> ---
>  kernel/trace/fprobe.c | 128 +++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 119 insertions(+), 9 deletions(-)
> 
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> index 99d83c08b9e2..ecd623eef68b 100644
> --- a/kernel/trace/fprobe.c
> +++ b/kernel/trace/fprobe.c
> @@ -254,8 +254,106 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
>  	return ret;
>  }
> 
> -static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> -			struct ftrace_regs *fregs)
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +/* ftrace_ops callback, this processes fprobes which have only entry_handler. */
> +static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
> +				struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> +	struct fprobe_hlist_node *node;
> +	struct rhlist_head *head, *pos;
> +	struct fprobe *fp;
> +	int bit;
> +
> +	bit = ftrace_test_recursion_trylock(ip, parent_ip);
> +	if (bit < 0)
> +		return;
> +
> +	/*
> +	 * ftrace_test_recursion_trylock() disables preemption, but
> +	 * rhltable_lookup() checks whether rcu_read_lock() is held.
> +	 * So we take rcu_read_lock() here.
> +	 */
> +	rcu_read_lock();
> +	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
> +
> +	rhl_for_each_entry_rcu(node, pos, head, hlist) {
> +		if (node->addr != ip)
> +			break;
> +		fp = READ_ONCE(node->fp);
> +		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
> +			continue;
> +
> +		if (fprobe_shared_with_kprobes(fp))
> +			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
> +		else
> +			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
> +	}
> +	rcu_read_unlock();
> +	ftrace_test_recursion_unlock(bit);
> +}
> +NOKPROBE_SYMBOL(fprobe_ftrace_entry);
> +
> +static struct ftrace_ops fprobe_ftrace_ops = {
> +	.func = fprobe_ftrace_entry,
> +	.flags = FTRACE_OPS_FL_SAVE_REGS,
> +};
> +static int fprobe_ftrace_active;
> +
> +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> +{
> +	int ret;
> +
> +	lockdep_assert_held(&fprobe_mutex);
> +
> +	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
> +	if (ret)
> +		return ret;
> +
> +	if (!fprobe_ftrace_active) {
> +		ret = register_ftrace_function(&fprobe_ftrace_ops);
> +		if (ret) {
> +			ftrace_free_filter(&fprobe_ftrace_ops);
> +			return ret;
> +		}
> +	}
> +	fprobe_ftrace_active++;
> +	return 0;
> +}
> +
> +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> +{
> +	lockdep_assert_held(&fprobe_mutex);
> +
> +	fprobe_ftrace_active--;
> +	if (!fprobe_ftrace_active)
> +		unregister_ftrace_function(&fprobe_ftrace_ops);
> +	if (num)
> +		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
> +}
> +
> +static bool fprobe_is_ftrace(struct fprobe *fp)
> +{
> +	return !fp->exit_handler;
> +}
> +#else
> +static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
> +{
> +	return -ENOENT;
> +}
> +
> +static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
> +{
> +}
> +
> +static bool fprobe_is_ftrace(struct fprobe *fp)
> +{
> +	return false;
> +}
> +#endif
> +
> +/* fgraph_ops callback, this processes fprobes which have exit_handler. */
> +static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
> +			       struct ftrace_regs *fregs)
>  {
>  	unsigned long *fgraph_data = NULL;
>  	unsigned long func = trace->func;
> @@ -292,7 +390,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
>  			if (node->addr != func)
>  				continue;
>  			fp = READ_ONCE(node->fp);
> -			if (fp && !fprobe_disabled(fp))
> +			if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp))
>  				fp->nmissed++;
>  		}
>  		return 0;
> @@ -312,7 +410,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
>  		if (node->addr != func)
>  			continue;
>  		fp = READ_ONCE(node->fp);
> -		if (!fp || fprobe_disabled(fp))
> +		if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp)))
>  			continue;
> 
>  		data_size = fp->entry_data_size;
> @@ -340,7 +438,7 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
>  	/* If any exit_handler is set, data must be used. */
>  	return used != 0;
>  }
> -NOKPROBE_SYMBOL(fprobe_entry);
> +NOKPROBE_SYMBOL(fprobe_fgraph_entry);
> 
>  static void fprobe_return(struct ftrace_graph_ret *trace,
>  			  struct fgraph_ops *gops,
> @@ -379,7 +477,7 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
>  NOKPROBE_SYMBOL(fprobe_return);
> 
>  static struct fgraph_ops fprobe_graph_ops = {
> -	.entryfunc = fprobe_entry,
> +	.entryfunc = fprobe_fgraph_entry,
>  	.retfunc = fprobe_return,
>  };
>  static int fprobe_graph_active;
> @@ -498,9 +596,14 @@ static int fprobe_module_callback(struct notifier_block *nb,
>  	} while (node == ERR_PTR(-EAGAIN));
>  	rhashtable_walk_exit(&iter);
> 
> -	if (alist.index > 0)
> +	if (alist.index > 0) {
>  		ftrace_set_filter_ips(&fprobe_graph_ops.ops,
>  				      alist.addrs, alist.index, 1, 0);
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +		ftrace_set_filter_ips(&fprobe_ftrace_ops,
> +				      alist.addrs, alist.index, 1, 0);
> +#endif
> +	}
>  	mutex_unlock(&fprobe_mutex);
> 
>  	kfree(alist.addrs);
> @@ -733,7 +836,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
>  	mutex_lock(&fprobe_mutex);
> 
>  	hlist_array = fp->hlist_array;
> -	ret = fprobe_graph_add_ips(addrs, num);
> +	if (fprobe_is_ftrace(fp))
> +		ret = fprobe_ftrace_add_ips(addrs, num);
> +	else
> +		ret = fprobe_graph_add_ips(addrs, num);
> +
>  	if (!ret) {
>  		add_fprobe_hash(fp);
>  		for (i = 0; i < hlist_array->size; i++) {
> @@ -829,7 +936,10 @@ int unregister_fprobe(struct fprobe *fp)
>  	}
>  	del_fprobe_hash(fp);
> 
> -	fprobe_graph_remove_ips(addrs, count);
> +	if (fprobe_is_ftrace(fp))
> +		fprobe_ftrace_remove_ips(addrs, count);
> +	else
> +		fprobe_graph_remove_ips(addrs, count);
> 
>  	kfree_rcu(hlist_array, rcu);
>  	fp->hlist_array = NULL;
> -- 
> 2.51.0
> 

-- 
Masami Hiramatsu (Google) <[email protected]>
