On Tue, 2025-11-25 at 16:40 -0500, Steven Rostedt wrote:
> From: Steven Rostedt <[email protected]>
> 
> The event trigger data requires a full tracepoint_synchronize_unregister()
> call before freeing. That call can take hundreds of milliseconds to
> complete. In order to allow for bulk freeing of the trigger data, it
> cannot call tracepoint_synchronize_unregister() for every individual
> trigger data being freed.
> 
> Create a kthread the first time a trigger data is freed, and have it
> use a lockless llist to collect the data to free, run
> tracepoint_synchronize_unregister(), and then free everything in the
> list.
> 
> By freeing hundreds of event_trigger_data elements together, only two
> runs of the synchronization function are required rather than hundreds.
> This speeds up the operation by orders of magnitude (milliseconds
> instead of several seconds).
> 
> Acked-by: Masami Hiramatsu (Google) <[email protected]>
> Signed-off-by: Steven Rostedt (Google) <[email protected]>

Very nice!

Reviewed-by: Tom Zanussi <[email protected]>
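
To put rough numbers on the changelog claim (back-of-the-envelope, not
a measurement): with tracepoint_synchronize_unregister() costing on the
order of 100 ms, freeing 500 triggers one at a time spends ~50 s in
synchronization alone, while batching behind the kthread pays for only
a couple of runs, a few hundred ms total.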

> ---
> Changes since v1: https://patch.msgid.link/[email protected]
> 
> - Moved include of llist.h to trace.h as it is used there (Masami Hiramatsu)
> 
>  kernel/trace/trace.h                |  2 ++
>  kernel/trace/trace_events_trigger.c | 55 +++++++++++++++++++++++++++--
>  2 files changed, 54 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 5863800b1ab3..911fc75dc6c4 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -22,6 +22,7 @@
>  #include <linux/ctype.h>
>  #include <linux/once_lite.h>
>  #include <linux/ftrace_regs.h>
> +#include <linux/llist.h>
>  
>  #include "pid_list.h"
>  
> @@ -1808,6 +1809,7 @@ struct event_trigger_data {
>       char                            *name;
>       struct list_head                named_list;
>       struct event_trigger_data       *named_data;
> +     struct llist_node               llist;
>  };
>  
>  /* Avoid typos */
> diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
> index e5dcfcbb2cd5..3b97c242b795 100644
> --- a/kernel/trace/trace_events_trigger.c
> +++ b/kernel/trace/trace_events_trigger.c
> @@ -6,6 +6,7 @@
>   */
>  
>  #include <linux/security.h>
> +#include <linux/kthread.h>
>  #include <linux/module.h>
>  #include <linux/ctype.h>
>  #include <linux/mutex.h>
> @@ -17,15 +18,63 @@
>  static LIST_HEAD(trigger_commands);
>  static DEFINE_MUTEX(trigger_cmd_mutex);
>  
> +static struct task_struct *trigger_kthread;
> +static struct llist_head trigger_data_free_list;
> +static DEFINE_MUTEX(trigger_data_kthread_mutex);
> +
> +/* Bulk garbage collection of event_trigger_data elements */
> +static int trigger_kthread_fn(void *ignore)
> +{
> +     struct event_trigger_data *data, *tmp;
> +     struct llist_node *llnodes;
> +
> +     /* Once this task starts, it lives forever */
> +     for (;;) {
> +             set_current_state(TASK_INTERRUPTIBLE);
> +             if (llist_empty(&trigger_data_free_list))
> +                     schedule();
> +
> +             __set_current_state(TASK_RUNNING);
> +
> +             llnodes = llist_del_all(&trigger_data_free_list);
> +
> +             /* make sure current triggers exit before free */
> +             tracepoint_synchronize_unregister();
> +
> +             llist_for_each_entry_safe(data, tmp, llnodes, llist)
> +                     kfree(data);
> +     }
> +
> +     return 0;
> +}
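
For readers following along: the ordering above is the standard
lost-wakeup guard. set_current_state() runs before the emptiness check,
so a producer that does llist_add() followed by wake_up_process()
either finds the kthread still runnable or knocks it back out of
schedule(). A condensed sketch of the pairing (hypothetical names,
error handling elided):

	/* producer, callable from any context */
	llist_add(&item->llist, &pending);	/* publish first...  */
	wake_up_process(reaper);		/* ...then wake      */

	/* consumer kthread */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);	/* before the check */
		if (llist_empty(&pending))
			schedule();		/* wakeup cannot be lost */
		__set_current_state(TASK_RUNNING);
		process_batch(llist_del_all(&pending));	/* whole batch */
	}
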
> +
>  void trigger_data_free(struct event_trigger_data *data)
>  {
>       if (data->cmd_ops->set_filter)
>               data->cmd_ops->set_filter(NULL, data, NULL);
>  
> -     /* make sure current triggers exit before free */
> -     tracepoint_synchronize_unregister();
> +     if (unlikely(!trigger_kthread)) {
> +             guard(mutex)(&trigger_data_kthread_mutex);
> +             /* Check again after taking mutex */
> +             if (!trigger_kthread) {
> +                     struct task_struct *kthread;
> +
> +                     kthread = kthread_create(trigger_kthread_fn, NULL,
> +                                              "trigger_data_free");
> +                     if (!IS_ERR(kthread))
> +                             WRITE_ONCE(trigger_kthread, kthread);
> +             }
> +     }
> +
> +     if (!trigger_kthread) {
> +             /* Do it the slow way */
> +             tracepoint_synchronize_unregister();
> +             kfree(data);
> +             return;
> +     }
>  
> -     kfree(data);
> +     llist_add(&data->llist, &trigger_data_free_list);
> +     wake_up_process(trigger_kthread);
>  }
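
And the lazy create-once logic generalizes nicely too. Condensed, the
shape is (hypothetical names, mirroring the re-check under the mutex
and the synchronous fallback when kthread creation fails):

	static struct task_struct *worker;
	static DEFINE_MUTEX(worker_mutex);

	if (unlikely(!worker)) {
		guard(mutex)(&worker_mutex);
		if (!worker) {		/* re-check under the mutex */
			struct task_struct *k;

			k = kthread_create(worker_fn, NULL, "worker");
			if (!IS_ERR(k))
				WRITE_ONCE(worker, k);
		}
	}
	if (!worker) {
		/* creation failed: fall back to the slow, synchronous path */
		free_synchronously(data);
		return;
	}
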
>  
>  static inline void data_ops_trigger(struct event_trigger_data *data,

