On Mon, Jun 09, 2025 at 01:17:32PM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rost...@goodmis.org>
> 
> Freeing of filters requires to wait for both an RCU grace period as well as
> a RCU task trace wait period after they have been detached from their
> lists. The trace task period can be quite large so the freeing of the
> filters was moved to use the call_rcu*() routines. The problem with that is
> that the callback functions of call_rcu*() is done from a soft irq and can
> cause latencies if the callback takes a bit of time.
> 
> The filters are freed per event in a system and the syscalls system
> contains an event per system call, which can be over 700 events. Freeing 700
> filters in a bottom half is undesirable.
> 
> Instead, move the freeing to use queue_rcu_work() which is done in task
> context.
> 
> Link: 
> https://lore.kernel.org/all/9a2f0cd0-1561-4206-8966-f93ccd25927f@paulmck-laptop/
> 
> Fixes: a9d0aab5eb33 ("tracing: Fix regression of filter waiting a long time 
> on RCU synchronization")
> Suggested-by: "Paul E. McKenney" <paul...@kernel.org>
> Signed-off-by: Steven Rostedt (Google) <rost...@goodmis.org>

Thank you, and looks good to me.

Reviewed-by: Paul E. McKenney <paul...@kernel.org>

> ---
> Note, I added a Fixes tag but not a stable tag as this is a nice-to-have
> but doesn't hit the level of critical fix to backport or add to an rc
> release. If someone wants to back port it, feel free.
> 
>  kernel/trace/trace_events_filter.c | 28 ++++++++++++++++++++--------
>  1 file changed, 20 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/trace/trace_events_filter.c 
> b/kernel/trace/trace_events_filter.c
> index ea8b364b6818..b6fe8167ef01 100644
> --- a/kernel/trace/trace_events_filter.c
> +++ b/kernel/trace/trace_events_filter.c
> @@ -1344,13 +1344,14 @@ struct filter_list {
>  
>  struct filter_head {
>       struct list_head        list;
> -     struct rcu_head         rcu;
> +     union {
> +             struct rcu_head         rcu;
> +             struct rcu_work         rwork;
> +     };
>  };
>  
> -
> -static void free_filter_list(struct rcu_head *rhp)
> +static void free_filter_list(struct filter_head *filter_list)
>  {
> -     struct filter_head *filter_list = container_of(rhp, struct filter_head, 
> rcu);
>       struct filter_list *filter_item, *tmp;
>  
>       list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) {
> @@ -1361,9 +1362,20 @@ static void free_filter_list(struct rcu_head *rhp)
>       kfree(filter_list);
>  }
>  
> +static void free_filter_list_work(struct work_struct *work)
> +{
> +     struct filter_head *filter_list;
> +
> +     filter_list = container_of(to_rcu_work(work), struct filter_head, 
> rwork);
> +     free_filter_list(filter_list);
> +}
> +
>  static void free_filter_list_tasks(struct rcu_head *rhp)
>  {
> -     call_rcu(rhp, free_filter_list);
> +     struct filter_head *filter_list = container_of(rhp, struct filter_head, 
> rcu);
> +
> +     INIT_RCU_WORK(&filter_list->rwork, free_filter_list_work);
> +     queue_rcu_work(system_wq, &filter_list->rwork);
>  }
>  
>  /*
> @@ -1462,7 +1474,7 @@ static void filter_free_subsystem_filters(struct 
> trace_subsystem_dir *dir,
>       tracepoint_synchronize_unregister();
>  
>       if (head)
> -             free_filter_list(&head->rcu);
> +             free_filter_list(head);
>  
>       list_for_each_entry(file, &tr->events, list) {
>               if (file->system != dir || !file->filter)
> @@ -2307,7 +2319,7 @@ static int process_system_preds(struct 
> trace_subsystem_dir *dir,
>       return 0;
>   fail:
>       /* No call succeeded */
> -     free_filter_list(&filter_list->rcu);
> +     free_filter_list(filter_list);
>       parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
>       return -EINVAL;
>   fail_mem:
> @@ -2317,7 +2329,7 @@ static int process_system_preds(struct 
> trace_subsystem_dir *dir,
>       if (!fail)
>               delay_free_filter(filter_list);
>       else
> -             free_filter_list(&filter_list->rcu);
> +             free_filter_list(filter_list);
>  
>       return -ENOMEM;
>  }
> -- 
> 2.47.2
> 

Reply via email to