On Tue, 25 Nov 2025 12:11:53 -0500
Steven Rostedt <[email protected]> wrote:

> From: Steven Rostedt <[email protected]>
> 
> The allocation of the per-CPU buffer descriptor, the buffer page
> descriptors, and the buffer page data itself can be pretty ugly:
> 
>   kzalloc_node(ALIGN(sizeof(struct buffer_page), cache_line_size()),
>                GFP_KERNEL, cpu_to_node(cpu));
> 
> And the data pages:
> 
>   page = alloc_pages_node(cpu_to_node(cpu),
>                           GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_COMP | __GFP_ZERO, order);
>   if (!page)
>       return NULL;
>   bpage->page = page_address(page);
>   rb_init_page(bpage->page);
> 
> Add helper functions to make the code easier to read.
> 
> This does make all allocations of the data page (bpage->page) use the
> __GFP_RETRY_MAYFAIL flag (and not just the bulk allocator), which is
> actually better: allocating a data page for ring buffer tracing should
> try hard, but must not trigger the OOM killer.
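> 
> With the helpers, the call sites above reduce to (taken from the diff
> below):
> 
>   cpu_buffer = alloc_cpu_buffer(cpu);
> 
>   bpage = alloc_cpu_page(cpu);
> 
>   bpage->page = alloc_cpu_data(cpu, order);
>   if (!bpage->page)
>       goto free_pages;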
> 
> Link: https://lore.kernel.org/all/CAHk-=wjmmsaaqtjbsfyenfuze1bmjlj+2dltlwjugt07ugc...@mail.gmail.com/
> 
> Suggested-by: Linus Torvalds <[email protected]>
> Signed-off-by: Steven Rostedt (Google) <[email protected]>

Looks good to me.

Acked-by: Masami Hiramatsu (Google) <[email protected]>

Thanks,

> ---
> Changes since v1: https://patch.msgid.link/[email protected]
> 
> - Removed set but unused variables (kernel test robot)
> 
>  kernel/trace/ring_buffer.c | 97 +++++++++++++++++++++-----------------
>  1 file changed, 53 insertions(+), 44 deletions(-)
> 
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 1244d2c5c384..6295443b0944 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -401,6 +401,41 @@ static void free_buffer_page(struct buffer_page *bpage)
>       kfree(bpage);
>  }
>  
> +/*
> + * For best performance, allocate cpu buffer data cache line sized
> + * and per CPU.
> + */
> +#define alloc_cpu_buffer(cpu) (struct ring_buffer_per_cpu *)         \
> +     kzalloc_node(ALIGN(sizeof(struct ring_buffer_per_cpu),          \
> +                        cache_line_size()), GFP_KERNEL, cpu_to_node(cpu))
> +
> +#define alloc_cpu_page(cpu) (struct buffer_page *)                   \
> +     kzalloc_node(ALIGN(sizeof(struct buffer_page),                  \
> +                        cache_line_size()), GFP_KERNEL, cpu_to_node(cpu))
> +
> +static struct buffer_data_page *alloc_cpu_data(int cpu, int order)
> +{
> +     struct buffer_data_page *dpage;
> +     struct page *page;
> +     gfp_t mflags;
> +
> +     /*
> +      * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
> +      * gracefully without invoking oom-killer and the system is not
> +      * destabilized.
> +      */
> +     mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_COMP | __GFP_ZERO;
> +
> +     page = alloc_pages_node(cpu_to_node(cpu), mflags, order);
> +     if (!page)
> +             return NULL;
> +
> +     dpage = page_address(page);
> +     rb_init_page(dpage);
> +
> +     return dpage;
> +}
> +
>  /*
>   * We need to fit the time_stamp delta into 27 bits.
>   */
> @@ -2204,7 +2239,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>       struct ring_buffer_cpu_meta *meta = NULL;
>       struct buffer_page *bpage, *tmp;
>       bool user_thread = current->mm != NULL;
> -     gfp_t mflags;
>       long i;
>  
>       /*
> @@ -2218,13 +2252,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>       if (i < nr_pages)
>               return -ENOMEM;
>  
> -     /*
> -      * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
> -      * gracefully without invoking oom-killer and the system is not
> -      * destabilized.
> -      */
> -     mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
> -
>       /*
>        * If a user thread allocates too much, and si_mem_available()
>        * reports there's enough memory, even though there is not.
> @@ -2241,10 +2268,8 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>               meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu);
>  
>       for (i = 0; i < nr_pages; i++) {
> -             struct page *page;
>  
> -             bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
> -                                 mflags, cpu_to_node(cpu_buffer->cpu));
> +             bpage = alloc_cpu_page(cpu_buffer->cpu);
>               if (!bpage)
>                       goto free_pages;
>  
> @@ -2267,13 +2292,10 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>                       bpage->range = 1;
>                       bpage->id = i + 1;
>               } else {
> -                     page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
> -                                             mflags | __GFP_COMP | __GFP_ZERO,
> -                                             cpu_buffer->buffer->subbuf_order);
> -                     if (!page)
> +                     int order = cpu_buffer->buffer->subbuf_order;
> +                     bpage->page = alloc_cpu_data(cpu_buffer->cpu, order);
> +                     if (!bpage->page)
>                               goto free_pages;
> -                     bpage->page = page_address(page);
> -                     rb_init_page(bpage->page);
>               }
>               bpage->order = cpu_buffer->buffer->subbuf_order;
>  
> @@ -2324,14 +2346,12 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
>  static struct ring_buffer_per_cpu *
>  rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
>  {
> -     struct ring_buffer_per_cpu *cpu_buffer __free(kfree) = NULL;
> +     struct ring_buffer_per_cpu *cpu_buffer __free(kfree) =
> +             alloc_cpu_buffer(cpu);
>       struct ring_buffer_cpu_meta *meta;
>       struct buffer_page *bpage;
> -     struct page *page;
>       int ret;
>  
> -     cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
> -                               GFP_KERNEL, cpu_to_node(cpu));
>       if (!cpu_buffer)
>               return NULL;
>  
> @@ -2347,8 +2367,7 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
>       init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
>       mutex_init(&cpu_buffer->mapping_lock);
>  
> -     bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
> -                         GFP_KERNEL, cpu_to_node(cpu));
> +     bpage = alloc_cpu_page(cpu);
>       if (!bpage)
>               return NULL;
>  
> @@ -2370,13 +2389,10 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
>                       rb_meta_buffer_update(cpu_buffer, bpage);
>               bpage->range = 1;
>       } else {
> -             page = alloc_pages_node(cpu_to_node(cpu),
> -                                     GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
> -                                     cpu_buffer->buffer->subbuf_order);
> -             if (!page)
> +             int order = cpu_buffer->buffer->subbuf_order;
> +             bpage->page = alloc_cpu_data(cpu, order);
> +             if (!bpage->page)
>                       goto fail_free_reader;
> -             bpage->page = page_address(page);
> -             rb_init_page(bpage->page);
>       }
>  
>       INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
> @@ -6464,7 +6480,6 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
>       struct ring_buffer_per_cpu *cpu_buffer;
>       struct buffer_data_read_page *bpage = NULL;
>       unsigned long flags;
> -     struct page *page;
>  
>       if (!cpumask_test_cpu(cpu, buffer->cpumask))
>               return ERR_PTR(-ENODEV);
> @@ -6486,22 +6501,16 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
>       arch_spin_unlock(&cpu_buffer->lock);
>       local_irq_restore(flags);
>  
> -     if (bpage->data)
> -             goto out;
> -
> -     page = alloc_pages_node(cpu_to_node(cpu),
> -                             GFP_KERNEL | __GFP_NORETRY | __GFP_COMP | __GFP_ZERO,
> -                             cpu_buffer->buffer->subbuf_order);
> -     if (!page) {
> -             kfree(bpage);
> -             return ERR_PTR(-ENOMEM);
> +     if (bpage->data) {
> +             rb_init_page(bpage->data);
> +     } else {
> +             bpage->data = alloc_cpu_data(cpu, cpu_buffer->buffer->subbuf_order);
> +             if (!bpage->data) {
> +                     kfree(bpage);
> +                     return ERR_PTR(-ENOMEM);
> +             }
>       }
>  
> -     bpage->data = page_address(page);
> -
> - out:
> -     rb_init_page(bpage->data);
> -
>       return bpage;
>  }
>  EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
> -- 
> 2.51.0
> 


-- 
Masami Hiramatsu (Google) <[email protected]>
