On Wed, 05 Feb 2025 17:50:33 -0500
Steven Rostedt <rost...@goodmis.org> wrote:

> From: Steven Rostedt <rost...@goodmis.org>
> 
> Instead of just having a meta data at the first page of each sub buffer
> that has duplicate data, add a new meta page to the entire block of memory
> that holds the duplicate data and remove it from the sub buffer meta data.
> 
> This will open up the extra memory in this first page to be used by the
> tracer for its own persistent data.



> 
> Signed-off-by: Steven Rostedt (Google) <rost...@goodmis.org>
> ---
>  kernel/trace/ring_buffer.c | 165 ++++++++++++++++++++++++++-----------
>  1 file changed, 115 insertions(+), 50 deletions(-)
> 
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 7146b780176f..0446d053dbd6 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -49,7 +49,12 @@ static void update_pages_handler(struct work_struct *work);
>  

Can you add a comment to explain the layout of these meta-data and subbufs?


>  struct ring_buffer_meta {
>       int             magic;
> -     int             struct_size;
> +     int             struct_sizes;
> +     unsigned long   total_size;
> +     unsigned long   buffers_offset;
> +};
> +
> +struct ring_buffer_cpu_meta {
>       unsigned long   kaslr_addr;
>       unsigned long   first_buffer;
>       unsigned long   head_buffer;
> @@ -517,7 +522,7 @@ struct ring_buffer_per_cpu {
>       struct mutex                    mapping_lock;
>       unsigned long                   *subbuf_ids;    /* ID to subbuf VA */
>       struct trace_buffer_meta        *meta_page;
> -     struct ring_buffer_meta         *ring_meta;
> +     struct ring_buffer_cpu_meta     *ring_meta;
>  
>       /* ring buffer pages to update, > 0 to add, < 0 to remove */
>       long                            nr_pages_to_update;
> @@ -550,6 +555,8 @@ struct trace_buffer {
>       unsigned long                   range_addr_start;
>       unsigned long                   range_addr_end;
>  
> +     struct ring_buffer_meta         *meta;
> +
>       unsigned long                   kaslr_addr;
>  
>       unsigned int                    subbuf_size;
> @@ -1270,7 +1277,7 @@ static void rb_head_page_activate(struct 
> ring_buffer_per_cpu *cpu_buffer)
>       rb_set_list_to_head(head->list.prev);
>  
>       if (cpu_buffer->ring_meta) {
> -             struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +             struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
>               meta->head_buffer = (unsigned long)head->page;
>       }
>  }
> @@ -1568,7 +1575,7 @@ static void rb_check_pages(struct ring_buffer_per_cpu 
> *cpu_buffer)
>  static unsigned long
>  rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs)
>  {
> -     addr += sizeof(struct ring_buffer_meta) +
> +     addr += sizeof(struct ring_buffer_cpu_meta) +
>               sizeof(int) * nr_subbufs;
>       return ALIGN(addr, subbuf_size);
>  }
> @@ -1579,19 +1586,22 @@ rb_range_align_subbuf(unsigned long addr, int 
> subbuf_size, int nr_subbufs)
>  static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int 
> cpu)
>  {
>       int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE;
> -     unsigned long ptr = buffer->range_addr_start;
> -     struct ring_buffer_meta *meta;
> +     struct ring_buffer_cpu_meta *meta;
> +     struct ring_buffer_meta *bmeta;
> +     unsigned long ptr;
>       int nr_subbufs;
>  
> -     if (!ptr)
> +     bmeta = buffer->meta;
> +     if (!bmeta)
>               return NULL;
>  
> +     ptr = (unsigned long)bmeta + bmeta->buffers_offset;
> +     meta = (struct ring_buffer_cpu_meta *)ptr;
> +
>       /* When nr_pages passed in is zero, the first meta has already been 
> initialized */
>       if (!nr_pages) {
> -             meta = (struct ring_buffer_meta *)ptr;
>               nr_subbufs = meta->nr_subbufs;
>       } else {
> -             meta = NULL;
>               /* Include the reader page */
>               nr_subbufs = nr_pages + 1;
>       }
> @@ -1623,7 +1633,7 @@ static void *rb_range_meta(struct trace_buffer *buffer, 
> int nr_pages, int cpu)
>  }
>  
>  /* Return the start of subbufs given the meta pointer */
> -static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta)
> +static void *rb_subbufs_from_meta(struct ring_buffer_cpu_meta *meta)
>  {
>       int subbuf_size = meta->subbuf_size;
>       unsigned long ptr;
> @@ -1639,7 +1649,7 @@ static void *rb_subbufs_from_meta(struct 
> ring_buffer_meta *meta)
>   */
>  static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx)
>  {
> -     struct ring_buffer_meta *meta;
> +     struct ring_buffer_cpu_meta *meta;
>       unsigned long ptr;
>       int subbuf_size;
>  
> @@ -1664,14 +1674,73 @@ static void *rb_range_buffer(struct 
> ring_buffer_per_cpu *cpu_buffer, int idx)
>       return (void *)ptr;
>  }
>  
> +/*
> + * See if the existing memory contains a valid meta section.
> + * if so, use that, otherwise initialize it.

Note that this must be called only for the persistent ring buffer.

> + */
> +static bool rb_meta_init(struct trace_buffer *buffer)
> +{
> +     unsigned long ptr = buffer->range_addr_start;
> +     struct ring_buffer_meta *bmeta;
> +     unsigned long total_size;
> +     int struct_sizes;
> +
> +     bmeta = (struct ring_buffer_meta *)ptr;
> +     buffer->meta = bmeta;
> +
> +     total_size = buffer->range_addr_end - buffer->range_addr_start;
> +
> +     struct_sizes = sizeof(struct ring_buffer_cpu_meta);
> +     struct_sizes |= sizeof(*bmeta) << 16;
> +
> +     /* The first buffer will start one page after the meta page */
> +     ptr += sizeof(*bmeta);
> +     ptr = ALIGN(ptr, PAGE_SIZE);
> +
> +     if (bmeta->magic != RING_BUFFER_META_MAGIC) {
> +             pr_info("Ring buffer boot meta mismatch of magic\n");
> +             goto init;
> +     }
> +
> +     if (bmeta->struct_sizes != struct_sizes) {
> +             pr_info("Ring buffer boot meta mismatch of struct size\n");
> +             goto init;
> +     }
> +
> +     if (bmeta->total_size != total_size) {
> +             pr_info("Ring buffer boot meta mismatch of total size\n");
> +             goto init;
> +     }
> +
> +     if (bmeta->buffers_offset > bmeta->total_size) {
> +             pr_info("Ring buffer boot meta mismatch of offset outside of 
> total size\n");
> +             goto init;
> +     }
> +
> +     if (bmeta->buffers_offset != (void *)ptr - (void *)bmeta) {
> +             pr_info("Ring buffer boot meta mismatch of first buffer 
> offset\n");
> +             goto init;
> +     }
> +
> +     return true;
> +
> + init:
> +     bmeta->magic = RING_BUFFER_META_MAGIC;
> +     bmeta->struct_sizes = struct_sizes;
> +     bmeta->total_size = total_size;
> +     bmeta->buffers_offset = (void *)ptr - (void *)bmeta;
> +
> +     return false;
> +}
> +
>  /*
>   * See if the existing memory contains valid ring buffer data.
>   * As the previous kernel must be the same as this kernel, all
>   * the calculations (size of buffers and number of buffers)
>   * must be the same.
>   */
> -static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
> -                       struct trace_buffer *buffer, int nr_pages)
> +static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
> +                           struct trace_buffer *buffer, int nr_pages)
>  {
>       int subbuf_size = PAGE_SIZE;
>       struct buffer_data_page *subbuf;
> @@ -1679,20 +1748,6 @@ static bool rb_meta_valid(struct ring_buffer_meta 
> *meta, int cpu,
>       unsigned long buffers_end;
>       int i;
>  
> -     /* Check the meta magic and meta struct size */
> -     if (meta->magic != RING_BUFFER_META_MAGIC ||
> -         meta->struct_size != sizeof(*meta)) {
> -             pr_info("Ring buffer boot meta[%d] mismatch of magic or struct 
> size\n", cpu);
> -             return false;
> -     }
> -
> -     /* The subbuffer's size and number of subbuffers must match */
> -     if (meta->subbuf_size != subbuf_size ||
> -         meta->nr_subbufs != nr_pages + 1) {
> -             pr_info("Ring buffer boot meta [%d] mismatch of 
> subbuf_size/nr_pages\n", cpu);
> -             return false;
> -     }
> -
>       buffers_start = meta->first_buffer;
>       buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs);
>  
> @@ -1730,7 +1785,7 @@ static bool rb_meta_valid(struct ring_buffer_meta 
> *meta, int cpu,
>       return true;
>  }
>  
> -static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf);
> +static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void 
> *subbuf);
>  
>  static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int 
> cpu,
>                              unsigned long long *timestamp, u64 *delta_ptr)
> @@ -1797,7 +1852,7 @@ static int rb_validate_buffer(struct buffer_data_page 
> *dpage, int cpu)
>  /* If the meta data has been validated, now validate the events */
>  static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
>  {
> -     struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +     struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
>       struct buffer_page *head_page;
>       unsigned long entry_bytes = 0;
>       unsigned long entries = 0;
> @@ -1873,7 +1928,7 @@ static void rb_meta_validate_events(struct 
> ring_buffer_per_cpu *cpu_buffer)
>       }
>  }
>  
> -static void rb_meta_init_text_addr(struct ring_buffer_meta *meta)
> +static void rb_meta_init_text_addr(struct ring_buffer_cpu_meta *meta)
>  {
>  #ifdef CONFIG_RANDOMIZE_BASE
>       meta->kaslr_addr = kaslr_offset();
> @@ -1884,18 +1939,25 @@ static void rb_meta_init_text_addr(struct 
> ring_buffer_meta *meta)
>  
>  static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
>  {
> -     struct ring_buffer_meta *meta;
> +     struct ring_buffer_cpu_meta *meta;
> +     struct ring_buffer_meta *bmeta;
>       unsigned long delta;
>       void *subbuf;
> +     bool valid = false;
>       int cpu;
>       int i;
>  
> +     if (rb_meta_init(buffer))
> +             valid = true;
> +
> +     bmeta = buffer->meta;
> +
>       for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
>               void *next_meta;
>  
>               meta = rb_range_meta(buffer, nr_pages, cpu);
>  
> -             if (rb_meta_valid(meta, cpu, buffer, nr_pages)) {
> +             if (valid && rb_cpu_meta_valid(meta, cpu, buffer, nr_pages)) {
>                       /* Make the mappings match the current address */
>                       subbuf = rb_subbufs_from_meta(meta);
>                       delta = (unsigned long)subbuf - meta->first_buffer;
> @@ -1913,9 +1975,6 @@ static void rb_range_meta_init(struct trace_buffer 
> *buffer, int nr_pages)
>  
>               memset(meta, 0, next_meta - (void *)meta);
>  
> -             meta->magic = RING_BUFFER_META_MAGIC;
> -             meta->struct_size = sizeof(*meta);
> -
>               meta->nr_subbufs = nr_pages + 1;
>               meta->subbuf_size = PAGE_SIZE;
>  
> @@ -1943,7 +2002,7 @@ static void rb_range_meta_init(struct trace_buffer 
> *buffer, int nr_pages)
>  static void *rbm_start(struct seq_file *m, loff_t *pos)
>  {
>       struct ring_buffer_per_cpu *cpu_buffer = m->private;
> -     struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +     struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
>       unsigned long val;
>  
>       if (!meta)
> @@ -1968,7 +2027,7 @@ static void *rbm_next(struct seq_file *m, void *v, 
> loff_t *pos)
>  static int rbm_show(struct seq_file *m, void *v)
>  {
>       struct ring_buffer_per_cpu *cpu_buffer = m->private;
> -     struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +     struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
>       unsigned long val = (unsigned long)v;
>  
>       if (val == 1) {
> @@ -2017,7 +2076,7 @@ int ring_buffer_meta_seq_init(struct file *file, struct 
> trace_buffer *buffer, in
>  static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer,
>                                 struct buffer_page *bpage)
>  {
> -     struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +     struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
>  
>       if (meta->head_buffer == (unsigned long)bpage->page)
>               cpu_buffer->head_page = bpage;
> @@ -2032,7 +2091,7 @@ static int __rb_allocate_pages(struct 
> ring_buffer_per_cpu *cpu_buffer,
>               long nr_pages, struct list_head *pages)
>  {
>       struct trace_buffer *buffer = cpu_buffer->buffer;
> -     struct ring_buffer_meta *meta = NULL;
> +     struct ring_buffer_cpu_meta *meta = NULL;
>       struct buffer_page *bpage, *tmp;
>       bool user_thread = current->mm != NULL;
>       gfp_t mflags;
> @@ -2156,7 +2215,7 @@ static struct ring_buffer_per_cpu *
>  rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
>  {
>       struct ring_buffer_per_cpu *cpu_buffer;
> -     struct ring_buffer_meta *meta;
> +     struct ring_buffer_cpu_meta *meta;
>       struct buffer_page *bpage;
>       struct page *page;
>       int ret;
> @@ -2327,10 +2386,16 @@ static struct trace_buffer *alloc_buffer(unsigned 
> long size, unsigned flags,
>  
>       /* If start/end are specified, then that overrides size */
>       if (start && end) {
> +             unsigned long buffers_start;
>               unsigned long ptr;
>               int n;
>  
>               size = end - start;
> +
> +             /* Subtract the buffer meta data */
> +             size -= PAGE_SIZE;
> +             buffers_start = start + PAGE_SIZE;
> +

So the buffer meta data is PAGE_SIZE aligned?

>               size = size / nr_cpu_ids;
>  
>               /*
> @@ -2340,7 +2405,7 @@ static struct trace_buffer *alloc_buffer(unsigned long 
> size, unsigned flags,
>                * needed, plus account for the integer array index that
>                * will be appended to the meta data.
>                */
> -             nr_pages = (size - sizeof(struct ring_buffer_meta)) /
> +             nr_pages = (size - sizeof(struct ring_buffer_cpu_meta)) /
>                       (subbuf_size + sizeof(int));
>               /* Need at least two pages plus the reader page */
>               if (nr_pages < 3)
> @@ -2348,8 +2413,8 @@ static struct trace_buffer *alloc_buffer(unsigned long 
> size, unsigned flags,
>  
>   again:
>               /* Make sure that the size fits aligned */
> -             for (n = 0, ptr = start; n < nr_cpu_ids; n++) {
> -                     ptr += sizeof(struct ring_buffer_meta) +
> +             for (n = 0, ptr = buffers_start; n < nr_cpu_ids; n++) {
> +                     ptr += sizeof(struct ring_buffer_cpu_meta) +
>                               sizeof(int) * nr_pages;
>                       ptr = ALIGN(ptr, subbuf_size);
>                       ptr += subbuf_size * nr_pages;
> @@ -3075,7 +3140,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
>  }
>  
>  /* Return the index into the sub-buffers for a given sub-buffer */
> -static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf)
> +static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void 
> *subbuf)
>  {
>       void *subbuf_array;
>  
> @@ -3087,7 +3152,7 @@ static int rb_meta_subbuf_idx(struct ring_buffer_meta 
> *meta, void *subbuf)
>  static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer,
>                               struct buffer_page *next_page)
>  {
> -     struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +     struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
>       unsigned long old_head = (unsigned long)next_page->page;
>       unsigned long new_head;
>  
> @@ -3104,7 +3169,7 @@ static void rb_update_meta_head(struct 
> ring_buffer_per_cpu *cpu_buffer,
>  static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer,
>                                 struct buffer_page *reader)
>  {
> -     struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +     struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
>       void *old_reader = cpu_buffer->reader_page->page;
>       void *new_reader = reader->page;
>       int id;
> @@ -3733,7 +3798,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu 
> *cpu_buffer)
>                         rb_page_write(cpu_buffer->commit_page));
>               rb_inc_page(&cpu_buffer->commit_page);
>               if (cpu_buffer->ring_meta) {
> -                     struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +                     struct ring_buffer_cpu_meta *meta = 
> cpu_buffer->ring_meta;
>                       meta->commit_buffer = (unsigned 
> long)cpu_buffer->commit_page->page;
>               }
>               /* add barrier to keep gcc from optimizing too much */
> @@ -5986,7 +6051,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
>       if (cpu_buffer->mapped) {
>               rb_update_meta_page(cpu_buffer);
>               if (cpu_buffer->ring_meta) {
> -                     struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
> +                     struct ring_buffer_cpu_meta *meta = 
> cpu_buffer->ring_meta;
>                       meta->commit_buffer = meta->head_buffer;
>               }
>       }
> @@ -6020,7 +6085,7 @@ static void reset_disabled_cpu_buffer(struct 
> ring_buffer_per_cpu *cpu_buffer)
>  void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
>  {
>       struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
> -     struct ring_buffer_meta *meta;
> +     struct ring_buffer_cpu_meta *meta;
>  
>       if (!cpumask_test_cpu(cpu, buffer->cpumask))
>               return;
> @@ -6058,7 +6123,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>  void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
>  {
>       struct ring_buffer_per_cpu *cpu_buffer;
> -     struct ring_buffer_meta *meta;
> +     struct ring_buffer_cpu_meta *meta;
>       int cpu;
>  
>       /* prevent another thread from changing buffer sizes */

Thank you,

> -- 
> 2.45.2
> 
> 


-- 
Masami Hiramatsu (Google) <mhira...@kernel.org>

Reply via email to