On Fri, 21 Nov 2025 13:48:40 -0500 Steven Rostedt <[email protected]> wrote:
> On Fri, 21 Nov 2025 16:01:05 +0900 > "Masami Hiramatsu (Google)" <[email protected]> wrote: > > > Signed-off-by: Masami Hiramatsu (Google) <[email protected]> > > --- > > kernel/trace/trace.c | 58 > > +++++++++++++++++++++++++++++++++++++++++++++++--- > > kernel/trace/trace.h | 1 + > > 2 files changed, 55 insertions(+), 4 deletions(-) > > > > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > > index 33fcde91924c..616d24580371 100644 > > --- a/kernel/trace/trace.c > > +++ b/kernel/trace/trace.c > > @@ -10523,6 +10523,8 @@ static int __remove_instance(struct trace_array *tr) > > reserve_mem_release_by_name(tr->range_name); > > kfree(tr->range_name); > > } > > + if (tr->flags & TRACE_ARRAY_FL_VMALLOC) > > + vfree((void *)tr->range_addr_start); > > The ring buffer user space mapping expects the ring buffer to be real > memory. Thus, virtual mapping is not supported. Either we need to allocate > normal memory, or the mmap function of his array must return an error. Hmm, I think allocating contiguous memory at boot is not realistic, and mmap support is a lower priority than a single copy at boot time. > > > > for (i = 0; i < tr->nr_topts; i++) { > > kfree(tr->topts[i].topts); > > @@ -11328,6 +11330,41 @@ __init static void do_allocate_snapshot(const char > > *name) > > static inline void do_allocate_snapshot(const char *name) { } > > #endif > > > > +__init static int backup_instance_area(const char *backup, > > + unsigned long *addr, phys_addr_t *size) > > +{ > > + struct trace_array *backup_tr; > > + void *allocated_vaddr = NULL; > > + > > + backup_tr = trace_array_get_by_name(backup, NULL); > > + if (!backup_tr) { > > + pr_warn("Tracing: Instance %s is not found.\n", backup); > > + return -ENOENT; > > + } > > Add space. OK > > > + if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) { > > + pr_warn("Tracing: Instance %s is not boot mapped.\n", backup); > > + trace_array_put(backup_tr); > > + return -EINVAL; > > + } > > + > > + *size = backup_tr->range_addr_size; > > + > > + allocated_vaddr = vzalloc(*size); > > + if (!allocated_vaddr) { > > + pr_warn("Tracing: Failed to allocate memory for copying > > instance %s (size 0x%lx)\n", > > + backup, (unsigned long)*size); > > + trace_array_put(backup_tr); > > + return -ENOMEM; > > + } > > + > > + memcpy(allocated_vaddr, > > + (void *)backup_tr->range_addr_start, (size_t)*size); > > + *addr = (unsigned long)allocated_vaddr; > > + > > + trace_array_put(backup_tr); > > + return 0; > > +} > > + > > __init static void enable_instances(void) > > { > > struct trace_array *tr; > > @@ -11350,11 +11387,15 @@ __init static void enable_instances(void) > > char *flag_delim; > > char *addr_delim; > > char *rname __free(kfree) = NULL; > > + char *backup; > > > > tok = strsep(&curr_str, ","); > > > > - flag_delim = strchr(tok, '^'); > > - addr_delim = strchr(tok, '@'); > > + name = strsep(&tok, "="); > > + backup = tok; > > + > > + flag_delim = strchr(name, '^'); > > + addr_delim = strchr(name, '@'); > > > > if (addr_delim) > > *addr_delim++ = '\0'; > > @@ -11362,7 +11403,10 @@ __init static void enable_instances(void) > > if (flag_delim) > > *flag_delim++ = '\0'; > > > > - name = tok; > > + if (backup) { > > + if (backup_instance_area(backup, &addr, &size) < 0) > > + continue; > > + } > > > > if (flag_delim) { > > char *flag; > > @@ -11458,7 +11502,13 @@ __init static void enable_instances(void) > > tr->ref++; > > } > > > > - if (start) { > > + /* > > + * Backup buffers can be freed but need vfree(). > > + */ > > + if (backup) > > + tr->flags |= TRACE_ARRAY_FL_VMALLOC; > > + > > + if (start || backup) { > > tr->flags |= TRACE_ARRAY_FL_BOOT | > > TRACE_ARRAY_FL_LAST_BOOT; > > tr->range_name = no_free_ptr(rname); > > } > > This would then need: > > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > index d1e527cf2aae..104e385e718e 100644 > --- a/kernel/trace/trace.c > +++ b/kernel/trace/trace.c > @@ -8791,8 +8791,8 @@ static int tracing_buffers_mmap(struct file *filp, > struct vm_area_struct *vma) > struct trace_iterator *iter = &info->iter; > int ret = 0; > > - /* A memmap'ed buffer is not supported for user space mmap */ > - if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP) > + /* A memmap'ed and backup buffers are not supported for user space mmap > */ > + if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC)) > return -ENODEV; > > ret = get_snapshot_map(iter->tr); > Ah, thanks for the code :) > > As without it, when I tried mmapping the backup buffer I got this; > > [ 251.615441] BUG: unable to handle page fault for address: ffffd1c31e0000c8 > [ 251.615447] #PF: supervisor read access in kernel mode > [ 251.615449] #PF: error_code(0x0000) - not-present page > [ 251.615451] PGD 0 P4D 0 > [ 251.615456] Oops: Oops: 0000 [#1] SMP PTI > [ 251.615463] CPU: 6 UID: 0 PID: 1450 Comm: mmap-rb-lib Not tainted > 6.18.0-rc3-ftest-00018-g02548113c2f6 #287 PREEMPT_{RT,LAZY} > [ 251.615469] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS > 1.17.0-debian-1.17.0-1 04/01/2014 > [ 251.615471] RIP: 0010:validate_page_before_insert+0x5/0x90 > [ 251.615479] Code: 48 c1 e8 0e 89 c2 83 e2 01 89 d0 c3 cc cc cc cc 0f 1f 44 > 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 <48> 8b > 46 08 a8 01 75 5d 66 90 48 89 f0 8b 50 34 85 d2 74 68 48 89 > [ 251.615481] RSP: 0018:ffffa52b443d7988 EFLAGS: 00010246 > [ 251.615484] RAX: ffff8a84dcac4000 RBX: 00007f07e0c10000 RCX: > 0000000108d2f067 > [ 251.615486] RDX: 0000000108d2f067 RSI: ffffd1c31e0000c0 RDI: > ffff8a84dd865700 > [ 251.615487] RBP: 0000000000000000 R08: 000000005ab70715 R09: > 0000000000000009 > [ 251.615489] R10: 0000000000000009 R11: ffff8a84c9320fb8 R12: > ffff8a84dd865700 > [ 251.615490] R13: 0000000000000000 R14: ffffd1c31e0000c0 R15: > ffff8a84c8d2f080 > [ 251.615492] FS: 00007f07e0e8e740(0000) GS:ffff8a86afbb7000(0000) > knlGS:0000000000000000 > [ 251.615494] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 251.615496] CR2: ffffd1c31e0000c8 CR3: 000000011c390006 CR4: > 0000000000170ef0 > [ 251.615501] Call Trace: > [ 251.615502] <TASK> > [ 251.615504] vm_insert_pages+0x157/0x360 > [ 251.615513] __rb_map_vma+0x1ef/0x390 > [ 251.615521] ring_buffer_map+0x261/0x370 > [ 251.615527] tracing_buffers_mmap+0x7e/0x100 > [ 251.615532] __mmap_region+0x831/0xe00 > [ 251.615554] do_mmap+0x4bf/0x6b0 > [ 251.615560] vm_mmap_pgoff+0x126/0x230 > [ 251.615569] ksys_mmap_pgoff+0x162/0x220 > [ 251.615573] do_syscall_64+0x76/0x9a0 > [ 251.615581] entry_SYSCALL_64_after_hwframe+0x76/0x7e > [ 251.615584] RIP: 0033:0x7f07e0f9dde2 > [ 251.615589] Code: 00 00 00 0f 1f 44 00 00 > > -- Steve > > > > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h > > index 58be6d741d72..9e5186c96e9c 100644 > > --- a/kernel/trace/trace.h > > +++ b/kernel/trace/trace.h > > @@ -453,6 +453,7 @@ enum { > > TRACE_ARRAY_FL_LAST_BOOT = BIT(2), > > TRACE_ARRAY_FL_MOD_INIT = BIT(3), > > TRACE_ARRAY_FL_MEMMAP = BIT(4), > > + TRACE_ARRAY_FL_VMALLOC = BIT(5), > > }; > > > > #ifdef CONFIG_MODULES > > -- Masami Hiramatsu (Google) <[email protected]>
