On Wed, 18 Jun 2025 16:13:45 +0200 Peter Zijlstra <pet...@infradead.org> wrote:
> > diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h > > index f94f3fdf15fc..6e850c9d3f0c 100644 > > --- a/include/linux/entry-common.h > > +++ b/include/linux/entry-common.h > > @@ -12,6 +12,7 @@ > > #include <linux/resume_user_mode.h> > > #include <linux/tick.h> > > #include <linux/kmsan.h> > > +#include <linux/unwind_deferred.h> > > > > #include <asm/entry-common.h> > > #include <asm/syscall.h> > > @@ -362,6 +363,7 @@ static __always_inline void exit_to_user_mode(void) > > lockdep_hardirqs_on_prepare(); > > instrumentation_end(); > > > > + unwind_exit_to_user_mode(); > > So I was expecting this to do the actual unwind, and was about to go > yell this is the wrong place for that. > > But this is not that. Perhaps find a better name like: > unwind_clear_cache() or so? Sure. How about unwind_reset_info()? As it's not going to just clear the cache but also reset the trace info (like the timestamp and such). > > > user_enter_irqoff(); > > arch_exit_to_user_mode(); > > lockdep_hardirqs_on(CALLER_ADDR0); > > > > diff --git a/include/linux/unwind_deferred_types.h > > b/include/linux/unwind_deferred_types.h > > index aa32db574e43..db5b54b18828 100644 > > --- a/include/linux/unwind_deferred_types.h > > +++ b/include/linux/unwind_deferred_types.h > > @@ -2,8 +2,13 @@ > > #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H > > #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H > > > > +struct unwind_cache { > > + unsigned int nr_entries; > > + unsigned long entries[]; > > +}; > > + > > struct unwind_task_info { > > - unsigned long *entries; > > + struct unwind_cache *cache; > > }; > > > > #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ > > diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c > > index 0bafb95e6336..e3913781c8c6 100644 > > --- a/kernel/unwind/deferred.c > > +++ b/kernel/unwind/deferred.c > > @@ -24,6 +24,7 @@ > > int unwind_deferred_trace(struct unwind_stacktrace *trace) > > { > > struct unwind_task_info *info = &current->unwind_info; > > + 
struct unwind_cache *cache; > > > > /* Should always be called from faultable context */ > > might_fault(); > > @@ -31,17 +32,30 @@ int unwind_deferred_trace(struct unwind_stacktrace > > *trace) > > if (current->flags & PF_EXITING) > > return -EINVAL; > > > > - if (!info->entries) { > > - info->entries = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(long), > > - GFP_KERNEL); > > - if (!info->entries) > > + if (!info->cache) { > > + info->cache = kzalloc(struct_size(cache, entries, > > UNWIND_MAX_ENTRIES), > > + GFP_KERNEL); > > And now you're one 'long' larger than a page. Surely that's a crap size > for an allocator? Bah, Ingo suggested to put the counter in the allocation and I didn't think about the size going over the page. Good catch! Since it can make one per task, it may be good to make this into a kmemcache. -- Steve