On Thu, 24 Jul 2025 13:46:35 +0900 "Masami Hiramatsu (Google)" <mhira...@kernel.org> wrote:
> From: Masami Hiramatsu (Google) <mhira...@kernel.org> > > With CONFIG_DEBUG_INFO_BTF=y and PAHOLE_HAS_BTF_TAG=y, `__user` is > converted to `__attribute__((btf_type_tag("user")))`. In this case, > some syscall events have it for __user data, like below; > > /sys/kernel/tracing # cat events/syscalls/sys_enter_openat/format > name: sys_enter_openat > ID: 720 > format: > field:unsigned short common_type; offset:0; size:2; > signed:0; > field:unsigned char common_flags; offset:2; size:1; > signed:0; > field:unsigned char common_preempt_count; offset:3; > size:1; signed:0; > field:int common_pid; offset:4; size:4; signed:1; > > field:int __syscall_nr; offset:8; size:4; signed:1; > field:int dfd; offset:16; size:8; signed:0; > field:const char __attribute__((btf_type_tag("user"))) * filename; > offset:24; size:8; signed:0; > field:int flags; offset:32; size:8; signed:0; > field:umode_t mode; offset:40; size:8; signed:0; > > > Then the trace event filter fails to set the string acceptable flag > (FILTER_PTR_STRING) to the field and rejects setting string filter; > > # echo 'filename.ustring ~ "*ftracetest-dir.wbx24v*"' \ > >> events/syscalls/sys_enter_openat/filter > sh: write error: Invalid argument > # cat error_log > [ 723.743637] event filter parse error: error: Expecting numeric field > Command: filename.ustring ~ "*ftracetest-dir.wbx24v*" > > Since this __attribute__ makes format parsing complicated and not > needed, remove the __attribute__(.*) from the type string. > > Signed-off-by: Masami Hiramatsu (Google) <mhira...@kernel.org> > --- > Changes in v4: > - Run sanitizer only if btf_type_tag() attribute is defined. > Changes in v3: > - Sanitize field in update_event_field() to avoid boottime performance > overhead. > - Change the function names because those are not always require eval > maps. > - Remove unneeded alloc_type flag. > Changes in v2: > - Add memory allocation check flag. > - Check the flag in update_event_fields() to avoid memory leak. > - Fix 'static const int ... strlen()' issue. > - Fix to find 2nd __attribute__ correctly. (adjust next after strcpy) > --- > kernel/trace/trace.c | 25 ++++++--- > kernel/trace/trace.h | 4 + > kernel/trace/trace_events.c | 115 > +++++++++++++++++++++++++++++++++++++------ > 3 files changed, 116 insertions(+), 28 deletions(-) > > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > index 95ae7c4e5835..b62528c42e26 100644 > --- a/kernel/trace/trace.c > +++ b/kernel/trace/trace.c > @@ -5937,17 +5937,26 @@ static inline void trace_insert_eval_map_file(struct > module *mod, > struct trace_eval_map **start, int len) { } > #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ > > -static void trace_insert_eval_map(struct module *mod, > - struct trace_eval_map **start, int len) > +static void > +trace_event_update_with_eval_map(struct module *mod, > + struct trace_eval_map **start, > + int len) > { > struct trace_eval_map **map; > > - if (len <= 0) > + /* Always run sanitizer only if btf_type_tag attr exists. */ > + if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && > + IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && > + __has_attribute(btf_type_tag)) && > + len <= 0) > return; The above is quite messy and hard to read. Can you change it to: if (len <= 0) { /* If bpf_type_tag attr is used, still need to sanitize */ if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && __has_attribute(btf_type_tag)) return; } Not much better, but a little easier to read. > > map = start; > > - trace_event_eval_update(map, len); > + trace_event_update_all(map, len); > + > + if (len <= 0) > + return; > > trace_insert_eval_map_file(mod, start, len); > } > @@ -10335,7 +10344,7 @@ static void __init eval_map_work_func(struct > work_struct *work) > int len; > > len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; > - trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len); > + trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); > } > > static int __init trace_eval_init(void) > @@ -10388,9 +10397,6 @@ bool module_exists(const char *module) > > static void trace_module_add_evals(struct module *mod) > { > - if (!mod->num_trace_evals) > - return; > - > /* > * Modules with bad taint do not have events created, do > * not bother with enums either. > @@ -10398,7 +10404,8 @@ static void trace_module_add_evals(struct module *mod) > if (trace_module_has_bad_taint(mod)) > return; > > - trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals); > + /* Even if no trace_evals, this need to sanitize field types. */ > + trace_event_update_with_eval_map(mod, mod->trace_evals, > mod->num_trace_evals); > } > > #ifdef CONFIG_TRACE_EVAL_MAP_FILE > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h > index bd084953a98b..1dbf1d3cf2f1 100644 > --- a/kernel/trace/trace.h > +++ b/kernel/trace/trace.h > @@ -2125,13 +2125,13 @@ static inline const char *get_syscall_name(int > syscall) > > #ifdef CONFIG_EVENT_TRACING > void trace_event_init(void); > -void trace_event_eval_update(struct trace_eval_map **map, int len); > +void trace_event_update_all(struct trace_eval_map **map, int len); > /* Used from boot time tracer */ > extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); > extern int trigger_process_regex(struct trace_event_file *file, char *buff); > #else > static inline void __init trace_event_init(void) { } > -static inline void trace_event_eval_update(struct trace_eval_map **map, int > len) { } > +static inline void trace_event_update_all(struct trace_eval_map **map, int > len) { } > #endif > > #ifdef CONFIG_TRACER_SNAPSHOT > diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c > index 120531268abf..5289e2032678 100644 > --- a/kernel/trace/trace_events.c > +++ b/kernel/trace/trace_events.c > @@ -3264,41 +3264,115 @@ static void add_str_to_module(struct module *module, > char *str) > list_add(&modstr->next, &module_strings); > } > > +#define ATTRIBUTE_STR "__attribute__" If you are going to test for the '(' right after, why not add it to the strstr test? > +#define ATTRIBUTE_STR_LEN (sizeof(ATTRIBUTE_STR) - 1) > + > +/* Remove all __attribute__() from type */ > +static void sanitize_field_type(char *type) > +{ > + char *attr, *tmp, *next; > + int depth; > + > + next = type; > + while ((attr = strstr(next, ATTRIBUTE_STR))) { > + next = attr + ATTRIBUTE_STR_LEN; > + > + /* Retry if __attribute__ is a part of type name. */ > + if ((attr != type && !isspace(attr[-1])) || > + *next != '(') Now you wouldn't need the *next != '(' here. > + continue; > + > + depth = 0; /* the ATTRIBUTE_STR already has the first '(' */ depth = 1; > + while ((tmp = strpbrk(next, "()"))) { > + if (*tmp == '(') > + depth++; > + else > + depth--; > + next = tmp + 1; > + if (depth == 0) > + break; > + } > + next = skip_spaces(next); > + strcpy(attr, next); > + next = attr; > + } > +} > + > +static bool need_sanitize_field_type(const char *type) > +{ > + return !!strstr(type, ATTRIBUTE_STR); > +} > + > +static char *find_replacable_eval(const char *type, const char *eval_string, > + int len) > +{ > + char *ptr; > + > + if (!eval_string) > + return NULL; > + > + ptr = strchr(type, '['); > + if (!ptr) > + return NULL; > + ptr++; > + > + if (!isalpha(*ptr) && *ptr != '_') > + return NULL; > + > + if (strncmp(eval_string, ptr, len) != 0) > + return NULL; > + > + return ptr; > +} > + > static void update_event_fields(struct trace_event_call *call, > struct trace_eval_map *map) > { > struct ftrace_event_field *field; > + const char *eval_string = NULL; > struct list_head *head; > + bool need_sanitize; > + int len = 0; > char *ptr; > char *str; > - int len = strlen(map->eval_string); > > /* Dynamic events should never have field maps */ > - if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) > + if (call->flags & TRACE_EVENT_FL_DYNAMIC) > return; > > + if (map) { > + eval_string = map->eval_string; > + len = strlen(map->eval_string); > + } > + > head = trace_get_fields(call); > list_for_each_entry(field, head, link) { > - ptr = strchr(field->type, '['); > - if (!ptr) > - continue; > - ptr++; > > - if (!isalpha(*ptr) && *ptr != '_') > - continue; > - > - if (strncmp(map->eval_string, ptr, len) != 0) > + /* Check the field has bad string or eval. */ > + need_sanitize = need_sanitize_field_type(field->type); Is there a reason you can't call the sanitize first? Then you wouldn't need to do the strstr(ATTRIBUTE_STR) twice. -- Steve > + ptr = find_replacable_eval(field->type, eval_string, len); > + if (likely(!need_sanitize && !ptr)) > continue; > > str = kstrdup(field->type, GFP_KERNEL); > if (WARN_ON_ONCE(!str)) > return; > - ptr = str + (ptr - field->type); > - ptr = eval_replace(ptr, map, len); > - /* enum/sizeof string smaller than value */ > - if (WARN_ON_ONCE(!ptr)) { > - kfree(str); > - continue; > + > + if (ptr) { > + ptr = str + (ptr - field->type); > + > + ptr = eval_replace(ptr, map, len); > + /* enum/sizeof string smaller than value */ > + if (WARN_ON_ONCE(!ptr) && !need_sanitize) { > + kfree(str); > + continue; > + } > + } > + if (need_sanitize) { > + sanitize_field_type(str); > + /* Update field type */ > + if (field->filter_type == FILTER_OTHER) > + field->filter_type = filter_assign_type(str); > } > > /* > @@ -3313,11 +3387,13 @@ static void update_event_fields(struct > trace_event_call *call, > } > } > > -void trace_event_eval_update(struct trace_eval_map **map, int len) > +/* Update all events for replacing eval and sanitizing */ > +void trace_event_update_all(struct trace_eval_map **map, int len) > { > struct trace_event_call *call, *p; > const char *last_system = NULL; > bool first = false; > + bool updated; > int last_i; > int i; > > @@ -3330,6 +3406,7 @@ void trace_event_eval_update(struct trace_eval_map > **map, int len) > last_system = call->class->system; > } > > + updated = false; > /* > * Since calls are grouped by systems, the likelihood that the > * next call in the iteration belongs to the same system as the > @@ -3349,8 +3426,12 @@ void trace_event_eval_update(struct trace_eval_map > **map, int len) > } > update_event_printk(call, map[i]); > update_event_fields(call, map[i]); > + updated = true; > } > } > + /* If not updated yet, update field for sanitizing. */ > + if (!updated) > + update_event_fields(call, NULL); > cond_resched(); > } > up_write(&trace_event_sem);