On Thu, 24 Jul 2025 13:46:35 +0900
"Masami Hiramatsu (Google)" <mhira...@kernel.org> wrote:

> From: Masami Hiramatsu (Google) <mhira...@kernel.org>
> 
> With CONFIG_DEBUG_INFO_BTF=y and PAHOLE_HAS_BTF_TAG=y, `__user` is
> converted to `__attribute__((btf_type_tag("user")))`. In this case,
> some syscall events have it for __user data, like below;
> 
> /sys/kernel/tracing # cat events/syscalls/sys_enter_openat/format
> name: sys_enter_openat
> ID: 720
> format:
>         field:unsigned short common_type;       offset:0;       size:2; 
> signed:0;
>         field:unsigned char common_flags;       offset:2;       size:1; 
> signed:0;
>         field:unsigned char common_preempt_count;       offset:3;       
> size:1; signed:0;
>         field:int common_pid;   offset:4;       size:4; signed:1;
> 
>         field:int __syscall_nr; offset:8;       size:4; signed:1;
>         field:int dfd;  offset:16;      size:8; signed:0;
>         field:const char __attribute__((btf_type_tag("user"))) * filename;    
>   offset:24;      size:8; signed:0;
>         field:int flags;        offset:32;      size:8; signed:0;
>         field:umode_t mode;     offset:40;      size:8; signed:0;
> 
> 
> Then the trace event filter fails to set the string acceptable flag
> (FILTER_PTR_STRING) to the field and rejects setting string filter;
> 
>  # echo 'filename.ustring ~ "*ftracetest-dir.wbx24v*"' \
>     >> events/syscalls/sys_enter_openat/filter
>  sh: write error: Invalid argument
>  # cat error_log
>  [  723.743637] event filter parse error: error: Expecting numeric field
>    Command: filename.ustring ~ "*ftracetest-dir.wbx24v*"
> 
> Since this __attribute__ makes format parsing complicated and not
> needed, remove the __attribute__(.*) from the type string.
> 
> Signed-off-by: Masami Hiramatsu (Google) <mhira...@kernel.org>
> ---
> Changes in v4:
>  - Run sanitizer only if btf_type_tag() attribute is defined.
> Changes in v3:
>  - Sanitize field in update_event_field() to avoid boottime performance
>    overhead.
>  - Change the function names because those are not always require eval
>    maps.
>  - Remove unneeded alloc_type flag.
> Changes in v2:
>  - Add memory allocation check flag.
>  - Check the flag in update_event_fields() to avoid memory leak.
>  - Fix 'static const int ... strlen()' issue.
>  - Fix to find 2nd __attribute__ correctly. (adjust next after strcpy)
> ---
>  kernel/trace/trace.c        |   25 ++++++---
>  kernel/trace/trace.h        |    4 +
>  kernel/trace/trace_events.c |  115 
> +++++++++++++++++++++++++++++++++++++------
>  3 files changed, 116 insertions(+), 28 deletions(-)
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 95ae7c4e5835..b62528c42e26 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -5937,17 +5937,26 @@ static inline void trace_insert_eval_map_file(struct 
> module *mod,
>                             struct trace_eval_map **start, int len) { }
>  #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
>  
> -static void trace_insert_eval_map(struct module *mod,
> -                               struct trace_eval_map **start, int len)
> +static void
> +trace_event_update_with_eval_map(struct module *mod,
> +                              struct trace_eval_map **start,
> +                              int len)
>  {
>       struct trace_eval_map **map;
>  
> -     if (len <= 0)
> +     /* Always run sanitizer only if btf_type_tag attr exists. */
> +     if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
> +           IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
> +           __has_attribute(btf_type_tag)) &&
> +         len <= 0)
>               return;

The above is quite messy and hard to read. Can you change it to:

        if (len <= 0) {
                /* If bpf_type_tag attr is used, still need to sanitize */
                if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
                      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
                      __has_attribute(btf_type_tag))
                        return;
        }

Not much better, but a little easier to read.
                
>  
>       map = start;
>  
> -     trace_event_eval_update(map, len);
> +     trace_event_update_all(map, len);
> +
> +     if (len <= 0)
> +             return;
>  
>       trace_insert_eval_map_file(mod, start, len);
>  }
> @@ -10335,7 +10344,7 @@ static void __init eval_map_work_func(struct 
> work_struct *work)
>       int len;
>  
>       len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
> -     trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
> +     trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
>  }
>  
>  static int __init trace_eval_init(void)
> @@ -10388,9 +10397,6 @@ bool module_exists(const char *module)
>  
>  static void trace_module_add_evals(struct module *mod)
>  {
> -     if (!mod->num_trace_evals)
> -             return;
> -
>       /*
>        * Modules with bad taint do not have events created, do
>        * not bother with enums either.
> @@ -10398,7 +10404,8 @@ static void trace_module_add_evals(struct module *mod)
>       if (trace_module_has_bad_taint(mod))
>               return;
>  
> -     trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
> +     /* Even if no trace_evals, this need to sanitize field types. */
> +     trace_event_update_with_eval_map(mod, mod->trace_evals, 
> mod->num_trace_evals);
>  }
>  
>  #ifdef CONFIG_TRACE_EVAL_MAP_FILE
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index bd084953a98b..1dbf1d3cf2f1 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -2125,13 +2125,13 @@ static inline const char *get_syscall_name(int 
> syscall)
>  
>  #ifdef CONFIG_EVENT_TRACING
>  void trace_event_init(void);
> -void trace_event_eval_update(struct trace_eval_map **map, int len);
> +void trace_event_update_all(struct trace_eval_map **map, int len);
>  /* Used from boot time tracer */
>  extern int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set);
>  extern int trigger_process_regex(struct trace_event_file *file, char *buff);
>  #else
>  static inline void __init trace_event_init(void) { }
> -static inline void trace_event_eval_update(struct trace_eval_map **map, int 
> len) { }
> +static inline void trace_event_update_all(struct trace_eval_map **map, int 
> len) { }
>  #endif
>  
>  #ifdef CONFIG_TRACER_SNAPSHOT
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 120531268abf..5289e2032678 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -3264,41 +3264,115 @@ static void add_str_to_module(struct module *module, 
> char *str)
>       list_add(&modstr->next, &module_strings);
>  }
>  
> +#define ATTRIBUTE_STR "__attribute__"

If you are going to test for the '(' right after, why not add it to the
strstr test?

> +#define ATTRIBUTE_STR_LEN (sizeof(ATTRIBUTE_STR) - 1)
> +
> +/* Remove all __attribute__() from type */
> +static void sanitize_field_type(char *type)
> +{
> +     char *attr, *tmp, *next;
> +     int depth;
> +
> +     next = type;
> +     while ((attr = strstr(next, ATTRIBUTE_STR))) {
> +             next = attr + ATTRIBUTE_STR_LEN;
> +
> +             /* Retry if __attribute__ is a part of type name. */
> +             if ((attr != type && !isspace(attr[-1])) ||
> +                 *next != '(')

Now you wouldn't need the *next != '(' here.

> +                     continue;
> +
> +             depth = 0;

                /* the ATTRIBUTE_STR already has the first '(' */
                depth = 1;

> +             while ((tmp = strpbrk(next, "()"))) {
> +                     if (*tmp == '(')
> +                             depth++;
> +                     else
> +                             depth--;
> +                     next = tmp + 1;
> +                     if (depth == 0)
> +                             break;
> +             }
> +             next = skip_spaces(next);
> +             strcpy(attr, next);
> +             next = attr;
> +     }
> +}
> +
> +static bool need_sanitize_field_type(const char *type)
> +{
> +     return !!strstr(type, ATTRIBUTE_STR);
> +}
> +
> +static char *find_replacable_eval(const char *type, const char *eval_string,
> +                               int len)
> +{
> +     char *ptr;
> +
> +     if (!eval_string)
> +             return NULL;
> +
> +     ptr = strchr(type, '[');
> +     if (!ptr)
> +             return NULL;
> +     ptr++;
> +
> +     if (!isalpha(*ptr) && *ptr != '_')
> +             return NULL;
> +
> +     if (strncmp(eval_string, ptr, len) != 0)
> +             return NULL;
> +
> +     return ptr;
> +}
> +
>  static void update_event_fields(struct trace_event_call *call,
>                               struct trace_eval_map *map)
>  {
>       struct ftrace_event_field *field;
> +     const char *eval_string = NULL;
>       struct list_head *head;
> +     bool need_sanitize;
> +     int len = 0;
>       char *ptr;
>       char *str;
> -     int len = strlen(map->eval_string);
>  
>       /* Dynamic events should never have field maps */
> -     if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC))
> +     if (call->flags & TRACE_EVENT_FL_DYNAMIC)
>               return;
>  
> +     if (map) {
> +             eval_string = map->eval_string;
> +             len = strlen(map->eval_string);
> +     }
> +
>       head = trace_get_fields(call);
>       list_for_each_entry(field, head, link) {
> -             ptr = strchr(field->type, '[');
> -             if (!ptr)
> -                     continue;
> -             ptr++;
>  
> -             if (!isalpha(*ptr) && *ptr != '_')
> -                     continue;
> -
> -             if (strncmp(map->eval_string, ptr, len) != 0)
> +             /* Check the field has bad string or eval. */
> +             need_sanitize = need_sanitize_field_type(field->type);

Is there a reason you can't call the sanitize first?

Then you wouldn't need to do the strstr(ATTRIBUTE_STR) twice.

-- Steve


> +             ptr = find_replacable_eval(field->type, eval_string, len);
> +             if (likely(!need_sanitize && !ptr))
>                       continue;
>  
>               str = kstrdup(field->type, GFP_KERNEL);
>               if (WARN_ON_ONCE(!str))
>                       return;
> -             ptr = str + (ptr - field->type);
> -             ptr = eval_replace(ptr, map, len);
> -             /* enum/sizeof string smaller than value */
> -             if (WARN_ON_ONCE(!ptr)) {
> -                     kfree(str);
> -                     continue;
> +
> +             if (ptr) {
> +                     ptr = str + (ptr - field->type);
> +
> +                     ptr = eval_replace(ptr, map, len);
> +                     /* enum/sizeof string smaller than value */
> +                     if (WARN_ON_ONCE(!ptr) && !need_sanitize) {
> +                             kfree(str);
> +                             continue;
> +                     }
> +             }
> +             if (need_sanitize) {
> +                     sanitize_field_type(str);
> +                     /* Update field type */
> +                     if (field->filter_type == FILTER_OTHER)
> +                             field->filter_type = filter_assign_type(str);
>               }
>  
>               /*
> @@ -3313,11 +3387,13 @@ static void update_event_fields(struct 
> trace_event_call *call,
>       }
>  }
>  
> -void trace_event_eval_update(struct trace_eval_map **map, int len)
> +/* Update all events for replacing eval and sanitizing */
> +void trace_event_update_all(struct trace_eval_map **map, int len)
>  {
>       struct trace_event_call *call, *p;
>       const char *last_system = NULL;
>       bool first = false;
> +     bool updated;
>       int last_i;
>       int i;
>  
> @@ -3330,6 +3406,7 @@ void trace_event_eval_update(struct trace_eval_map 
> **map, int len)
>                       last_system = call->class->system;
>               }
>  
> +             updated = false;
>               /*
>                * Since calls are grouped by systems, the likelihood that the
>                * next call in the iteration belongs to the same system as the
> @@ -3349,8 +3426,12 @@ void trace_event_eval_update(struct trace_eval_map 
> **map, int len)
>                               }
>                               update_event_printk(call, map[i]);
>                               update_event_fields(call, map[i]);
> +                             updated = true;
>                       }
>               }
> +             /* If not updated yet, update field for sanitizing. */
> +             if (!updated)
> +                     update_event_fields(call, NULL);
>               cond_resched();
>       }
>       up_write(&trace_event_sem);


Reply via email to