From: Steven Rostedt <[email protected]> Currently a trigger can only be added to individual events. Some triggers (like stacktrace) can be useful to add as a bulk trigger for a set of system events (like interrupt or scheduling).
Add a trigger file to the system directories: /sys/kernel/tracing/events/*/trigger And allow the stacktrace trigger to be enabled for all those events. Writing into the system/trigger file acts the same as writing into each of the system's event trigger files individually. This also allows removing a trigger from all events in a subsystem (even if it's not a subsystem trigger!). Signed-off-by: Steven Rostedt (Google) <[email protected]> --- Note, this is based on top of: https://patchwork.kernel.org/project/linux-trace-kernel/cover/[email protected]/ Changes since v1: https://patch.msgid.link/[email protected] - Removed the set-but-unused variable len (kernel test robot) - Assign next to strim(buf) to remove beginning spaces Documentation/trace/events.rst | 25 ++++ kernel/trace/trace.c | 11 +- kernel/trace/trace.h | 15 ++- kernel/trace/trace_events.c | 70 +++++----- kernel/trace/trace_events_trigger.c | 199 +++++++++++++++++++++++++++- 5 files changed, 278 insertions(+), 42 deletions(-) diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst index 18d112963dec..caa4958af43a 100644 --- a/Documentation/trace/events.rst +++ b/Documentation/trace/events.rst @@ -416,6 +416,31 @@ way, so beware about making generalizations between the two. can also enable triggers that are written into /sys/kernel/tracing/events/ftrace/print/trigger +The system directory also has a trigger file that allows some triggers to be +set for all the system's events. This is limited to only a small subset of the +triggers and does not allow for the count parameter. But it does allow for +filters. Writing into this file is the same as writing into each of the +system's events' trigger files individually. 
Although only a subset of +triggers may use this file for enabling, all triggers may use this file for +disabling:: + + cd /sys/kernel/tracing + cat events/sched/trigger + # Available system triggers: + # stacktrace + + echo stacktrace > events/sched/trigger + cat events/sched/sched_switch/trigger + stacktrace:unlimited + + echo snapshot > events/sched/sched_waking/trigger + cat events/sched/sched_waking/trigger + snapshot:unlimited + echo '!snapshot' > events/sched/trigger + cat events/sched/sched_waking/trigger + # Available triggers: + # traceon traceoff snapshot stacktrace enable_event disable_event enable_hist disable_hist hist + 6.1 Expression syntax --------------------- diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 032bdedca5d9..feced9f43156 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -592,11 +592,12 @@ void trace_set_ring_buffer_expanded(struct trace_array *tr) LIST_HEAD(ftrace_trace_arrays); -int trace_array_get(struct trace_array *this_tr) +int __trace_array_get(struct trace_array *this_tr) { struct trace_array *tr; - guard(mutex)(&trace_types_lock); + lockdep_assert_held(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (tr == this_tr) { tr->ref++; @@ -607,6 +608,12 @@ int trace_array_get(struct trace_array *this_tr) return -ENODEV; } +int trace_array_get(struct trace_array *tr) +{ + guard(mutex)(&trace_types_lock); + return __trace_array_get(tr); +} + static void __trace_array_put(struct trace_array *this_tr) { WARN_ON(!this_tr->ref); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fd5a6daa6c25..7379763a057d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -469,10 +469,14 @@ extern struct list_head ftrace_trace_arrays; extern struct mutex trace_types_lock; extern int trace_array_get(struct trace_array *tr); +extern int __trace_array_get(struct trace_array *tr); extern int tracing_check_open_get_tr(struct trace_array *tr); extern struct trace_array 
*trace_array_find(const char *instance); extern struct trace_array *trace_array_find_get(const char *instance); +extern struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode); +void trace_put_system_dir(struct trace_subsystem_dir *dir); + extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe); extern int tracing_set_filter_buffering(struct trace_array *tr, bool set); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); @@ -1774,6 +1778,7 @@ static inline struct trace_event_file *event_file_file(struct file *filp) } extern const struct file_operations event_trigger_fops; +extern const struct file_operations event_system_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; extern const struct file_operations event_inject_fops; @@ -2057,10 +2062,16 @@ struct event_command { * regardless of whether or not it has a filter associated with * it (filters make a trigger require access to the trace record * but are not always present). + * + * @SYSTEM: A flag that says whether or not this command can be used + * at the event system level. For example, can it be written into + * events/sched/trigger file where it will be enabled for all + * sched events? 
*/ enum event_command_flags { - EVENT_CMD_FL_POST_TRIGGER = 1, - EVENT_CMD_FL_NEEDS_REC = 2, + EVENT_CMD_FL_POST_TRIGGER = BIT(1), + EVENT_CMD_FL_NEEDS_REC = BIT(2), + EVENT_CMD_FL_SYSTEM = BIT(3), }; static inline bool event_command_post_trigger(struct event_command *cmd_ops) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9b07ad9eb284..f00b41f73fc2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2168,51 +2168,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, static LIST_HEAD(event_subsystems); -static int subsystem_open(struct inode *inode, struct file *filp) +struct trace_subsystem_dir *trace_get_system_dir(struct inode *inode) { - struct trace_subsystem_dir *dir = NULL, *iter_dir; - struct trace_array *tr = NULL, *iter_tr; - struct event_subsystem *system = NULL; - int ret; + struct trace_subsystem_dir *dir; + struct trace_array *tr = NULL; - if (tracing_is_disabled()) - return -ENODEV; + guard(mutex)(&event_mutex); + guard(mutex)(&trace_types_lock); /* Make sure the system still exists */ - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); - list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) { - list_for_each_entry(iter_dir, &iter_tr->systems, list) { - if (iter_dir == inode->i_private) { + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + list_for_each_entry(dir, &tr->systems, list) { + if (dir == inode->i_private) { /* Don't open systems with no events */ - tr = iter_tr; - dir = iter_dir; - if (dir->nr_events) { - __get_system_dir(dir); - system = dir->subsystem; - } - goto exit_loop; + if (!dir->nr_events) + return NULL; + if (__trace_array_get(tr) < 0) + return NULL; + __get_system_dir(dir); + return dir; } } } - exit_loop: - mutex_unlock(&trace_types_lock); - mutex_unlock(&event_mutex); + return NULL; +} - if (!system) +void trace_put_system_dir(struct trace_subsystem_dir *dir) +{ + trace_array_put(dir->tr); + put_system(dir); +} + +static int 
subsystem_open(struct inode *inode, struct file *filp) +{ + struct trace_subsystem_dir *dir; + int ret; + + if (tracing_is_disabled()) return -ENODEV; - /* Still need to increment the ref count of the system */ - if (trace_array_get(tr) < 0) { - put_system(dir); + dir = trace_get_system_dir(inode); + if (!dir) return -ENODEV; - } ret = tracing_open_generic(inode, filp); - if (ret < 0) { - trace_array_put(tr); - put_system(dir); - } + if (ret < 0) + trace_put_system_dir(dir); return ret; } @@ -2761,6 +2762,9 @@ static int system_callback(const char *name, umode_t *mode, void **data, else if (strcmp(name, "enable") == 0) *fops = &ftrace_system_enable_fops; + else if (strcmp(name, "trigger") == 0) + *fops = &event_system_trigger_fops; + else return 0; @@ -2784,6 +2788,10 @@ event_subsystem_dir(struct trace_array *tr, const char *name, { .name = "enable", .callback = system_callback, + }, + { + .name = "trigger", + .callback = system_callback, } }; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 1dfe69146a81..9249770679f3 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -329,21 +329,28 @@ int trigger_process_regex(struct trace_event_file *file, char *buff) return -EINVAL; } +static char *get_user_buf(const char __user *ubuf, size_t cnt) +{ + if (!cnt) + return NULL; + + if (cnt >= PAGE_SIZE) + return ERR_PTR(-EINVAL); + + return memdup_user_nul(ubuf, cnt); +} + static ssize_t event_trigger_regex_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_event_file *event_file; ssize_t ret; - char *buf __free(kfree) = NULL; + char *buf __free(kfree) = get_user_buf(ubuf, cnt); - if (!cnt) + if (!buf) return 0; - if (cnt >= PAGE_SIZE) - return -EINVAL; - - buf = memdup_user_nul(ubuf, cnt); if (IS_ERR(buf)) return PTR_ERR(buf); @@ -397,6 +404,184 @@ const struct file_operations event_trigger_fops = { .release = event_trigger_release, }; +static ssize_t 
+event_system_trigger_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + char *buf __free(kfree) = kmalloc(SZ_4K, GFP_KERNEL); + struct event_command *p; + struct seq_buf s; + int len; + + if (!buf) + return -ENOMEM; + + seq_buf_init(&s, buf, SZ_4K); + + seq_buf_puts(&s, "# Available system triggers:\n"); + seq_buf_putc(&s, '#'); + + guard(mutex)(&trigger_cmd_mutex); + list_for_each_entry_reverse(p, &trigger_commands, list) { + if (p->flags & EVENT_CMD_FL_SYSTEM) + seq_buf_printf(&s, " %s", p->name); + } + seq_buf_putc(&s, '\n'); + + len = seq_buf_used(&s); + + if (*ppos >= len) + return 0; + + len -= *ppos; + + if (count > len) + count = len; + + if (copy_to_user(ubuf, buf + *ppos, count)) + return -EFAULT; + + *ppos += count; + + return count; +} + +static int process_system_events(struct trace_subsystem_dir *dir, + struct event_command *p, char *buff, + char *command, char *next) +{ + struct event_subsystem *system = dir->subsystem; + struct trace_event_file *file; + struct trace_array *tr = dir->tr; + bool remove = false; + int ret = 0; + + if (buff[0] == '!') + remove = true; + + lockdep_assert_held(&event_mutex); + + list_for_each_entry(file, &tr->events, list) { + + if (strcmp(system->name, file->event_call->class->system) != 0) + continue; + + ret = p->parse(p, file, buff, command, next); + + /* Removals and existing events do not error */ + if (ret < 0 && ret != -EEXIST && !remove) { + pr_warn("Failed adding trigger %s on %s\n", + command, trace_event_name(file->event_call)); + } + } + return 0; +} + +static ssize_t +event_system_trigger_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_subsystem_dir *dir = filp->private_data; + struct event_command *p; + char *command, *next; + char *buf __free(kfree) = get_user_buf(ubuf, cnt); + bool remove = false; + bool found = false; + ssize_t ret; + + if (!buf) + return 0; + + if (IS_ERR(buf)) + return PTR_ERR(buf); + + /* system triggers are 
not allowed to have counters */ + if (strchr(buf, ':')) + return -EINVAL; + + /* If opened for read too, dir is in the seq_file descriptor */ + if (filp->f_mode & FMODE_READ) { + struct seq_file *m = filp->private_data; + dir = m->private; + } + + /* Skip added space at beginning of buf */ + next = strim(buf); + + command = strsep(&next, " \t"); + if (next) { + next = skip_spaces(next); + if (!*next) + next = NULL; + } + if (command[0] == '!') { + remove = true; + command++; + } + + guard(mutex)(&event_mutex); + guard(mutex)(&trigger_cmd_mutex); + + list_for_each_entry(p, &trigger_commands, list) { + /* Allow to remove any trigger */ + if (!remove && !(p->flags & EVENT_CMD_FL_SYSTEM)) + continue; + if (strcmp(p->name, command) == 0) { + found = true; + ret = process_system_events(dir, p, buf, command, next); + break; + } + } + + if (!found) + ret = -ENODEV; + + if (!ret) + *ppos += cnt; + + if (remove || ret < 0) + return ret ? : cnt; + + return cnt; +} + +static int +event_system_trigger_open(struct inode *inode, struct file *file) +{ + struct trace_subsystem_dir *dir; + int ret; + + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + + dir = trace_get_system_dir(inode); + if (!dir) + return -ENODEV; + + file->private_data = dir; + + return ret; +} + +static int +event_system_trigger_release(struct inode *inode, struct file *file) +{ + struct trace_subsystem_dir *dir = inode->i_private; + + trace_put_system_dir(dir); + + return 0; +} + +const struct file_operations event_system_trigger_fops = { + .open = event_system_trigger_open, + .read = event_system_trigger_read, + .write = event_system_trigger_write, + .llseek = tracing_lseek, + .release = event_system_trigger_release, +}; + /* * Currently we only register event commands from __init, so mark this * __init too. 
@@ -1587,7 +1772,7 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data) static struct event_command trigger_stacktrace_cmd = { .name = "stacktrace", .trigger_type = ETT_STACKTRACE, - .flags = EVENT_CMD_FL_POST_TRIGGER, + .flags = EVENT_CMD_FL_POST_TRIGGER | EVENT_CMD_FL_SYSTEM, .parse = event_trigger_parse, .reg = register_trigger, .unreg = unregister_trigger, -- 2.51.0
