On Tue, 10 Mar 2026 16:09:56 -0400 Steven Rostedt <[email protected]> wrote:
> From: Steven Rostedt <[email protected]> > > Make the futex syscall trace event a little more smart. Have it read the > futex_op instruction to determine what else it can save and print. For the > appropriate options, it will read the utime (timespec) parameter and show > its output as well as the uaddr2. > > futex_requeue_p-1154 [004] ..... 144.568339: sys_futex(uaddr: > 0x5652b178d834 (0x482), FUTEX_UNLOCK_PI|FUTEX_PRIVATE_FLAG, val: 0) > futex_requeue_p-1162 [002] ..... 144.568696: sys_futex(uaddr: > 0x7f763b7fece0 (2), FUTEX_WAIT|FUTEX_PRIVATE_FLAG, val: 2) > futex_requeue_p-1151 [000] ..... 144.568700: sys_futex(uaddr: > 0x7f763b7fece0 (0), FUTEX_WAKE|FUTEX_PRIVATE_FLAG, val: 1) > futex_requeue_p-1162 [002] ..... 144.568705: sys_futex(uaddr: > 0x7f763b7fece0 (0), FUTEX_WAKE|FUTEX_PRIVATE_FLAG, val: 1) > futex_requeue_p-1151 [000] ..... 144.568715: sys_futex(uaddr: > 0x7f764369e990 (0x483), FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, val: 1155) > futex_requeue_p-1155 [005] ..... 144.569420: sys_futex(uaddr: > 0x5652b178d838 (0), FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG, val: 0, > timespec: 0x7ffdacfba500 (143.890024054), uaddr2: 0x5652b178d834 (0), val3: 0) > futex_requeue_p-1155 [005] ..... 144.569454: sys_futex(uaddr: > 0x5652b178d834 (0), FUTEX_LOCK_PI|FUTEX_PRIVATE_FLAG, val: 0) > > Signed-off-by: Steven Rostedt (Google) <[email protected]> This looks good to me. Reviewed-by: Masami Hiramatsu (Google) <[email protected]> Thanks, > --- > Changes since v1: > https://lore.kernel.org/all/[email protected]/ > > - Updated to have the print processing in kernel/futex/syscall.c > > include/linux/futex.h | 35 ++++++++- > kernel/futex/syscalls.c | 48 ++++++------- > kernel/trace/trace_syscalls.c | 129 +++++++++++++++++++++++++++++----- > 3 files changed, 164 insertions(+), 48 deletions(-) > > diff --git a/include/linux/futex.h b/include/linux/futex.h > index 9fc47aa01a8b..976fa257ab5c 100644 > --- a/include/linux/futex.h > +++ b/include/linux/futex.h > @@ -82,8 +82,35 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t > *timeout, > u32 __user *uaddr2, u32 val2, u32 val3); > int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long > arg4); > > +static __always_inline bool futex_cmd_has_timeout(u32 cmd) > +{ > + switch (cmd) { > + case FUTEX_WAIT: > + case FUTEX_LOCK_PI: > + case FUTEX_LOCK_PI2: > + case FUTEX_WAIT_BITSET: > + case FUTEX_WAIT_REQUEUE_PI: > + return true; > + } > + return false; > +} > + > +static __always_inline bool futex_cmd_has_addr2(u32 cmd) > +{ > + switch (cmd) { > + case FUTEX_REQUEUE: > + case FUTEX_CMP_REQUEUE: > + case FUTEX_WAKE_OP: > + case FUTEX_WAIT_REQUEUE_PI: > + return true; > + } > + return false; > +} > + > #ifdef CONFIG_FTRACE_SYSCALLS > -void futex_print_syscall(struct seq_buf *s, int nr_args, unsigned long > *args, u32 *ptr); > +void futex_print_syscall(struct seq_buf *s, int nr_args, unsigned long *args, > + u32 *ptr1, u32 *ptr2, unsigned long *ts1, > + unsigned long *ts2); > #endif > > #ifdef CONFIG_FUTEX_PRIVATE_HASH > @@ -119,7 +146,11 @@ static inline int futex_hash_allocate_default(void) > static inline int futex_hash_free(struct mm_struct *mm) { return 0; } > static inline int futex_mm_init(struct mm_struct *mm) { return 0; } > static inline void futex_print_syscall(struct seq_buf *s, int nr_args, > - unsigned long *args, u32 *ptr) { } > + unsigned long *args, u32 *ptr1, > + u32 *ptr2, unsigned long *ts1, > + unsigned long *ts2) { } > +static __always_inline bool futex_cmd_has_timeout(u32 cmd) { return false; } > +static __always_inline bool futex_cmd_has_addr2(u32 cmd) { return false; } > #endif > > #endif > diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c > index a1cd512aa502..a46706d6bc6c 100644 > --- a/kernel/futex/syscalls.c > +++ b/kernel/futex/syscalls.c > @@ -158,31 +158,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, > ktime_t *timeout, > return -ENOSYS; > } > > -static __always_inline bool futex_cmd_has_timeout(u32 cmd) > -{ > - switch (cmd) { > - case FUTEX_WAIT: > - case FUTEX_LOCK_PI: > - case FUTEX_LOCK_PI2: > - case FUTEX_WAIT_BITSET: > - case FUTEX_WAIT_REQUEUE_PI: > - return true; > - } > - return false; > -} > - > -static __always_inline bool futex_cmd_has_addr2(u32 cmd) > -{ > - switch (cmd) { > - case FUTEX_REQUEUE: > - case FUTEX_CMP_REQUEUE: > - case FUTEX_WAKE_OP: > - case FUTEX_WAIT_REQUEUE_PI: > - return true; > - } > - return false; > -} > - > static __always_inline int > futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) > { > @@ -229,7 +204,9 @@ static const char * __futex_cmds[] = > "FUTEX_LOCK_PI2", > }; > > -void futex_print_syscall(struct seq_buf *s, int nr_args, unsigned long > *args, u32 *ptr) > +void futex_print_syscall(struct seq_buf *s, int nr_args, unsigned long *args, > + u32 *ptr1, u32 *ptr2, unsigned long *ts1, > + unsigned long *ts2) > { > unsigned int op, cmd; > bool done = false; > @@ -242,8 +219,8 @@ void futex_print_syscall(struct seq_buf *s, int nr_args, > unsigned long *args, u3 > switch (i) { > case 0: > seq_buf_printf(s, "uaddr: 0x%lx", args[i]); > - if (ptr) { > - u32 val = *ptr; > + if (ptr1) { > + u32 val = *ptr1; > if (val < 10) > seq_buf_printf(s, " (%u)", val); > else > @@ -279,6 +256,15 @@ void futex_print_syscall(struct seq_buf *s, int nr_args, > unsigned long *args, u3 > continue; > > seq_buf_printf(s, ", timespec: 0x%lx", args[i]); > + > + if (!ts1 || !ts2) > + continue; > + > + if (!*ts1 && !*ts2) { > + seq_buf_puts(s, " (0)"); > + continue; > + } > + seq_buf_printf(s, " (%lu.%09lu)", *ts1, *ts2); > continue; > case 4: > if (!futex_cmd_has_addr2(cmd)) { > @@ -286,6 +272,12 @@ void futex_print_syscall(struct seq_buf *s, int nr_args, > unsigned long *args, u3 > continue; > } > seq_buf_printf(s, ", uaddr2: 0x%lx", args[i]); > + > + if (!ptr2) > + continue; > + > + seq_buf_printf(s, " (%x)", *ptr2); > + > continue; > case 5: > seq_buf_printf(s, ", val3: %lu", args[i]); > diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c > index 8cb3af569157..de1fa97547a3 100644 > --- a/kernel/trace/trace_syscalls.c > +++ b/kernel/trace/trace_syscalls.c > @@ -239,21 +239,35 @@ sys_enter_openat_print(struct syscall_trace_enter > *trace, struct syscall_metadat > return trace_handle_return(s); > } > > +struct futex_data { > + u32 val1; > + u32 val2; > + unsigned long ts1; > + unsigned long ts2; > +}; > + > static enum print_line_t > sys_enter_futex_print(struct syscall_trace_enter *trace, struct > syscall_metadata *entry, > struct trace_seq *s, struct trace_event *event, int > ent_size) > { > + struct futex_data *data; > void *end = (void *)trace + ent_size; > - void *ptr; > + unsigned long *ts1 = NULL, *ts2 = NULL; > + u32 *ptr1 = NULL, *ptr2 = NULL; > > /* Set ptr to the user space copied area */ > - ptr = (void *)trace->args + sizeof(unsigned long) * entry->nb_args; > - if (ptr + 4 > end) > - ptr = NULL; > + data = (void *)trace->args + sizeof(unsigned long) * entry->nb_args; > + if ((void *)data + sizeof(*data) <= end) { > + ptr1 = &data->val1; > + ptr2 = &data->val2; > + ts1 = &data->ts1; > + ts2 = &data->ts2; > + } > > trace_seq_printf(s, "%s(", entry->name); > > - futex_print_syscall(&s->seq, entry->nb_args, trace->args, ptr); > + futex_print_syscall(&s->seq, entry->nb_args, trace->args, ptr1, ptr2, > + ts1, ts2); > > trace_seq_puts(s, ")\n"); > > @@ -472,9 +486,9 @@ sys_enter_futex_print_fmt(struct syscall_metadata *entry, > char *buf, int len) > pos += snprintf(buf + pos, LEN_OR_ZERO, > "\"uaddr: 0x%%lx (0x%%lx) cmd=%%s%%s%%s"); > pos += snprintf(buf + pos, LEN_OR_ZERO, > - " val: 0x%%x timeout/val2: 0x%%llx"); > + " val: 0x%%x timeout/val2: 0x%%llx (%%lu.%%lu)"); > pos += snprintf(buf + pos, LEN_OR_ZERO, > - " uaddr2: 0x%%lx val3: 0x%%x\", "); > + " uaddr2: 0x%%lx (0x%%lx) val3: 0x%%x\", "); > > pos += snprintf(buf + pos, LEN_OR_ZERO, > " REC->uaddr,"); > @@ -520,10 +534,12 @@ sys_enter_futex_print_fmt(struct syscall_metadata > *entry, char *buf, int len) > FUTEX_CLOCK_REALTIME); > > pos += snprintf(buf + pos, LEN_OR_ZERO, > - " REC->val, REC->utime,"); > + " REC->val, REC->utime, REC->__ts1, REC->__ts2,"); > > pos += snprintf(buf + pos, LEN_OR_ZERO, > - " REC->uaddr, REC->val3"); > + " REC->uaddr,"); > + pos += snprintf(buf + pos, LEN_OR_ZERO, > + " REC->__value2, REC->val3"); > return pos; > } > > @@ -626,7 +642,39 @@ static int __init futex_fields(struct trace_event_call > *call, int offset) > ret = trace_define_field(call, "u32", arg, offset, sizeof(int), 0, > FILTER_OTHER); > if (ret) > - kfree(arg); > + goto free; > + offset += sizeof(int); > + > + arg = kstrdup("__value2", GFP_KERNEL); > + if (WARN_ON_ONCE(!arg)) > + return -ENOMEM; > + ret = trace_define_field(call, "u32", arg, offset, sizeof(int), 0, > + FILTER_OTHER); > + if (ret) > + goto free; > + offset += sizeof(int); > + > + arg = kstrdup("__ts1", GFP_KERNEL); > + if (WARN_ON_ONCE(!arg)) > + return -ENOMEM; > + ret = trace_define_field(call, "unsigned long", arg, offset, > + sizeof(unsigned long), 0, FILTER_OTHER); > + if (ret) > + goto free; > + offset += sizeof(long); > + > + arg = kstrdup("__ts2", GFP_KERNEL); > + if (WARN_ON_ONCE(!arg)) > + return -ENOMEM; > + ret = trace_define_field(call, "unsigned long", arg, offset, > + sizeof(unsigned long), 0, FILTER_OTHER); > + if (ret) > + goto free; > + > + return 0; > + > +free: > + kfree(arg); > return ret; > } > > @@ -799,11 +847,51 @@ static int syscall_copy_user_array(char *buf, const > char __user *ptr, > return 0; > } > > +struct tp_futex_data { > + u32 cmd; > + const u32 __user *val1; > + const u32 __user *val2; > + void __user *timeout; > +}; > + > +static int syscall_copy_futex(char *buf, const char __user *ptr, > + size_t size, void *data) > +{ > + struct tp_futex_data *tp_data = data; > + struct futex_data *fdata = (void *)buf; > + int cmd = tp_data->cmd & FUTEX_CMD_MASK; > + int ret; > + > + memset(fdata, 0, sizeof(*fdata)); > + > + if (tp_data->val1) { > + ret = __copy_from_user(&fdata->val1, tp_data->val1, 4); > + if (ret) > + return -1; > + } > + > + if (tp_data->val2 && futex_cmd_has_addr2(cmd)) { > + ret = __copy_from_user(&fdata->val2, tp_data->val2, 4); > + if (ret) > + return -1; > + } > + > + if (tp_data->timeout && futex_cmd_has_timeout(cmd)) { > + /* Copies both ts1 and ts2 */ > + ret = __copy_from_user(&fdata->ts1, tp_data->timeout, > + sizeof(long) * 2); > + if (ret) > + return -1; > + } > + > + return 0; > +} > + > static int > syscall_get_futex(unsigned long *args, char **buffer, int *size, int > buf_size) > { > struct syscall_user_buffer *sbuf; > - const char __user *ptr; > + struct tp_futex_data tp_data; > char *buf; > > /* buf_size of zero means user doesn't want user space read */ > @@ -815,14 +903,18 @@ syscall_get_futex(unsigned long *args, char **buffer, > int *size, int buf_size) > if (!sbuf) > return -1; > > - ptr = (char __user *)args[0]; > + tp_data.cmd = args[1]; > + tp_data.val1 = (u32 __user *)args[0]; > + tp_data.val2 = (u32 __user *)args[4]; > + tp_data.timeout = (u64 __user *)args[3]; > > - *buffer = trace_user_fault_read(&sbuf->buf, ptr, 4, NULL, NULL); > + *buffer = trace_user_fault_read(&sbuf->buf, NULL, 0, > + syscall_copy_futex, &tp_data); > if (!*buffer) > return -1; > > - /* Add room for the value */ > - *size += 4; > + /* Add room for values */ > + *size += sizeof(struct futex_data); > > buf = *buffer; > > @@ -833,12 +925,13 @@ static void syscall_put_futex(struct syscall_metadata > *sys_data, > struct syscall_trace_enter *entry, > char *buffer) > { > - u32 *ptr; > + struct futex_data *fdata = (void *)buffer; > + struct futex_data *data; > > /* Place the futex key into the storage */ > - ptr = (void *)entry->args + sizeof(unsigned long) * sys_data->nb_args; > + data = (void *)entry->args + sizeof(unsigned long) * sys_data->nb_args; > > - *ptr = *(u32 *)buffer; > + *data = *fdata; > } > > static char *sys_fault_user(unsigned int buf_size, > -- > 2.51.0 > > -- Masami Hiramatsu (Google) <[email protected]>
