From: Steven Rostedt <[email protected]>

For system call events that have a length field, add a "user_arg_size"
field to the system call meta data. It holds the index into the args
array of the argument that contains the size of the user space buffer
pointed to by the argument whose bit is set in the user_mask field.

The "user_mask" field has a bit set for the argument that points to an
array in user space. If a system call event has both the user_mask and
the user_arg_size fields set, the content at that address is recorded
into the trace event, up to a maximum of SYSCALL_FAULT_BUF_SZ - 1 bytes.

This allows the output to look like:

  sys_write(fd: 0xa, buf: 0x5646978d13c0 (01:00:05:00:00:00:00:00:01:87:55:89:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00), count: 0x20)

Signed-off-by: Steven Rostedt (Google) <[email protected]>
---
 include/trace/syscall.h       |   4 +-
 kernel/trace/trace_syscalls.c | 111 +++++++++++++++++++++++++---------
 2 files changed, 86 insertions(+), 29 deletions(-)

diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 85f21ca15a41..9413c139da66 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -16,6 +16,7 @@
  * @name: name of the syscall
  * @syscall_nr: number of the syscall
  * @nb_args: number of parameters it takes
+ * @user_arg_size: index of the @args entry holding the size of user space to read, or -1 if none
  * @user_mask: mask of @args that will read user space
  * @types: list of types as strings
  * @args: list of args as strings (args[i] matches types[i])
@@ -26,7 +27,8 @@
 struct syscall_metadata {
        const char      *name;
        int             syscall_nr;
-       short           nb_args;
+       u8              nb_args;
+       s8              user_arg_size;
        short           user_mask;
        const char      **types;
        const char      **args;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 7ea763c07bb7..7658b592c55f 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -124,7 +124,7 @@ const char *get_syscall_name(int syscall)
        return entry->name;
 }
 
-/* Added to user strings when max limit is reached */
+/* Added to user strings or arrays when max limit is reached */
 #define EXTRA "..."
 
 static enum print_line_t
@@ -136,9 +136,8 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
        struct trace_entry *ent = iter->ent;
        struct syscall_trace_enter *trace;
        struct syscall_metadata *entry;
-       int i, syscall, val;
+       int i, syscall, val, len;
        unsigned char *ptr;
-       int len;
 
        trace = (typeof(trace))ent;
        syscall = trace->nr;
@@ -185,7 +184,23 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
                ptr = (void *)ent + (val & 0xffff);
                len = val >> 16;
 
-               trace_seq_printf(s, " \"%.*s\"", len, ptr);
+               if (entry->user_arg_size < 0) {
+                       trace_seq_printf(s, " \"%.*s\"", len, ptr);
+                       continue;
+               }
+
+               val = trace->args[entry->user_arg_size];
+
+               trace_seq_puts(s, " (");
+               for (int x = 0; x < len; x++, ptr++) {
+                       if (x)
+                               trace_seq_putc(s, ':');
+                       trace_seq_printf(s, "%02x", *ptr);
+               }
+               if (len < val)
+                       trace_seq_printf(s, ", %s", EXTRA);
+
+               trace_seq_putc(s, ')');
        }
 
        trace_seq_putc(s, ')');
@@ -250,8 +265,11 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
                if (!(BIT(i) & entry->user_mask))
                        continue;
 
-               /* Add the format for the user space string */
-               pos += snprintf(buf + pos, LEN_OR_ZERO, " \\\"%%s\\\"");
+               /* Add the format for the user space string or array */
+               if (entry->user_arg_size < 0)
+                       pos += snprintf(buf + pos, LEN_OR_ZERO, " \\\"%%s\\\"");
+               else
+                       pos += snprintf(buf + pos, LEN_OR_ZERO, " (%%s)");
        }
        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
 
@@ -260,9 +278,14 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
                                ", ((unsigned long)(REC->%s))", entry->args[i]);
                if (!(BIT(i) & entry->user_mask))
                        continue;
-               /* The user space string for arg has name __<arg>_val */
-               pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(__%s_val)",
-                               entry->args[i]);
+               /* The user space data for arg has name __<arg>_val */
+               if (entry->user_arg_size < 0) {
+                       pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(__%s_val)",
+                                       entry->args[i]);
+               } else {
+                       pos += snprintf(buf + pos, LEN_OR_ZERO, ", __print_dynamic_array(__%s_val, 1)",
+                                       entry->args[i]);
+               }
        }
 
 #undef LEN_OR_ZERO
@@ -333,9 +356,9 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
        idx = ffs(mask) - 1;
 
        /*
-        * User space strings are faulted into a temporary buffer and then
-        * added as a dynamic string to the end of the event.
-        * The user space string name for the arg pointer is "__<arg>_val".
+        * User space data is faulted into a temporary buffer and then
+        * added as a dynamic string or array to the end of the event.
+        * The user space data name for the arg pointer is "__<arg>_val".
         */
        len = strlen(meta->args[idx]) + sizeof("___val");
        arg = kmalloc(len, GFP_KERNEL);
@@ -458,6 +481,7 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
        unsigned long mask = sys_data->user_mask;
        unsigned int cnt;
        int idx = ffs(mask) - 1;
+       bool array = false;
        char *ptr;
        int trys = 0;
        int ret;
@@ -500,6 +524,18 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
        /* Read the current CPU context switch counter */
        cnt = nr_context_switches_cpu(cpu);
 
+       /*
+        * If this system call event has a size argument, use
+        * it to define how much of user space memory to read,
+        * and read it as an array and not a string.
+        */
+       if (sys_data->user_arg_size >= 0) {
+               array = true;
+               size = args[sys_data->user_arg_size];
+               if (size > SYSCALL_FAULT_BUF_SZ - 1)
+                       size = SYSCALL_FAULT_BUF_SZ - 1;
+       }
+
        /*
         * Preemption is going to be enabled, but this task must
         * remain on this CPU.
@@ -512,7 +548,12 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
         */
        preempt_enable_notrace();
 
-       ret = strncpy_from_user(buf, ptr, size);
+       if (array) {
+               ret = __copy_from_user(buf, ptr, size);
+               ret = ret ? -1 : size;
+       } else {
+               ret = strncpy_from_user(buf, ptr, size);
+       }
 
        preempt_disable_notrace();
        migrate_enable();
@@ -530,22 +571,24 @@ static char *sys_fault_user(struct syscall_metadata *sys_data, struct syscall_bu
        if (nr_context_switches_cpu(cpu) != cnt)
                goto again;
 
-       /* Replace any non-printable characters with '.' */
-       for (int i = 0; i < ret; i++) {
-               if (!isprint(buf[i]))
-                       buf[i] = '.';
-       }
+       /* For strings, replace any non-printable characters with '.' */
+       if (!array) {
+               for (int i = 0; i < ret; i++) {
+                       if (!isprint(buf[i]))
+                               buf[i] = '.';
+               }
 
-       /*
-        * If the text was truncated due to our max limit, add "..." to
-        * the string.
-        */
-       if (ret > SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA)) {
-               strscpy(buf + SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA),
-                       EXTRA, sizeof(EXTRA));
-               ret = SYSCALL_FAULT_BUF_SZ;
-       } else {
-               buf[ret++] = '\0';
+               /*
+                * If the text was truncated due to our max limit, add "..." to
+                * the string.
+                */
+               if (ret > SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA)) {
+                       strscpy(buf + SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA),
+                               EXTRA, sizeof(EXTRA));
+                       ret = SYSCALL_FAULT_BUF_SZ;
+               } else {
+                       buf[ret++] = '\0';
+               }
        }
 
        *data_size = ret;
@@ -642,6 +685,9 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
                /* Store the offset and the size into the meta data */
                *(int *)ptr = val | (user_size << 16);
 
+               if (WARN_ON_ONCE((ptr - (void *)entry + user_size) > size))
+                       user_size = 0;
+
                /* Nothing to do if the user space was empty or faulted */
                if (user_size) {
                        /* Now store the user space data into the event */
@@ -795,7 +841,16 @@ static void check_faultable_syscall(struct trace_event_call *call, int nr)
        if (sys_data->enter_event != call)
                return;
 
+       sys_data->user_arg_size = -1;
+
        switch (nr) {
+       /* user arg 1 with size arg at 2 */
+       case __NR_write:
+       case __NR_mq_timedsend:
+       case __NR_pwrite64:
+               sys_data->user_mask = BIT(1);
+               sys_data->user_arg_size = 2;
+               break;
        /* user arg at position 0 */
        case __NR_access:
        case __NR_acct:
-- 
2.50.1



Reply via email to