Add array type support for probe events.
This allows users to fetch array-typed values from a memory address.
The array type syntax is:

        TYPE[N]

Where TYPE is one of the base types (u8/16/32/64, s8/16/32/64,
x8/16/32/64, symbol, string) and N is a fixed value less
than 64.

The string array type is a bit different from the other types. For
other base types, <base-type>[1] is equal to <base-type>
(e.g. +0(%di):x32[1] is the same as +0(%di):x32). But string[1] is not
equal to string. The string type itself represents a "char array",
but the string array type represents a "char * array". So, for example,
+0(%di):string[1] is equal to +0(+0(%di)):string.

Signed-off-by: Masami Hiramatsu <mhira...@kernel.org>
---
 Changes in v4:
  - Fix to use calculated size correctly for field definition.
    (Thank you Namhyung!)
 Changes in v2:
  - Add array description in README file
  - Fix to init s3 code out of loop.
  - Fix to proceed code when the last code is OP_ARRAY.
  - Add string array type and bitfield array type.
---
 Documentation/trace/kprobetrace.txt |   13 ++++
 kernel/trace/trace.c                |    3 +
 kernel/trace/trace_probe.c          |  130 +++++++++++++++++++++++++++--------
 kernel/trace/trace_probe.h          |   14 ++++
 kernel/trace/trace_probe_tmpl.h     |   63 +++++++++++++++--
 5 files changed, 183 insertions(+), 40 deletions(-)

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 1d082f8ffeee..8bf752dfc072 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -65,9 +65,22 @@ in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32'
 or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and
 x86-64 uses x64).
 
+These value types can also be arrays. To record array data, you can add '[N]'
+(where N is a fixed number, less than 64) to the base type.
+E.g. 'x16[4]' means an array of x16 (2-byte hex) with 4 elements.
+Note that the array can be applied only to memory type fetchargs; you cannot
+apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is
+wrong, but '+8($stack):x8[8]' is OK.)
+
 String type is a special type, which fetches a "null-terminated" string from
 kernel space. This means it will fail and store NULL if the string container
 has been paged out.
+The string array type is a bit different from other types. For other base
+types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is the same
+as +0(%di):x32). But string[1] is not equal to string. The string type itself
+represents a "char array", but the string array type represents a "char * array".
+So, for example, +0(%di):string[1] is equal to +0(+0(%di)):string.
+
 Bitfield is another special type, which takes 3 parameters, bit-width, bit-
 offset, and container-size (usually 32). The syntax is;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bcd1fd87082d..b7c6698265e5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4614,7 +4614,8 @@ static const char readme_msg[] =
        "\t           $stack<index>, $stack, $retval, $comm\n"
 #endif
        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
-       "\t           b<bit-width>@<bit-offset>/<container-size>\n"
+       "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
+       "\t           <type>[<array-size>]\n"
 #endif
        "  events/\t\t- Directory containing all trace event subsystems:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 9458800f394a..0e185050e492 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -363,9 +363,9 @@ static int __parse_bitfield_probe_arg(const char *bf,
 int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
                struct probe_arg *parg, unsigned int flags)
 {
-       struct fetch_insn *code, *tmp = NULL;
-       const char *t;
-       int ret;
+       struct fetch_insn *code, *scode, *tmp = NULL;
+       char *t, *t2;
+       int ret, len;
 
        if (strlen(arg) > MAX_ARGSTR_LEN) {
                pr_info("Argument is too long.: %s\n",  arg);
@@ -376,24 +376,42 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
                pr_info("Failed to allocate memory for command '%s'.\n", arg);
                return -ENOMEM;
        }
-       t = strchr(parg->comm, ':');
+       t = strchr(arg, ':');
        if (t) {
-               arg[t - parg->comm] = '\0';
-               t++;
+               *t = '\0';
+               t2 = strchr(++t, '[');
+               if (t2) {
+                       *t2 = '\0';
+                       parg->count = simple_strtoul(t2 + 1, &t2, 0);
+                       if (strcmp(t2, "]") || parg->count == 0)
+                               return -EINVAL;
+                       if (parg->count > MAX_ARRAY_LEN)
+                               return -E2BIG;
+               }
        }
        /*
         * The default type of $comm should be "string", and it can't be
         * dereferenced.
         */
        if (!t && strcmp(arg, "$comm") == 0)
-               t = "string";
-       parg->type = find_fetch_type(t);
+               parg->type = find_fetch_type("string");
+       else
+               parg->type = find_fetch_type(t);
        if (!parg->type) {
                pr_info("Unsupported type: %s\n", t);
                return -EINVAL;
        }
        parg->offset = *size;
-       *size += parg->type->size;
+       *size += parg->type->size * (parg->count ?: 1);
+
+       if (parg->count) {
+               len = strlen(parg->type->fmttype) + 6;
+               parg->fmt = kmalloc(len, GFP_KERNEL);
+               if (!parg->fmt)
+                       return -ENOMEM;
+               snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
+                        parg->count);
+       }
 
        code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL);
        if (!code)
@@ -413,10 +431,20 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
                        ret = -EINVAL;
                        goto fail;
                }
-               /* Since IMM or COMM must be the 1st insn, this is safe */
-               if (code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM)
+               if (code->op != FETCH_OP_DEREF || parg->count) {
+                       /*
+                        * IMM and COMM point to the actual address, so they must
+                        * be kept; and if parg->count != 0, this is an array
+                        * of string pointers instead of the string address itself.
+                        */
                        code++;
+                       if (code->op != FETCH_OP_NOP) {
+                               ret = -E2BIG;
+                               goto fail;
+                       }
+               }
                code->op = FETCH_OP_ST_STRING;  /* In DEREF case, replace it */
+               code->size = parg->type->size;
                parg->dynamic = true;
        } else if (code->op == FETCH_OP_DEREF) {
                code->op = FETCH_OP_ST_MEM;
@@ -430,12 +458,29 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
                code->op = FETCH_OP_ST_RAW;
                code->size = parg->type->size;
        }
+       scode = code;
        /* Modify operation */
        if (t != NULL) {
                ret = __parse_bitfield_probe_arg(t, parg->type, &code);
                if (ret)
                        goto fail;
        }
+       /* Loop(Array) operation */
+       if (parg->count) {
+               if (scode->op != FETCH_OP_ST_MEM &&
+                   scode->op != FETCH_OP_ST_STRING) {
+                       pr_info("array only accepts memory or address\n");
+                       ret = -EINVAL;
+                       goto fail;
+               }
+               code++;
+               if (code->op != FETCH_OP_NOP) {
+                       ret = -E2BIG;
+                       goto fail;
+               }
+               code->op = FETCH_OP_LP_ARRAY;
+               code->param = parg->count;
+       }
        code++;
        code->op = FETCH_OP_END;
 
@@ -474,14 +519,17 @@ void traceprobe_free_probe_arg(struct probe_arg *arg)
        kfree(arg->code);
        kfree(arg->name);
        kfree(arg->comm);
+       kfree(arg->fmt);
 }
 
+/* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
                           bool is_return)
 {
-       int i;
+       struct probe_arg *parg;
+       int i, j;
        int pos = 0;
-
        const char *fmt, *arg;
 
        if (!is_return) {
@@ -492,33 +540,49 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
                arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
        }
 
-       /* When len=0, we just calculate the needed length */
-#define LEN_OR_ZERO (len ? len - pos : 0)
-
        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
 
        for (i = 0; i < tp->nr_args; i++) {
-               pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
-                               tp->args[i].name, tp->args[i].type->fmt);
+               parg = tp->args + i;
+               pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=", parg->name);
+               if (parg->count) {
+                       pos += snprintf(buf + pos, LEN_OR_ZERO, "{%s",
+                                       parg->type->fmt);
+                       for (j = 1; j < parg->count; j++)
+                               pos += snprintf(buf + pos, LEN_OR_ZERO, ",%s",
+                                               parg->type->fmt);
+                       pos += snprintf(buf + pos, LEN_OR_ZERO, "}");
+               } else
+                       pos += snprintf(buf + pos, LEN_OR_ZERO, "%s",
+                                       parg->type->fmt);
        }
 
        pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
 
        for (i = 0; i < tp->nr_args; i++) {
-               if (strcmp(tp->args[i].type->name, "string") == 0)
+               parg = tp->args + i;
+               if (parg->count) {
+                       if (strcmp(parg->type->name, "string") == 0)
+                               fmt = ", __get_str(%s[%d])";
+                       else
+                               fmt = ", REC->%s[%d]";
+                       for (j = 0; j < parg->count; j++)
+                               pos += snprintf(buf + pos, LEN_OR_ZERO,
+                                               fmt, parg->name, j);
+               } else {
+                       if (strcmp(parg->type->name, "string") == 0)
+                               fmt = ", __get_str(%s)";
+                       else
+                               fmt = ", REC->%s";
                        pos += snprintf(buf + pos, LEN_OR_ZERO,
-                                       ", __get_str(%s)",
-                                       tp->args[i].name);
-               else
-                       pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
-                                       tp->args[i].name);
+                                       fmt, parg->name);
+               }
        }
 
-#undef LEN_OR_ZERO
-
        /* return the length of print_fmt */
        return pos;
 }
+#undef LEN_OR_ZERO
 
 int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
 {
@@ -546,11 +610,15 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call,
        /* Set argument names as fields */
        for (i = 0; i < tp->nr_args; i++) {
                struct probe_arg *parg = &tp->args[i];
-
-               ret = trace_define_field(event_call, parg->type->fmttype,
-                                        parg->name,
-                                        offset + parg->offset,
-                                        parg->type->size,
+               const char *fmt = parg->type->fmttype;
+               int size = parg->type->size;
+
+               if (parg->fmt)
+                       fmt = parg->fmt;
+               if (parg->count)
+                       size *= parg->count;
+               ret = trace_define_field(event_call, fmt, parg->name,
+                                        offset + parg->offset, size,
                                         parg->type->is_signed,
                                         FILTER_OTHER);
                if (ret)
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index ff91faf70887..d256a19ee6d1 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -43,6 +43,7 @@
 
 #define MAX_TRACE_ARGS         128
 #define MAX_ARGSTR_LEN         63
+#define MAX_ARRAY_LEN          64
 #define MAX_STRING_SIZE                PATH_MAX
 
 /* Reserved field names */
@@ -78,6 +79,14 @@ static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
        return (u8 *)ent + get_loc_offs(*dl);
 }
 
+static nokprobe_inline u32 update_data_loc(u32 loc, int consumed)
+{
+       u32 maxlen = get_loc_len(loc);
+       u32 offset = get_loc_offs(loc);
+
+       return make_data_loc(maxlen - consumed, offset + consumed);
+}
+
 /* Printing function type */
 typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
 
@@ -100,6 +109,8 @@ enum fetch_op {
        FETCH_OP_ST_STRING,     /* String: .offset, .size */
        // Stage 4 (modify) op
        FETCH_OP_MOD_BF,        /* Bitfield: .basesize, .lshift, .rshift */
+       // Stage 5 (loop) op
+       FETCH_OP_LP_ARRAY,      /* Array: .param = loop count */
        FETCH_OP_END,
 };
 
@@ -189,6 +200,7 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(symbol);
        _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, atype)
 
 #define ASSIGN_FETCH_TYPE_END {}
+#define MAX_ARRAY_LEN  64
 
 #ifdef CONFIG_KPROBE_EVENTS
 bool trace_kprobe_on_func_entry(struct trace_event_call *call);
@@ -209,8 +221,10 @@ struct probe_arg {
        struct fetch_insn       *code;
        bool                    dynamic;/* Dynamic array (string) is used */
        unsigned int            offset; /* Offset from argument entry */
+       unsigned int            count;  /* Array count */
        const char              *name;  /* Name of this argument */
        const char              *comm;  /* Command of this argument */
+       char                    *fmt;   /* Format string if needed */
        const struct fetch_type *type;  /* Type of this argument */
 };
 
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index 32ae2fc78190..edc09f2cd6b2 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -67,10 +67,15 @@ static nokprobe_inline int
 process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
                           void *dest, void *base)
 {
-       int ret = 0;
+       struct fetch_insn *s3 = NULL;
+       int total = 0, ret = 0, i = 0;
+       u32 loc = 0;
+       unsigned long lval = val;
 
+stage2:
        /* 2nd stage: dereference memory if needed */
        while (code->op == FETCH_OP_DEREF) {
+               lval = val;
                ret = probe_mem_read(&val, (void *)val + code->offset,
                                        sizeof(val));
                if (ret)
@@ -78,11 +83,15 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
                code++;
        }
 
+       s3 = code;
+stage3:
        /* 3rd stage: store value to buffer */
        if (unlikely(!dest)) {
-               if (code->op == FETCH_OP_ST_STRING)
-                       return fetch_store_strlen(val + code->offset);
-               else
+               if (code->op == FETCH_OP_ST_STRING) {
+                       ret += fetch_store_strlen(val + code->offset);
+                       code++;
+                       goto array;
+               } else
                        return -EILSEQ;
        }
 
@@ -94,6 +103,7 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
                probe_mem_read(dest, (void *)val + code->offset, code->size);
                break;
        case FETCH_OP_ST_STRING:
+               loc = *(u32 *)dest;
                ret = fetch_store_string(val + code->offset, dest, base);
                break;
        default:
@@ -107,6 +117,29 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
                code++;
        }
 
+array:
+       /* the last stage: Loop on array */
+       if (code->op == FETCH_OP_LP_ARRAY) {
+               total += ret;
+               if (++i < code->param) {
+                       code = s3;
+                       if (s3->op != FETCH_OP_ST_STRING) {
+                               dest += s3->size;
+                               val += s3->size;
+                               goto stage3;
+                       }
+                       code--;
+                       val = lval + sizeof(char *);
+                       if (dest) {
+                               dest += sizeof(u32);
+                               *(u32 *)dest = update_data_loc(loc, ret);
+                       }
+                       goto stage2;
+               }
+               code++;
+               ret = total;
+       }
+
        return code->op == FETCH_OP_END ? ret : -EILSEQ;
 }
 
@@ -156,12 +189,26 @@ static inline int
 print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
                 u8 *data, void *field)
 {
-       int i;
+       void *p;
+       int i, j;
 
        for (i = 0; i < nr_args; i++) {
-               trace_seq_printf(s, " %s=", args[i].name);
-               if (!args[i].type->print(s, data + args[i].offset, field))
-                       return -ENOMEM;
+               struct probe_arg *a = args + i;
+
+               trace_seq_printf(s, " %s=", a->name);
+               if (likely(!a->count)) {
+                       if (!a->type->print(s, data + a->offset, field))
+                               return -ENOMEM;
+                       continue;
+               }
+               trace_seq_putc(s, '{');
+               p = data + a->offset;
+               for (j = 0; j < a->count; j++) {
+                       if (!a->type->print(s, p, field))
+                               return -ENOMEM;
+                       trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
+                       p += a->type->size;
+               }
        }
        return 0;
 }

Reply via email to