From: Masami Hiramatsu (Google) <[email protected]>

When tracing kernel local variables, we sometimes need to read per-CPU
variables as well. The existing simple dereference is not enough to
access them.

Thus, introduce a special +CPU() dereference that reads a per-CPU
variable on the current CPU (accessing another CPU's copy may race with
updates on that CPU). Also introduce +PCPU(), which returns the current
CPU's pointer to a per-CPU variable instead of reading its value.

 +CPU(pcp)

is equal to

 this_cpu_read(pcp)

And

 +PCPU(pcp)

is equal to

 this_cpu_ptr(pcp)

Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
---
 Documentation/trace/eprobetrace.rst |    3 ++
 Documentation/trace/fprobetrace.rst |    3 ++
 Documentation/trace/kprobetrace.rst |    3 ++
 kernel/trace/trace.c                |    1 +
 kernel/trace/trace_probe.c          |   48 +++++++++++++++++++++--------------
 kernel/trace/trace_probe.h          |    2 +
 kernel/trace/trace_probe_tmpl.h     |   30 ++++++++++++++++++----
 7 files changed, 65 insertions(+), 25 deletions(-)

diff --git a/Documentation/trace/eprobetrace.rst 
b/Documentation/trace/eprobetrace.rst
index dcf92d5b4175..0c7878df02f6 100644
--- a/Documentation/trace/eprobetrace.rst
+++ b/Documentation/trace/eprobetrace.rst
@@ -40,6 +40,9 @@ Synopsis of eprobe_events
   $comm                : Fetch current task comm.
   $current     : Fetch the address of the current task_struct.
   +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
+  +CPU(FETCHARG) : Fetch memory at the per-CPU address FETCHARG on the current CPU.
+                  This is useful for fetching per-CPU variables.
+  +PCPU(FETCHARG) : Fetch the current CPU's address for the per-CPU pointer FETCHARG.
   \IMM         : Store an immediate value to the argument.
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/Documentation/trace/fprobetrace.rst 
b/Documentation/trace/fprobetrace.rst
index 3392cab016b3..c851f98bb310 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -52,6 +52,9 @@ Synopsis of fprobe-events
   $comm         : Fetch current task comm.
   $current      : Fetch the address of the current task_struct.
   +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*4)(\*5)
+  +CPU(FETCHARG) : Fetch memory at the per-CPU address FETCHARG on the current CPU.
+                  This is useful for fetching per-CPU variables.
+  +PCPU(FETCHARG) : Fetch the current CPU's address for the per-CPU pointer FETCHARG.
   \IMM          : Store an immediate value to the argument.
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/Documentation/trace/kprobetrace.rst 
b/Documentation/trace/kprobetrace.rst
index 81e4fe38791d..bc806fd82a91 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -55,6 +55,9 @@ Synopsis of kprobe_events
   $comm                : Fetch current task comm.
   $current      : Fetch the address of the current task_struct.
   +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
+  +CPU(FETCHARG) : Fetch memory at the per-CPU address FETCHARG on the current CPU.
+                  This is useful for fetching per-CPU variables.
+  +PCPU(FETCHARG) : Fetch the current CPU's address for the per-CPU pointer FETCHARG.
   \IMM         : Store an immediate value to the argument.
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e185a006cb08..2b8c8ac4036a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4332,6 +4332,7 @@ static const char readme_msg[] =
        "\t           $stack<index>, $stack, $retval, $comm, $current\n"
 #endif
        "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, 
\\\"imm-string\"\n"
+       "\t           +CPU(<fetcharg>), +PCPU(<fetcharg>)\n"
        "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, 
symbol,\n"
        "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 2c5deb1e1463..fa6757222fe6 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1396,26 +1396,36 @@ parse_probe_arg(char *arg, const struct fetch_type 
*type,
 
        case '+':       /* deref memory */
        case '-':
-               if (arg[1] == 'u') {
-                       deref = FETCH_OP_UDEREF;
-                       arg[1] = arg[0];
-                       arg++;
-               }
-               if (arg[0] == '+')
-                       arg++;  /* Skip '+', because kstrtol() rejects it. */
-               tmp = strchr(arg, '(');
-               if (!tmp) {
-                       trace_probe_log_err(ctx->offset, DEREF_NEED_BRACE);
-                       return -EINVAL;
-               }
-               *tmp = '\0';
-               ret = kstrtol(arg, 0, &offset);
-               if (ret) {
-                       trace_probe_log_err(ctx->offset, BAD_DEREF_OFFS);
-                       break;
+               if (str_has_prefix(arg, "+CPU(")) {
+                       deref = FETCH_OP_DEREF_CPU;
+                       arg += 5;
+                       ctx->offset += 5;
+               } else if (str_has_prefix(arg, "+PCPU(")) {
+                       deref = FETCH_OP_CPU_PTR;
+                       arg += 6;
+                       ctx->offset += 6;
+               } else {
+                       if (arg[1] == 'u') {
+                               deref = FETCH_OP_UDEREF;
+                               arg[1] = arg[0];
+                               arg++;
+                       }
+                       if (arg[0] == '+')
+                               arg++;  /* Skip '+', because kstrtol() rejects 
it. */
+                       tmp = strchr(arg, '(');
+                       if (!tmp) {
+                               trace_probe_log_err(ctx->offset, 
DEREF_NEED_BRACE);
+                               return -EINVAL;
+                       }
+                       *tmp = '\0';
+                       ret = kstrtol(arg, 0, &offset);
+                       if (ret) {
+                               trace_probe_log_err(ctx->offset, 
BAD_DEREF_OFFS);
+                               break;
+                       }
+                       ctx->offset += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 
0);
+                       arg = tmp + 1;
                }
-               ctx->offset += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0);
-               arg = tmp + 1;
                tmp = strrchr(arg, ')');
                if (!tmp) {
                        trace_probe_log_err(ctx->offset + strlen(arg),
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index f2b31089779c..bec04bcc4226 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -100,6 +100,8 @@ enum fetch_op {
        // Stage 2 (dereference) op
        FETCH_OP_DEREF,         /* Dereference: .offset */
        FETCH_OP_UDEREF,        /* User-space Dereference: .offset */
+       FETCH_OP_DEREF_CPU,     /* Per-CPU Dereference for this CPU */
+       FETCH_OP_CPU_PTR,       /* Per-CPU pointer for this CPU */
        // Stage 3 (store) ops
        FETCH_OP_ST_RAW,        /* Raw: .size */
        FETCH_OP_ST_MEM,        /* Mem: .offset, .size */
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index f630930288d2..82d753decf48 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -129,25 +129,43 @@ process_fetch_insn_bottom(struct fetch_insn *code, 
unsigned long val,
        struct fetch_insn *s3 = NULL;
        int total = 0, ret = 0, i = 0;
        u32 loc = 0;
-       unsigned long lval = val;
+       unsigned long lval, llval = val;
 
 stage2:
        /* 2nd stage: dereference memory if needed */
        do {
-               if (code->op == FETCH_OP_DEREF) {
-                       lval = val;
+               lval = val;
+               switch (code->op) {
+               case FETCH_OP_DEREF:
                        ret = probe_mem_read(&val, (void *)val + code->offset,
                                             sizeof(val));
-               } else if (code->op == FETCH_OP_UDEREF) {
-                       lval = val;
+                       break;
+               case FETCH_OP_UDEREF:
                        ret = probe_mem_read_user(&val,
                                 (void *)val + code->offset, sizeof(val));
-               } else
                        break;
+               case FETCH_OP_DEREF_CPU:
+               case FETCH_OP_CPU_PTR:
+                       if (!is_kernel_percpu_address(val)) {
+                               ret = -EFAULT;
+                               break;
+                       }
+                       val = (unsigned long)this_cpu_ptr((void __percpu *)val);
+                       if (code->op == FETCH_OP_DEREF_CPU)
+                               ret = probe_mem_read(&val, (void *)val, 
sizeof(val));
+                       else
+                               ret = 0;
+                       break;
+               default:
+                       lval = llval;
+                       goto out;
+               }
                if (ret)
                        return ret;
+               llval = lval;
                code++;
        } while (1);
+out:
 
        s3 = code;
 stage3:


Reply via email to