When doing sampling, for example,

perf record -e cycles:u ...

On workloads that do a lot of kernel entry/exits we see kernel
samples, even though :u is specified. This is due to skid existing.

This is a security issue because it can leak kernel addresses even
though kernel sampling support is disabled.

The patch drops the kernel samples if exclude_kernel is specified.

For example, test on Haswell desktop.

perf record -e cycles:u <mgen>
perf report --stdio

Before patch applied:

    99.77%  mgen     mgen              [.] buf_read
     0.20%  mgen     mgen              [.] rand_buf_init
     0.01%  mgen     [kernel.vmlinux]  [k] apic_timer_interrupt
     0.00%  mgen     mgen              [.] last_free_elem
     0.00%  mgen     libc-2.23.so      [.] __random_r
     0.00%  mgen     libc-2.23.so      [.] _int_malloc
     0.00%  mgen     mgen              [.] rand_array_init
     0.00%  mgen     [kernel.vmlinux]  [k] page_fault
     0.00%  mgen     libc-2.23.so      [.] __random
     0.00%  mgen     libc-2.23.so      [.] __strcasestr
     0.00%  mgen     ld-2.23.so        [.] strcmp
     0.00%  mgen     ld-2.23.so        [.] _dl_start
     0.00%  mgen     libc-2.23.so      [.] sched_setaffinity@@GLIBC_2.3.4
     0.00%  mgen     ld-2.23.so        [.] _start

We can see kernel symbols apic_timer_interrupt and page_fault.

After patch applied:

    99.79%  mgen     mgen           [.] buf_read
     0.19%  mgen     mgen           [.] rand_buf_init
     0.00%  mgen     libc-2.23.so   [.] __random_r
     0.00%  mgen     mgen           [.] rand_array_init
     0.00%  mgen     mgen           [.] last_free_elem
     0.00%  mgen     libc-2.23.so   [.] vfprintf
     0.00%  mgen     libc-2.23.so   [.] rand
     0.00%  mgen     libc-2.23.so   [.] __random
     0.00%  mgen     libc-2.23.so   [.] _int_malloc
     0.00%  mgen     libc-2.23.so   [.] _IO_doallocbuf
     0.00%  mgen     ld-2.23.so     [.] do_lookup_x
     0.00%  mgen     ld-2.23.so     [.] open_verify.constprop.7
     0.00%  mgen     ld-2.23.so     [.] _dl_important_hwcaps
     0.00%  mgen     libc-2.23.so   [.] sched_setaffinity@@GLIBC_2.3.4
     0.00%  mgen     ld-2.23.so     [.] _start

There are only userspace symbols.

Change-log:
-----------

v2: Since other arch has similar skid issue in sampling, so move the
    skid checking from x86 code to generic code. Use a new flag
    PERF_PMU_CAP_NO_SKID to let arch decide if keep original behavior
    or drop the skid kernel samples.

Signed-off-by: Jin Yao <[email protected]>
---
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a6358..8d48668 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -254,6 +254,7 @@ struct perf_event;
 #define PERF_PMU_CAP_EXCLUSIVE                 0x10
 #define PERF_PMU_CAP_ITRACE                    0x20
 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS                0x40
+#define PERF_PMU_CAP_NO_SKID                   0x80
 
 /**
  * struct pmu - generic performance monitoring unit
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c..8a49251 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7316,6 +7316,26 @@ int perf_event_account_interrupt(struct perf_event 
*event)
        return __perf_event_account_interrupt(event, 1);
 }
 
+static bool skid_kernel_samples(struct perf_event *event, struct pt_regs *regs)
+{
+       /*
+        * We may get kernel samples even though exclude_kernel
+        * is specified due to potential skid in sampling.
+        * The skid kernel samples could be dropped or just do
+        * nothing by testing the flag PERF_PMU_CAP_NO_SKID.
+        */
+       if (event->pmu->capabilities & PERF_PMU_CAP_NO_SKID)
+               return false;
+
+       if (event->attr.exclude_kernel &&
+           !user_mode(regs) &&
+           (event->attr.sample_type & PERF_SAMPLE_IP)) {
+               return true;
+       }
+
+       return false;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7337,6 +7357,12 @@ static int __perf_event_overflow(struct perf_event 
*event,
        ret = __perf_event_account_interrupt(event, throttle);
 
        /*
+        * For security, drop the skid kernel samples if necessary.
+        */
+       if (skid_kernel_samples(event, regs))
+               return ret;
+
+       /*
         * XXX event_limit might not quite work as expected on inherited
         * events
         */
-- 
2.7.4

Reply via email to