Linus,

please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

Two fixes for the perf subsystem:

 - Fix an inconsistency of RDPMC mm struct tagging across exec() which
   causes RDPMC to fault.

 - Correct the timestamp mechanics across IOC_DISABLE/ENABLE, which caused
   incorrect timestamps and total time calculations.

Thanks,

        tglx

------------------>
Peter Zijlstra (2):
      perf/x86: Fix RDPMC vs. mm_struct tracking
      perf/core: Fix time on IOC_ENABLE


 arch/x86/events/core.c     | 16 +++++++---------
 include/linux/perf_event.h |  4 ++--
 kernel/events/core.c       | 47 ++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e3db8f642a7..af12e294caed 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored)
        load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
 }
 
-static void x86_pmu_event_mapped(struct perf_event *event)
+static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct 
*mm)
 {
        if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
                return;
@@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct perf_event 
*event)
         * For now, this can't happen because all callers hold mmap_sem
         * for write.  If this changes, we'll need a different solution.
         */
-       lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+       lockdep_assert_held_exclusive(&mm->mmap_sem);
 
-       if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
-               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+       if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
+               on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
 }
 
-static void x86_pmu_event_unmapped(struct perf_event *event)
+static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct 
*mm)
 {
-       if (!current->mm)
-               return;
 
        if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
                return;
 
-       if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
-               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+       if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
+               on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
 }
 
 static int x86_pmu_event_idx(struct perf_event *event)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a3b873fc59e4..b14095bcf4bb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -310,8 +310,8 @@ struct pmu {
         * Notification that the event was mapped or unmapped.  Called
         * in the context of the mapping task.
         */
-       void (*event_mapped)            (struct perf_event *event); /*optional*/
-       void (*event_unmapped)          (struct perf_event *event); /*optional*/
+       void (*event_mapped)            (struct perf_event *event, struct 
mm_struct *mm); /* optional */
+       void (*event_unmapped)          (struct perf_event *event, struct 
mm_struct *mm); /* optional */
 
        /*
         * Flags for ->add()/->del()/ ->start()/->stop(). There are
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 426c2ffba16d..ee20d4c546b5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2217,6 +2217,33 @@ static int group_can_go_on(struct perf_event *event,
        return can_add_hw;
 }
 
+/*
+ * Complement to update_event_times(). This computes the tstamp_* values to
+ * continue 'enabled' state from @now, and effectively discards the time
+ * between the prior tstamp_stopped and now (as we were in the OFF state, or
+ * just switched (context) time base).
+ *
+ * This further assumes '@event->state == INACTIVE' (we just came from OFF) and
+ * cannot have been scheduled in yet. And going into INACTIVE state means
+ * '@event->tstamp_stopped = @now'.
+ *
+ * Thus given the rules of update_event_times():
+ *
+ *   total_time_enabled = tstamp_stopped - tstamp_enabled
+ *   total_time_running = tstamp_stopped - tstamp_running
+ *
+ * We can insert 'tstamp_stopped == now' and reverse them to compute new
+ * tstamp_* values.
+ */
+static void __perf_event_enable_time(struct perf_event *event, u64 now)
+{
+       WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE);
+
+       event->tstamp_stopped = now;
+       event->tstamp_enabled = now - event->total_time_enabled;
+       event->tstamp_running = now - event->total_time_running;
+}
+
 static void add_event_to_ctx(struct perf_event *event,
                               struct perf_event_context *ctx)
 {
@@ -2224,9 +2251,12 @@ static void add_event_to_ctx(struct perf_event *event,
 
        list_add_event(event, ctx);
        perf_group_attach(event);
-       event->tstamp_enabled = tstamp;
-       event->tstamp_running = tstamp;
-       event->tstamp_stopped = tstamp;
+       /*
+        * We can be called with event->state == STATE_OFF when we create with
+        * .disabled = 1. In that case the IOC_ENABLE will call this function.
+        */
+       if (event->state == PERF_EVENT_STATE_INACTIVE)
+               __perf_event_enable_time(event, tstamp);
 }
 
 static void ctx_sched_out(struct perf_event_context *ctx,
@@ -2471,10 +2501,11 @@ static void __perf_event_mark_enabled(struct perf_event 
*event)
        u64 tstamp = perf_event_time(event);
 
        event->state = PERF_EVENT_STATE_INACTIVE;
-       event->tstamp_enabled = tstamp - event->total_time_enabled;
+       __perf_event_enable_time(event, tstamp);
        list_for_each_entry(sub, &event->sibling_list, group_entry) {
+               /* XXX should not be > INACTIVE if event isn't */
                if (sub->state >= PERF_EVENT_STATE_INACTIVE)
-                       sub->tstamp_enabled = tstamp - sub->total_time_enabled;
+                       __perf_event_enable_time(sub, tstamp);
        }
 }
 
@@ -5090,7 +5121,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
                atomic_inc(&event->rb->aux_mmap_count);
 
        if (event->pmu->event_mapped)
-               event->pmu->event_mapped(event);
+               event->pmu->event_mapped(event, vma->vm_mm);
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5144,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        unsigned long size = perf_data_size(rb);
 
        if (event->pmu->event_unmapped)
-               event->pmu->event_unmapped(event);
+               event->pmu->event_unmapped(event, vma->vm_mm);
 
        /*
         * rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5442,7 @@ static int perf_mmap(struct file *file, struct 
vm_area_struct *vma)
        vma->vm_ops = &perf_mmap_vmops;
 
        if (event->pmu->event_mapped)
-               event->pmu->event_mapped(event);
+               event->pmu->event_mapped(event, vma->vm_mm);
 
        return ret;
 }

Reply via email to