From: "Yan, Zheng" <[email protected]>

Enable the PMU context switch callback when the LBR is in use, and use
the callback to flush the LBR stack when a task is scheduled in. The LBR
does not tag its entries with the pid of the current task, so stale
entries must be flushed across context switches to avoid mixing branches
from different tasks; with the callback in place, the old
flush_branch_stack machinery becomes redundant and is removed.
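
On sched-in the resulting call path is roughly the following (a sketch;
perf_pmu_sched_task() and the perf_sched_cb_* helpers come from the
earlier patch in this series that introduced the callback):

  __perf_event_task_sched_in()
    perf_pmu_sched_task(prev, task, true)   /* if perf_sched_cb_usages */
      pmu->sched_task(ctx, true)            /* x86: x86_pmu_sched_task() */
        intel_pmu_lbr_sched_task(ctx, true)
          intel_pmu_lbr_reset()             /* flush stale LBR entries */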

Signed-off-by: Yan, Zheng <[email protected]>
---
 arch/x86/kernel/cpu/perf_event.c           |  7 ---
 arch/x86/kernel/cpu/perf_event.h           |  2 -
 arch/x86/kernel/cpu/perf_event_intel.c     | 14 +-----
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 32 +++++++++-----
 include/linux/perf_event.h                 |  5 ---
 kernel/events/core.c                       | 71 ------------------------------
 6 files changed, 21 insertions(+), 110 deletions(-)
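
Note for reviewers: the callback is reference counted per cpu, so it only
fires while at least one LBR event exists on that cpu. The pairing in the
LBR code below boils down to (a sketch, using the perf_sched_cb_enable()/
perf_sched_cb_disable() helpers introduced earlier in this series):

        /* first LBR user on this cpu: enable the ctxsw callback */
        if (++cpuc->lbr_users == 1)
                perf_sched_cb_enable(event->ctx->pmu);

        /* last LBR user on this cpu gone: disable the callback */
        if (--cpuc->lbr_users == 0)
                perf_sched_cb_disable(event->ctx->pmu);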

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6703d17..69e2095 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1852,12 +1852,6 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
                x86_pmu.sched_task(ctx, sched_in);
 }
 
-static void x86_pmu_flush_branch_stack(void)
-{
-       if (x86_pmu.flush_branch_stack)
-               x86_pmu.flush_branch_stack();
-}
-
 void perf_check_microcode(void)
 {
        if (x86_pmu.check_microcode)
@@ -1884,7 +1878,6 @@ static struct pmu pmu = {
        .commit_txn             = x86_pmu_commit_txn,
 
        .event_idx              = x86_pmu_event_idx,
-       .flush_branch_stack     = x86_pmu_flush_branch_stack,
        .sched_task             = x86_pmu_sched_task,
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 5eb3a58..3ef4b79 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -150,7 +150,6 @@ struct cpu_hw_events {
         * Intel LBR bits
         */
        int                             lbr_users;
-       void                            *lbr_context;
        struct perf_branch_stack        lbr_stack;
        struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
        struct er_account               *lbr_sel;
@@ -416,7 +415,6 @@ struct x86_pmu {
        void            (*cpu_dead)(int cpu);
 
        void            (*check_microcode)(void);
-       void            (*flush_branch_stack)(void);
        void            (*sched_task)(struct perf_event_context *ctx,
                                      bool sched_in);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index b477bfc..84a1c09 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2038,18 +2038,6 @@ static void intel_pmu_cpu_dying(int cpu)
        fini_debug_store_on_cpu(cpu);
 }
 
-static void intel_pmu_flush_branch_stack(void)
-{
-       /*
-        * Intel LBR does not tag entries with the
-        * PID of the current task, then we need to
-        * flush it on ctxsw
-        * For now, we simply reset it
-        */
-       if (x86_pmu.lbr_nr)
-               intel_pmu_lbr_reset();
-}
-
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -2101,7 +2089,7 @@ static __initconst const struct x86_pmu intel_pmu = {
        .cpu_starting           = intel_pmu_cpu_starting,
        .cpu_dying              = intel_pmu_cpu_dying,
        .guest_get_msrs         = intel_guest_get_msrs,
-       .flush_branch_stack     = intel_pmu_flush_branch_stack,
+       .sched_task             = intel_pmu_lbr_sched_task,
 };
 
 static __init void intel_clovertown_quirk(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 1bb844e..c33bf84c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -187,24 +187,32 @@ void intel_pmu_lbr_reset(void)
                intel_pmu_lbr_reset_64();
 }
 
-void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
        if (!x86_pmu.lbr_nr)
                return;
 
        /*
-        * Reset the LBR stack if we changed task context to
-        * avoid data leaks.
+        * The branch stack does not tag its entries with the pid of the
+        * current task, so it is necessary to flush the stack on context
+        * switch to avoid mixing branches from different tasks.
         */
-       if (event->ctx->task && cpuc->lbr_context != event->ctx) {
+       if (sched_in)
                intel_pmu_lbr_reset();
-               cpuc->lbr_context = event->ctx;
-       }
+}
+
+void intel_pmu_lbr_enable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+       if (!x86_pmu.lbr_nr)
+               return;
+
        cpuc->br_sel = event->hw.branch_reg.reg;
 
        cpuc->lbr_users++;
+       if (cpuc->lbr_users == 1)
+               perf_sched_cb_enable(event->ctx->pmu);
 }
 
 void intel_pmu_lbr_disable(struct perf_event *event)
@@ -217,10 +225,10 @@ void intel_pmu_lbr_disable(struct perf_event *event)
        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
 
-       if (cpuc->enabled && !cpuc->lbr_users) {
-               __intel_pmu_lbr_disable();
-               /* avoid stale pointer */
-               cpuc->lbr_context = NULL;
+       if (!cpuc->lbr_users) {
+               perf_sched_cb_disable(event->ctx->pmu);
+               if (cpuc->enabled)
+                       __intel_pmu_lbr_disable();
        }
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6a3e603..96cb88b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -248,11 +248,6 @@ struct pmu {
        int (*event_idx)                (struct perf_event *event); /*optional */
 
        /*
-        * flush branch stack on context-switches (needed in cpu-wide mode)
-        */
-       void (*flush_branch_stack)      (void);
-
-       /*
         * PMU callback for context-switches. optional
         */
        void (*sched_task)              (struct perf_event_context *ctx,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 11c63d6..fc97ed6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -140,7 +140,6 @@ enum event_type_t {
  */
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
-static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -2567,65 +2566,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
        perf_pmu_rotate_start(ctx->pmu);
 }
 
-/*
- * When sampling the branck stack in system-wide, it may be necessary
- * to flush the stack on context switch. This happens when the branch
- * stack does not tag its entries with the pid of the current task.
- * Otherwise it becomes impossible to associate a branch entry with a
- * task. This ambiguity is more likely to appear when the branch stack
- * supports priv level filtering and the user sets it to monitor only
- * at the user level (which could be a useful measurement in system-wide
- * mode). In that case, the risk is high of having a branch stack with
- * branch from multiple tasks. Flushing may mean dropping the existing
- * entries or stashing them somewhere in the PMU specific code layer.
- *
- * This function provides the context switch callback to the lower code
- * layer. It is invoked ONLY when there is at least one system-wide context
- * with at least one active event using taken branch sampling.
- */
-static void perf_branch_stack_sched_in(struct task_struct *prev,
-                                      struct task_struct *task)
-{
-       struct perf_cpu_context *cpuctx;
-       struct pmu *pmu;
-       unsigned long flags;
-
-       /* no need to flush branch stack if not changing task */
-       if (prev == task)
-               return;
-
-       local_irq_save(flags);
-
-       rcu_read_lock();
-
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-               /*
-                * check if the context has at least one
-                * event using PERF_SAMPLE_BRANCH_STACK
-                */
-               if (cpuctx->ctx.nr_branch_stack > 0
-                   && pmu->flush_branch_stack) {
-
-                       pmu = cpuctx->ctx.pmu;
-
-                       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-
-                       perf_pmu_disable(pmu);
-
-                       pmu->flush_branch_stack();
-
-                       perf_pmu_enable(pmu);
-
-                       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-               }
-       }
-
-       rcu_read_unlock();
-
-       local_irq_restore(flags);
-}
 
 /*
  * Called from scheduler to add the events of the current task
@@ -2659,9 +2599,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
        if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
                perf_cgroup_sched_in(prev, task);
 
-       /* check for system-wide branch_stack events */
-       if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
-               perf_branch_stack_sched_in(prev, task);
        if (__get_cpu_var(perf_sched_cb_usages))
                perf_pmu_sched_task(prev, task, true);
 }
@@ -3226,10 +3163,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
        if (event->parent)
                return;
 
-       if (has_branch_stack(event)) {
-               if (!(event->attach_state & PERF_ATTACH_TASK))
-                       atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
-       }
        if (is_cgroup_event(event))
                atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
@@ -6642,10 +6575,6 @@ static void account_event_cpu(struct perf_event *event, int cpu)
        if (event->parent)
                return;
 
-       if (has_branch_stack(event)) {
-               if (!(event->attach_state & PERF_ATTACH_TASK))
-                       atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
-       }
        if (is_cgroup_event(event))
                atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
-- 
1.8.1.4
