The regular host lbr perf event doesn't save/restore the LBR_SELECT msr
during a thread context switching, because the LBR_SELECT value is
generated from attr.branch_sample_type and already stored in
event->hw.branch_reg (please see intel_pmu_setup_hw_filter), which
doesn't get lost during thread context switching.

The attr.branch_sample_type for the vcpu lbr event is deliberately set
to the user call stack mode to enable the perf core to save/restore the
lbr related msrs on vcpu switching. So the attr.branch_sample_type
essentially doesn't represent what the guest pmu driver will write to
LBR_SELECT. Meanwhile, the host lbr driver doesn't configure the lbr msrs,
including the LBR_SELECT msr, for the vcpu thread case, as the pmu driver
inside the vcpu will do that.

So for the vcpu case, add the LBR_SELECT save/restore to ensure what the
guest writes to the LBR_SELECT msr doesn't get lost during the vcpu context
switching.

Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Andi Kleen <a...@linux.intel.com>
Cc: Kan Liang <kan.li...@intel.com>
Signed-off-by: Wei Wang <wei.w.w...@intel.com>
---
 arch/x86/events/intel/lbr.c  | 7 +++++++
 arch/x86/events/perf_event.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index a0f3686..236f8352 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -390,6 +390,9 @@ static void __intel_pmu_lbr_restore(struct 
x86_perf_task_context *task_ctx)
 
        wrmsrl(x86_pmu.lbr_tos, tos);
        task_ctx->lbr_stack_state = LBR_NONE;
+
+       if (cpuc->vcpu_lbr)
+               wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
 }
 
 static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
@@ -416,6 +419,10 @@ static void __intel_pmu_lbr_save(struct 
x86_perf_task_context *task_ctx)
                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
                        rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
        }
+
+       if (cpuc->vcpu_lbr)
+               rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
+
        task_ctx->valid_lbrs = i;
        task_ctx->tos = tos;
        task_ctx->lbr_stack_state = LBR_VALID;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8b90a25..0b2f660 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -699,6 +699,7 @@ struct x86_perf_task_context {
        u64 lbr_from[MAX_LBR_ENTRIES];
        u64 lbr_to[MAX_LBR_ENTRIES];
        u64 lbr_info[MAX_LBR_ENTRIES];
+       u64 lbr_sel;
        int tos;
        int valid_lbrs;
        int lbr_callstack_users;
-- 
2.7.4

Reply via email to