On 6/30/2020 11:49 AM, Peter Zijlstra wrote:
On Fri, Jun 26, 2020 at 11:20:11AM -0700, [email protected] wrote:

+       if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
+               intel_pmu_arch_lbr_init();

+static inline bool is_lbr_call_stack_bit_set(u64 config)
+{
+       if (x86_pmu.arch_lbr)
+               return !!(config & ARCH_LBR_CALL_STACK);
+
+       return !!(config & LBR_CALL_STACK);
+}

+       if (!x86_pmu.arch_lbr && !pmi && cpuc->lbr_sel)
                wrmsrl(MSR_LBR_SELECT, lbr_select);

+       if (!x86_pmu.arch_lbr)
+               debugctl |= DEBUGCTLMSR_LBR;

+       if (x86_pmu.arch_lbr)
+               wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
  }

etc...

+void __init intel_pmu_arch_lbr_init(void)
+{
+       unsigned int unused_edx;
+       u64 lbr_nr;
+
+       /* Arch LBR Capabilities */
+       cpuid(28, &x86_pmu.lbr_eax.full, &x86_pmu.lbr_ebx.full,
+                 &x86_pmu.lbr_ecx.full, &unused_edx);
+
+       lbr_nr = x86_pmu_get_max_arch_lbr_nr();
+       if (!lbr_nr)
+               return;
+
+       /* Apply the max depth of Arch LBR */
+       if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
+               return;
+
+       x86_pmu.lbr_nr = lbr_nr;
+       x86_get_pmu()->task_ctx_size = sizeof(struct x86_perf_task_context_arch_lbr) +
+                                      lbr_nr * sizeof(struct lbr_entry);
+
+       x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
+       x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
+       x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;
+
+       /* LBR callstack requires both CPL and Branch Filtering support */
+       if (!x86_pmu.lbr_ebx.split.lbr_cpl ||
+           !x86_pmu.lbr_ebx.split.lbr_filter ||
+           !x86_pmu.lbr_ebx.split.lbr_call_stack)
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;
+
+       if (!x86_pmu.lbr_ebx.split.lbr_cpl) {
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
+       } else if (!x86_pmu.lbr_ebx.split.lbr_filter) {
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
+               arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
+       }
+
+       x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
+       x86_pmu.lbr_ctl_map  = arch_lbr_ctl_map;
+
+       if (!x86_pmu.lbr_ebx.split.lbr_cpl && !x86_pmu.lbr_ebx.split.lbr_filter)
+               x86_pmu.lbr_ctl_map = NULL;
+
+       x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
+       x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
+       x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
+       x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
+
+       x86_pmu.arch_lbr = true;
+       pr_cont("Architectural LBR, ");
+}

How about we make this here clear FEATURE_ARCH_LBR if it fails and then
do away with x86_pmu.arch_lbr and use
static_cpu_has(X86_FEATURE_ARCH_LBR) a lot more?

Yes, it's doable. So we can save a bit for arch_lbr in structure x86_pmu.

If the check fails, I will clear FEATURE_ARCH_LBR via clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR), and I will replace x86_pmu.arch_lbr with static_cpu_has(X86_FEATURE_ARCH_LBR) everywhere.

Thanks,
Kan

Reply via email to