Hi,
I'm getting the following message when the kdump kernel starts:

  Broken BIOS detected, complain to your hardware vendor.
  [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)

It seems to be caused by the NMI watchdog being configured: the
fixed counter values stay in the MSRs, which triggers the
warning in check_hw_exists() and disables perf support
in the kdump kernel... which probably does not hurt ;-)

Zeroing the MSRs during kdump shutdown seems to work (patch attached),
but I'm not sure that's the correct place for a kdump perf callback.

thanks,
jirka


---
 arch/x86/include/asm/perf_event.h |  2 ++
 arch/x86/kernel/cpu/perf_event.c  | 23 +++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event.h  |  1 +
 arch/x86/kernel/crash.c           |  3 +++
 4 files changed, 29 insertions(+)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dc0f6ed35b08..8e49668cf8fe 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -217,6 +217,7 @@ static inline u32 get_ibs_caps(void) { return 0; }
 
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
+extern void perf_clear_msrs(void);
 
 /*
  * Abuse bits {3,5} of the cpu eflags register. These flags are otherwise
@@ -275,6 +276,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 
 static inline void perf_events_lapic_init(void)        { }
 static inline void perf_check_microcode(void) { }
+static inline void perf_clear_msrs(void) { }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3658de47900f..f30dbcfb6905 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -269,6 +269,27 @@ msr_fail:
        return false;
 }
 
+void perf_clear_msrs(void)
+{
+       int i, reg, ret;
+
+       if (!x86_pmu.enabled)
+               return;
+
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               reg = x86_pmu_config_addr(i);
+               ret = wrmsrl_safe(reg, 0);
+               if (WARN_ONCE(ret, "failed to zero perf counter msr, reg %x\n", reg))
+                       break;
+       }
+
+       if (x86_pmu.num_counters_fixed) {
+               reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+               ret = wrmsrl_safe(reg, 0);
+               WARN_ONCE(ret, "failed to zero perf fixed counters msr\n");
+       }
+}
+
 static void hw_perf_event_destroy(struct perf_event *event)
 {
        x86_release_hardware();
@@ -1689,6 +1710,8 @@ static int __init init_hw_perf_events(void)
        if (!check_hw_exists())
                return 0;
 
+       x86_pmu.enabled = true;
+
        pr_cont("%s PMU driver.\n", x86_pmu.name);
 
        x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3e7fd27dfe20..ca8a5068f8a0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -508,6 +508,7 @@ struct x86_pmu {
         */
        const char      *name;
        int             version;
+       bool            enabled;
        int             (*handle_irq)(struct pt_regs *);
        void            (*disable_all)(void);
        void            (*enable_all)(int added);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e068d6683dba..20ed1ffdab8c 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -35,6 +35,7 @@
 #include <asm/cpu.h>
 #include <asm/reboot.h>
 #include <asm/virtext.h>
+#include <asm/perf_event.h>
 
 /* Alignment required for elf header segment */
 #define ELF_CORE_HEADER_ALIGN   4096
@@ -128,6 +129,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
        cpu_emergency_svm_disable();
 
        disable_local_APIC();
+       perf_clear_msrs();
 }
 
 static void kdump_nmi_shootdown_cpus(void)
@@ -182,6 +184,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
        hpet_disable();
 #endif
        crash_save_cpu(regs, safe_smp_processor_id());
+       perf_clear_msrs();
 }
 
 #ifdef CONFIG_KEXEC_FILE
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to