From: Yazen Ghannam <yazen.ghan...@amd.com>

Linux supports the new SMCA MCA_DE{STAT,ADDR} registers, so on SMCA systems
we have been using them in place of MCA_{STATUS,ADDR}. However, the guidance
for current implementations of SMCA is to continue using MCA_{STATUS,ADDR}
and to use MCA_DE{STAT,ADDR} only if a Deferred error was not found in the
former registers. This also means we shouldn't clear
MCA_CONFIG[LogDeferredInMcaStat].

Redo the AMD Deferred error interrupt handler to follow the guidance for
current SMCA systems. Also, don't break out of the bank loop after finding
the first error; keep checking the remaining banks.

Don't clear MCA_CONFIG[LogDeferredInMcaStat] during AMD mcheck init.

Signed-off-by: Yazen Ghannam <yazen.ghan...@amd.com>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 47 ++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 524cc57..4e459e0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -472,20 +472,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
                smca_high |= BIT(0);
 
                /*
-                * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
-                * registers with the option of additionally logging to
-                * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
-                *
-                * This bit is usually set by BIOS to retain the old behavior
-                * for OSes that don't use the new registers. Linux supports the
-                * new registers so let's disable that additional logging here.
-                *
-                * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
-                * portion of the MSR).
-                */
-               smca_high &= ~BIT(2);
-
-               /*
                 * SMCA sets the Deferred Error Interrupt type per bank.
                 *
                 * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
@@ -756,7 +742,8 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
 
 static void
-__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
+__log_error(unsigned int bank, bool deferred_err, bool use_smca_destat,
+                              bool threshold_err, u64 misc)
 {
        u32 msr_status = msr_ops.status(bank);
        u32 msr_addr = msr_ops.addr(bank);
@@ -765,7 +752,7 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 
        WARN_ON_ONCE(deferred_err && threshold_err);
 
-       if (deferred_err && mce_flags.smca) {
+       if (deferred_err && use_smca_destat) {
                msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
                msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
        }
@@ -807,6 +794,10 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 
        mce_log(&m);
 
+       /* We should still clear MCA_DESTAT even if we used MCA_STATUS. */
+       if (mce_flags.smca && !use_smca_destat)
+               wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+
        wrmsrl(msr_status, 0);
 }
 
@@ -832,25 +823,29 @@ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
        exiting_ack_irq();
 }
 
+static inline bool check_deferred_status(u64 status)
+{
+       return ((status & MCI_STATUS_VAL) && (status & MCI_STATUS_DEFERRED));
+}
+
 /* APIC interrupt handler for deferred errors */
 static void amd_deferred_error_interrupt(void)
 {
        unsigned int bank;
-       u32 msr_status;
        u64 status;
 
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
-               msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
-                                             : msr_ops.status(bank);
+               rdmsrl(msr_ops.status(bank), status);
 
-               rdmsrl(msr_status, status);
+               if (check_deferred_status(status)) {
+                       __log_error(bank, true, false, false, 0);
 
-               if (!(status & MCI_STATUS_VAL) ||
-                   !(status & MCI_STATUS_DEFERRED))
-                       continue;
+               } else if (mce_flags.smca) {
+                       rdmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), status);
 
-               __log_error(bank, true, false, 0);
-               break;
+                       if (check_deferred_status(status))
+                               __log_error(bank, true, true, false, 0);
+               }
        }
 }
 
@@ -904,7 +899,7 @@ static void amd_threshold_interrupt(void)
        return;
 
 log:
-       __log_error(bank, false, true, ((u64)high << 32) | low);
+       __log_error(bank, false, false, true, ((u64)high << 32) | low);
 
        /* Reset threshold block after logging error. */
        memset(&tr, 0, sizeof(tr));
-- 
2.7.4

Reply via email to