From: Yazen Ghannam <yazen.ghan...@amd.com> Linux supports the new SMCA MCA_DE{STAT,ADDR} registers, and so far we have used these registers in place of MCA_{STATUS,ADDR} on SMCA systems. However, the guidance for current implementations of SMCA is to continue using MCA_{STATUS,ADDR} and to use MCA_DE{STAT,ADDR} only if a Deferred error was not found in the former registers. This also means we shouldn't clear MCA_CONFIG[LogDeferredInMcaStat].
Redo the AMD Deferred error interrupt handler to follow the guidance for current SMCA systems. Also, don't break after finding the first error. Don't clear MCA_CONFIG[LogDeferredInMcaStat] during AMD mcheck init. Signed-off-by: Yazen Ghannam <yazen.ghan...@amd.com> --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 47 ++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 524cc57..4e459e0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -472,20 +472,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, smca_high |= BIT(0); /* - * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR} - * registers with the option of additionally logging to - * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set. - * - * This bit is usually set by BIOS to retain the old behavior - * for OSes that don't use the new registers. Linux supports the - * new registers so let's disable that additional logging here. - * - * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high - * portion of the MSR). - */ - smca_high &= ~BIT(2); - - /* * SMCA sets the Deferred Error Interrupt type per bank. 
* * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us @@ -756,7 +742,8 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); static void -__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) +__log_error(unsigned int bank, bool deferred_err, bool use_smca_destat, + bool threshold_err, u64 misc) { u32 msr_status = msr_ops.status(bank); u32 msr_addr = msr_ops.addr(bank); @@ -765,7 +752,7 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) WARN_ON_ONCE(deferred_err && threshold_err); - if (deferred_err && mce_flags.smca) { + if (deferred_err && use_smca_destat) { msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank); msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank); } @@ -807,6 +794,10 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) mce_log(&m); + /* We should still clear MCA_DESTAT even if we used MCA_STATUS. */ + if (mce_flags.smca && !use_smca_destat) + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); + wrmsrl(msr_status, 0); } @@ -832,25 +823,29 @@ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void) exiting_ack_irq(); } +static inline bool check_deferred_status(u64 status) +{ + return ((status & MCI_STATUS_VAL) && (status & MCI_STATUS_DEFERRED)); +} + /* APIC interrupt handler for deferred errors */ static void amd_deferred_error_interrupt(void) { unsigned int bank; - u32 msr_status; u64 status; for (bank = 0; bank < mca_cfg.banks; ++bank) { - msr_status = (mce_flags.smca) ? 
MSR_AMD64_SMCA_MCx_DESTAT(bank) - : msr_ops.status(bank); + rdmsrl(msr_ops.status(bank), status); - rdmsrl(msr_status, status); + if (check_deferred_status(status)) { + __log_error(bank, true, false, false, 0); - if (!(status & MCI_STATUS_VAL) || - !(status & MCI_STATUS_DEFERRED)) - continue; + } else if (mce_flags.smca) { + rdmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), status); - __log_error(bank, true, false, 0); - break; + if (check_deferred_status(status)) + __log_error(bank, true, true, false, 0); + } } } @@ -904,7 +899,7 @@ static void amd_threshold_interrupt(void) return; log: - __log_error(bank, false, true, ((u64)high << 32) | low); + __log_error(bank, false, false, true, ((u64)high << 32) | low); /* Reset threshold block after logging error. */ memset(&tr, 0, sizeof(tr)); -- 2.7.4