Until now, the mce_severity mechanism could only classify the
severity of a UCNA error as MCE_KEEP_SEVERITY. It was also
unable to filter out DEFERRED errors on AMD platforms.

This patch extends the mce_severity mechanism to handle
UCNA/DEFERRED errors. To do this, it introduces a new severity
level, MCE_DEFERRED_SEVERITY, with MCE_UCNA_SEVERITY as an alias.

Signed-off-by: Chen Yucong <[email protected]>
---
 arch/x86/include/asm/mce.h                |    4 ++++
 arch/x86/kernel/cpu/mcheck/mce-internal.h |    2 ++
 arch/x86/kernel/cpu/mcheck/mce-severity.c |    6 +++++-
 arch/x86/kernel/cpu/mcheck/mce.c          |    3 ++-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f..40b35a5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S    (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED    (1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON      (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..d32fcbb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
        MCE_NO_SEVERITY,
+       MCE_DEFERRED_SEVERITY,
+       MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
        MCE_KEEP_SEVERITY,
        MCE_SOME_SEVERITY,
        MCE_AO_SEVERITY,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..c12e0a7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -83,13 +83,17 @@ static struct severity {
                KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
                ),
        MCESEV(
+               DEFERRED, "Deferred error",
+               NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
+               ),
+       MCESEV(
                KEEP, "Corrected error",
                NOSER, BITCLR(MCI_STATUS_UC)
                ),
 
        /* ignore OVER for UCNA */
        MCESEV(
-               KEEP, "Uncorrected no action required",
+               UCNA, "Uncorrected no action required",
                SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
                ),
        MCESEV(
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668ce..fdc422e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1101,7 +1101,8 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
                 * When machine check was for corrected handler don't touch,
                 * unless we're panicing.
                 */
-               if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+               if ((severity == MCE_KEEP_SEVERITY ||
+                    severity == MCE_UCNA_SEVERITY) && !no_way_out)
                        continue;
                __set_bit(i, toclear);
                if (severity == MCE_NO_SEVERITY) {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to