[PATCH 02/15] x86/mce: Add support for new MCA_SYND register

2016-09-12 Thread Borislav Petkov
From: Yazen Ghannam 

Syndrome information is no longer contained in MCA_STATUS for SMCA
systems but in a new register - MCA_SYND.

Add a synd field to struct mce to hold MCA_SYND register value. Add it
to the end of struct mce to maintain compatibility with old versions of
mcelog. Also, add it to the respective tracepoint.

Signed-off-by: Yazen Ghannam 
Cc: Aravind Gopalakrishnan 
Cc: Ashok Raj 
Cc: linux-edac 
Cc: Steven Rostedt 
Cc: Tony Luck 
Cc: x86-ml 
Link: http://lkml.kernel.org/r/1467633035-32080-1-git-send-email-yazen.ghan...@amd.com
Signed-off-by: Borislav Petkov 
---
 arch/x86/include/asm/mce.h           | 5 ++++-
 arch/x86/include/uapi/asm/mce.h      | 1 +
 arch/x86/kernel/cpu/mcheck/mce.c     | 4 ++++
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 +++
 include/trace/events/mce.h           | 6 ++++--
 5 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8bf766ef0e18..21bc5a3a4c89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,9 +40,10 @@
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
 /* AMD-specific bits */
+#define MCI_STATUS_TCC (1ULL<<55)  /* Task context corrupt */
+#define MCI_STATUS_SYNDV   (1ULL<<53)  /* synd reg. valid */
 #define MCI_STATUS_DEFERRED (1ULL<<44)  /* uncorrected error, deferred exception */
 #define MCI_STATUS_POISON  (1ULL<<43)  /* access poisonous data */
-#define MCI_STATUS_TCC (1ULL<<55)  /* Task context corrupt */
 
 /*
  * McaX field if set indicates a given bank supports MCA extensions:
@@ -110,6 +111,7 @@
 #define MSR_AMD64_SMCA_MC0_MISC0   0xc0002003
 #define MSR_AMD64_SMCA_MC0_CONFIG  0xc0002004
 #define MSR_AMD64_SMCA_MC0_IPID    0xc0002005
+#define MSR_AMD64_SMCA_MC0_SYND    0xc0002006
 #define MSR_AMD64_SMCA_MC0_DESTAT  0xc0002008
 #define MSR_AMD64_SMCA_MC0_DEADDR  0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1   0xc000200a
@@ -119,6 +121,7 @@
 #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_CONFIG(x)   (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DESTAT(x)   (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DEADDR(x)   (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 2184943341bf..8c75fbc94c3f 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -26,6 +26,7 @@ struct mce {
__u32 socketid; /* CPU socket ID */
__u32 apicid;   /* CPU initial apic ID */
__u64 mcgcap;   /* MCGCAP MSR: machine check capabilities of CPU */
+   __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
 };
 
 #define MCE_GET_RECORD_LEN   _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 79d8ec849468..7f11ea5b75fa 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -568,6 +568,7 @@ static void mce_read_aux(struct mce *m, int i)
 {
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(msr_ops.misc(i));
+
if (m->status & MCI_STATUS_ADDRV) {
m->addr = mce_rdmsrl(msr_ops.addr(i));
 
@@ -580,6 +581,9 @@ static void mce_read_aux(struct mce *m, int i)
m->addr <<= shift;
}
}
+
+   if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV))
+   m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
 }
 
 static bool memory_error(struct mce *m)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 78b7681f7f66..419e0ee3b12f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -479,6 +479,9 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(msr_addr, m.addr);
 
+   if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV))
+   rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+
mce_log(&m);
 
wrmsrl(msr_status, 0);
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 4cbbcef6baa8..8be5268caf28 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -20,6 +20,7 @@ TRACE_EVENT(mce_record,
__field(u64,status  )
__field(u64,addr)

[PATCH 02/15] x86/mce: Add support for new MCA_SYND register

2016-09-12 Thread Borislav Petkov
From: Yazen Ghannam 

Syndrome information is no longer contained in MCA_STATUS for SMCA
systems but in a new register - MCA_SYND.

Add a synd field to struct mce to hold MCA_SYND register value. Add it
to the end of struct mce to maintain compatibility with old versions of
mcelog. Also, add it to the respective tracepoint.

Signed-off-by: Yazen Ghannam 
Cc: Aravind Gopalakrishnan 
Cc: Ashok Raj 
Cc: linux-edac 
Cc: Steven Rostedt 
Cc: Tony Luck 
Cc: x86-ml 
Link: http://lkml.kernel.org/r/1467633035-32080-1-git-send-email-yazen.ghan...@amd.com
Signed-off-by: Borislav Petkov 
---
 arch/x86/include/asm/mce.h           | 5 ++++-
 arch/x86/include/uapi/asm/mce.h      | 1 +
 arch/x86/kernel/cpu/mcheck/mce.c     | 4 ++++
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 +++
 include/trace/events/mce.h           | 6 ++++--
 5 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8bf766ef0e18..21bc5a3a4c89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,9 +40,10 @@
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
 /* AMD-specific bits */
+#define MCI_STATUS_TCC (1ULL<<55)  /* Task context corrupt */
+#define MCI_STATUS_SYNDV   (1ULL<<53)  /* synd reg. valid */
 #define MCI_STATUS_DEFERRED (1ULL<<44)  /* uncorrected error, deferred exception */
 #define MCI_STATUS_POISON  (1ULL<<43)  /* access poisonous data */
-#define MCI_STATUS_TCC (1ULL<<55)  /* Task context corrupt */
 
 /*
  * McaX field if set indicates a given bank supports MCA extensions:
@@ -110,6 +111,7 @@
 #define MSR_AMD64_SMCA_MC0_MISC0   0xc0002003
 #define MSR_AMD64_SMCA_MC0_CONFIG  0xc0002004
 #define MSR_AMD64_SMCA_MC0_IPID    0xc0002005
+#define MSR_AMD64_SMCA_MC0_SYND    0xc0002006
 #define MSR_AMD64_SMCA_MC0_DESTAT  0xc0002008
 #define MSR_AMD64_SMCA_MC0_DEADDR  0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1   0xc000200a
@@ -119,6 +121,7 @@
 #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_CONFIG(x)   (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DESTAT(x)   (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DEADDR(x)   (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 2184943341bf..8c75fbc94c3f 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -26,6 +26,7 @@ struct mce {
__u32 socketid; /* CPU socket ID */
__u32 apicid;   /* CPU initial apic ID */
__u64 mcgcap;   /* MCGCAP MSR: machine check capabilities of CPU */
+   __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
 };
 
 #define MCE_GET_RECORD_LEN   _IOR('M', 1, int)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 79d8ec849468..7f11ea5b75fa 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -568,6 +568,7 @@ static void mce_read_aux(struct mce *m, int i)
 {
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(msr_ops.misc(i));
+
if (m->status & MCI_STATUS_ADDRV) {
m->addr = mce_rdmsrl(msr_ops.addr(i));
 
@@ -580,6 +581,9 @@ static void mce_read_aux(struct mce *m, int i)
m->addr <<= shift;
}
}
+
+   if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV))
+   m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
 }
 
 static bool memory_error(struct mce *m)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 78b7681f7f66..419e0ee3b12f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -479,6 +479,9 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(msr_addr, m.addr);
 
+   if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV))
+   rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+
mce_log(&m);
 
wrmsrl(msr_status, 0);
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 4cbbcef6baa8..8be5268caf28 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -20,6 +20,7 @@ TRACE_EVENT(mce_record,
__field(u64,status  )
__field(u64,addr)
__field(u64,misc)
+   __field(u64,synd)
__field(u64,ip  )