For bank 4 errors, the MCE is logged and reported only on the
node base core. Refer to the D18F3x44[NbMcaToMstCpuEn] field in
the Fam10h and later BKDGs.

This patch ensures that we inject the error on the node base core
for bank 4 errors. Otherwise, triggering #MC or APIC interrupts on
a non-node-base core would not have any effect on the system,
i.e., we would not see any relevant output in the kernel logs for
the error we just injected.

Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrish...@amd.com>
---
 drivers/edac/mce_amd_inj.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index b7e108c..45aac4f 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -17,9 +17,12 @@
 #include <linux/cpu.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
+#include <linux/pci.h>
 #include <asm/mce.h>
+#include <asm/amd_nb.h>
 
 #include "mce_amd.h"
+#include "amd64_edac.h"
 
 /*
  * Collect all the MCi_XXX settings
@@ -200,6 +203,44 @@ static void trigger_thr_int(void *info)
        asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
 }
 
+/* Map @node_id to the CPU number of that node's base core (NBC). */
+static u32 amd_get_nbc_for_node(int node_id)
+{
+       /* x86_max_cores divided evenly across amd_get_nodes_cnt() nodes. */
+       u32 per_node = boot_cpu_data.x86_max_cores / amd_get_nodes_cnt();
+
+       /* Assumes node N's cores start at N * per_node -- TODO confirm. */
+       return per_node * node_id;
+}
+
+/* Ensure D18F3x44[NbMcaToMstCpuEn] (NBCFG bit 27) is set on node @nid. */
+static void toggle_nb_mca_mst_cpu(u16 nid)
+{
+       struct amd_northbridge *nb = node_to_amd_nb(nid);
+       struct pci_dev *F3 = nb ? nb->misc : NULL;      /* lookup may fail */
+       u32 val;
+       int err;
+
+       if (!F3)
+               return;
+
+       err = pci_read_config_dword(F3, NBCFG, &val);
+       if (err) {
+               pr_err("%s: Error reading F%dx%03x.\n",
+                      __func__, PCI_FUNC(F3->devfn), NBCFG);
+               return;
+       }
+
+       if (!(val & BIT(27))) {
+               pr_err("%s: BIOS not setting D18F3x44[NbMcaToMstCpuEn]. Doing that here\n", __func__);
+               val |= BIT(27);
+               err = pci_write_config_dword(F3, NBCFG, val);
+               if (err)
+                       pr_err("%s: Error writing F%dx%03x.\n",
+                              __func__, PCI_FUNC(F3->devfn), NBCFG);
+       }
+}
+
 static void do_inject(void)
 {
        u64 mcg_status = 0;
@@ -235,6 +276,20 @@ static void do_inject(void)
        if (!(i_mce.status & MCI_STATUS_PCC))
                mcg_status |= MCG_STATUS_RIPV;
 
+       /*
+        * For multi node cpus, logging and reporting of bank == 4 errors
+        * happen only on the node base core. Refer D18F3x44[NbMcaToMstCpuEn]
+        * for Fam10h and later BKDGs
+        */
+       if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+               /*
+                * BIOS sets D18F3x44[NbMcaToMstCpuEn] by default.
+                * But make sure of it here just in case..
+                */
+               toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
+               cpu = amd_get_nbc_for_node(amd_get_nb_id(cpu));
+       }
+
        toggle_hw_mce_inject(cpu, true);
 
        wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
-- 
2.4.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to