Add support for parsing the ARM Error Source Table and basic handling of
errors reported through both memory mapped and system register interfaces.

Signed-off-by: Tyler Baicar <bai...@os.amperecomputing.com>
---
 arch/arm64/include/asm/ras.h |  41 +++++
 arch/arm64/kernel/Makefile   |   2 +-
 arch/arm64/kernel/ras.c      |  67 ++++++++
 drivers/acpi/arm64/Kconfig   |   3 +
 drivers/acpi/arm64/Makefile  |   1 +
 drivers/acpi/arm64/aest.c    | 362 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi_aest.h    |  94 +++++++++++
 7 files changed, 569 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/include/asm/ras.h
 create mode 100644 arch/arm64/kernel/ras.c
 create mode 100644 drivers/acpi/arm64/aest.c
 create mode 100644 include/linux/acpi_aest.h

diff --git a/arch/arm64/include/asm/ras.h b/arch/arm64/include/asm/ras.h
new file mode 100644
index 0000000..36bfff4
--- /dev/null
+++ b/arch/arm64/include/asm/ras.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_RAS_H
+#define __ASM_RAS_H
+
+#define ERR_STATUS_AV          BIT(31) /* ERR<n>ADDR holds a valid address */
+#define ERR_STATUS_V           BIT(30) /* record is valid; others are skipped */
+#define ERR_STATUS_UE          BIT(29) /* uncorrected error; handled as fatal */
+#define ERR_STATUS_ER          BIT(28) /* error reported */
+#define ERR_STATUS_OF          BIT(27) /* overflow */
+#define ERR_STATUS_MV          BIT(26) /* ERR<n>MISC<m> contents are valid */
+#define ERR_STATUS_CE_SHIFT    24 /* corrected error field, bits [25:24] */
+#define ERR_STATUS_CE_MASK     0x3
+#define ERR_STATUS_DE          BIT(23) /* deferred error */
+#define ERR_STATUS_PN          BIT(22) /* poison */
+#define ERR_STATUS_UET_SHIFT   20 /* uncorrected error type, bits [21:20] */
+#define ERR_STATUS_UET_MASK    0x3
+#define ERR_STATUS_IERR_SHIFT  8 /* implementation defined error code, bits [15:8] */
+#define ERR_STATUS_IERR_MASK   0xff
+#define ERR_STATUS_SERR_SHIFT  0 /* primary error code, bits [7:0] */
+#define ERR_STATUS_SERR_MASK   0xff
+
+#define ERR_FR_CEC_SHIFT       12 /* corrected error counter field of ERR<n>FR, bits [14:12] */
+#define ERR_FR_CEC_MASK                0x7
+
+#define ERR_FR_8B_CEC          BIT(1) /* CEC field value: 8-bit counter implemented */
+#define ERR_FR_16B_CEC         BIT(2) /* CEC field value: 16-bit counter implemented */
+
+struct ras_ext_regs { /* one error record; aest.c also uses it as the MMIO record layout */
+       u64 err_fr;     /* ERR<n>FR: feature register */
+       u64 err_ctlr;   /* ERR<n>CTLR: control register */
+       u64 err_status; /* ERR<n>STATUS: decoded with the ERR_STATUS_* bits */
+       u64 err_addr;   /* ERR<n>ADDR: only read when ERR_STATUS_AV is set */
+       u64 err_misc0;  /* ERR<n>MISC0: only read when ERR_STATUS_MV is set */
+       u64 err_misc1;  /* ERR<n>MISC1: only read when ERR_STATUS_MV is set */
+       u64 err_misc2;  /* not read by ras.c or aest.c */
+       u64 err_misc3;  /* not read by ras.c or aest.c */
+};
+
+void arch_arm_ras_report_error(void);
+
+#endif /* __ASM_RAS_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9e7dcb2..294f602 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -19,7 +19,7 @@ obj-y                 := debug-monitors.o entry.o irq.o fpsimd.o              \
                           return_address.o cpuinfo.o cpu_errata.o              \
                           cpufeature.o alternative.o cacheinfo.o               \
                           smp.o smp_spin_table.o topology.o smccc-call.o       \
-                          syscall.o
+                          syscall.o ras.o
 
 extra-$(CONFIG_EFI)                    := efi-entry.o
 
diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
new file mode 100644
index 0000000..ca47efa
--- /dev/null
+++ b/arch/arm64/kernel/ras.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+
+#include <asm/barrier.h>
+#include <asm/ras.h>
+
+/*
+ * Scan the error records of the calling CPU through the RAS extension
+ * system registers, log every valid record and clear it.
+ *
+ * Does nothing on a CPU without the RAS extension.  Panics, after all
+ * records have been logged, if any of them reported an uncorrected error.
+ */
+void arch_arm_ras_report_error(void)
+{
+       u64 num_records;
+       unsigned int i, cpu_num;
+       bool fatal = false;
+       struct ras_ext_regs regs;
+
+       /*
+        * Disable preemption first: both the capability check and the
+        * error record registers belong to the CPU we are running on.
+        */
+       cpu_num = get_cpu();
+
+       if (!this_cpu_has_cap(ARM64_HAS_RAS_EXTN))
+               goto out;
+
+       /* Only ERRIDR_EL1[15:0] (NUM) holds the number of records */
+       num_records = read_sysreg_s(SYS_ERRIDR_EL1) & 0xffff;
+
+       for (i = 0; i < num_records; i++) {
+               /*
+                * Select record i.  The ISB ensures the ERX* accesses
+                * below operate on the newly selected record.
+                */
+               write_sysreg_s(i, SYS_ERRSELR_EL1);
+               isb();
+               regs.err_status = read_sysreg_s(SYS_ERXSTATUS_EL1);
+
+               if (!(regs.err_status & ERR_STATUS_V))
+                       continue;
+
+               pr_err("CPU%u: ERR%uSTATUS: 0x%llx\n", cpu_num, i,
+                      regs.err_status);
+
+               if (regs.err_status & ERR_STATUS_AV) {
+                       regs.err_addr = read_sysreg_s(SYS_ERXADDR_EL1);
+                       pr_err("CPU%u: ERR%uADDR: 0x%llx\n", cpu_num, i,
+                              regs.err_addr);
+               } else {
+                       regs.err_addr = 0;
+               }
+
+               regs.err_fr = read_sysreg_s(SYS_ERXFR_EL1);
+               pr_err("CPU%u: ERR%uFR: 0x%llx\n", cpu_num, i, regs.err_fr);
+               regs.err_ctlr = read_sysreg_s(SYS_ERXCTLR_EL1);
+               pr_err("CPU%u: ERR%uCTLR: 0x%llx\n", cpu_num, i, regs.err_ctlr);
+
+               if (regs.err_status & ERR_STATUS_MV) {
+                       regs.err_misc0 = read_sysreg_s(SYS_ERXMISC0_EL1);
+                       pr_err("CPU%u: ERR%uMISC0: 0x%llx\n", cpu_num, i,
+                              regs.err_misc0);
+                       regs.err_misc1 = read_sysreg_s(SYS_ERXMISC1_EL1);
+                       pr_err("CPU%u: ERR%uMISC1: 0x%llx\n", cpu_num, i,
+                              regs.err_misc1);
+               }
+
+               /*
+                * In the future, we will treat UER conditions as potentially
+                * recoverable.
+                */
+               if (regs.err_status & ERR_STATUS_UE)
+                       fatal = true;
+
+               /* Write the status back to clear the bits that were set */
+               write_sysreg_s(regs.err_status, SYS_ERXSTATUS_EL1);
+       }
+
+       if (fatal)
+               panic("uncorrectable error encountered");
+
+out:
+       put_cpu();
+}
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index 6dba187..8d5cf99 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -8,3 +8,6 @@ config ACPI_IORT
 
 config ACPI_GTDT
        bool
+
+config ACPI_AEST
+       bool "ARM Error Source Table Support"
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 6ff50f4..ea1ba28 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ACPI_IORT)        += iort.o
 obj-$(CONFIG_ACPI_GTDT)        += gtdt.o
+obj-$(CONFIG_ACPI_AEST)        += aest.o
diff --git a/drivers/acpi/arm64/aest.c b/drivers/acpi/arm64/aest.c
new file mode 100644
index 0000000..fd4f3b5
--- /dev/null
+++ b/drivers/acpi/arm64/aest.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* ARM Error Source Table Support */
+
+#include <linux/acpi.h>
+#include <linux/acpi_aest.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/ratelimit.h>
+
+#include <asm/ras.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "ACPI AEST: " fmt
+
+static struct acpi_table_header *aest_table;
+
+static struct aest_node_data __percpu **ppi_data;
+static u8 num_ppi;
+static u8 ppi_idx;
+
+/*
+ * Log the contents of one error record.
+ *
+ * @data:  node the record belongs to; selects the "error from ..." line
+ * @regs:  snapshot of the record's registers
+ * @index: record index, used in the register names that are printed
+ *
+ * Records with neither UE nor DE set are rate limited.
+ */
+static void aest_print(struct aest_node_data *data, struct ras_ext_regs regs,
+                      int index)
+{
+       /* No more than 2 corrected messages every 5 seconds */
+       static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+       const u64 status = regs.err_status;
+
+       /* The rate limit is only consulted when neither UE nor DE is set */
+       if (!(status & ERR_STATUS_UE) && !(status & ERR_STATUS_DE) &&
+           !__ratelimit(&ratelimit_corrected))
+               return;
+
+       if (data->node_type == AEST_NODE_TYPE_PROC)
+               pr_err("error from processor 0x%x\n", data->data.proc.id);
+       else if (data->node_type == AEST_NODE_TYPE_MEM)
+               pr_err("error from memory domain 0x%x\n",
+                      data->data.mem.domain);
+       else if (data->node_type == AEST_NODE_TYPE_VENDOR)
+               pr_err("error from vendor specific source 0x%x\n",
+                      data->data.vendor.id);
+
+       pr_err("ERR%dSTATUS = 0x%llx\n", index, status);
+       if (status & ERR_STATUS_AV)
+               pr_err("ERR%dADDR = 0x%llx\n", index, regs.err_addr);
+
+       pr_err("ERR%dFR = 0x%llx\n", index, regs.err_fr);
+       pr_err("ERR%dCTLR = 0x%llx\n", index, regs.err_ctlr);
+
+       if (!(status & ERR_STATUS_MV))
+               return;
+
+       pr_err("ERR%dMISC0 = 0x%llx\n", index, regs.err_misc0);
+       pr_err("ERR%dMISC1 = 0x%llx\n", index, regs.err_misc1);
+}
+
+/*
+ * Process the error records of one AEST node.
+ *
+ * System register nodes are delegated to arch_arm_ras_report_error().
+ * For memory mapped nodes every record in [interface.start, interface.end)
+ * is read, printed if valid, and cleared by writing its status back.
+ * Panics after the scan if any valid record had ERR_STATUS_UE set.
+ */
+static void aest_proc(struct aest_node_data *data)
+{
+       struct ras_ext_regs *base, *rec, snap;
+       bool saw_ue = false;
+       int idx;
+
+       /*
+        * Currently SR based handling is done through the architected
+        * discovery exposed through SRs. That may change in the future
+        * if there is supplemental information in the AEST that is
+        * needed.
+        */
+       if (data->interface.type == AEST_SYSTEM_REG_INTERFACE) {
+               arch_arm_ras_report_error();
+               return;
+       }
+
+       base = data->interface.regs;
+
+       for (idx = data->interface.start; idx < data->interface.end; idx++) {
+               rec = &base[idx];
+
+               snap.err_status = readq(&rec->err_status);
+               if (!(snap.err_status & ERR_STATUS_V))
+                       continue;
+
+               /* The address register is only read when flagged valid */
+               snap.err_addr = 0;
+               if (snap.err_status & ERR_STATUS_AV)
+                       snap.err_addr = readq(&rec->err_addr);
+
+               snap.err_fr = readq(&rec->err_fr);
+               snap.err_ctlr = readq(&rec->err_ctlr);
+
+               /* Likewise for the two miscellaneous registers */
+               snap.err_misc0 = 0;
+               snap.err_misc1 = 0;
+               if (snap.err_status & ERR_STATUS_MV) {
+                       snap.err_misc0 = readq(&rec->err_misc0);
+                       snap.err_misc1 = readq(&rec->err_misc1);
+               }
+
+               aest_print(data, snap, idx);
+
+               if (snap.err_status & ERR_STATUS_UE)
+                       saw_ue = true;
+
+               /* Writing the status value back clears the record */
+               writeq(snap.err_status, &rec->err_status);
+       }
+
+       if (saw_ue)
+               panic("AEST: uncorrectable error encountered");
+}
+
+/*
+ * Interrupt handler shared by every AEST interrupt: @input is the
+ * struct aest_node_data that was passed when the IRQ was requested.
+ * Always reports the interrupt as handled.
+ */
+static irqreturn_t aest_irq_func(int irq, void *input)
+{
+       aest_proc(input);
+       return IRQ_HANDLED;
+}
+
+/*
+ * Map one AEST interrupt and install aest_irq_func() for it.
+ *
+ * @gsi:     GSI from the AEST interrupt entry
+ * @trigger: ACPI_LEVEL_SENSITIVE or ACPI_EDGE_SENSITIVE
+ * @data:    struct aest_node_data of the owning node; handed to the
+ *           handler directly for GSIs 32-1019, or copied into the next free
+ *           per-CPU slot of ppi_data[] for GSIs 16-31 (PPIs)
+ *
+ * Returns 0 on success, -EINVAL on any failure.
+ */
+static int __init aest_register_gsi(u32 gsi, int trigger, void *data)
+{
+       int cpu, irq;
+       bool is_ppi;
+
+       /* Validate the GSI before creating a mapping for it */
+       if (gsi < 16 || gsi >= 1020) {
+               pr_err("invalid GSI %u\n", gsi);
+               return -EINVAL;
+       }
+
+       is_ppi = gsi < 32;
+
+       /* Only num_ppi per-CPU slots were allocated by acpi_aest_init() */
+       if (is_ppi && ppi_idx >= num_ppi) {
+               pr_err("Unable to register PPI %u\n", gsi);
+               return -EINVAL;
+       }
+
+       /* A failed mapping may be reported as a negative errno or as 0 */
+       irq = acpi_register_gsi(NULL, gsi, trigger, ACPI_ACTIVE_HIGH);
+       if (irq <= 0) {
+               pr_err("failed to map AEST GSI %u\n", gsi);
+               return -EINVAL;
+       }
+
+       if (!is_ppi) {
+               if (request_irq(irq, aest_irq_func, IRQF_SHARED, "AEST",
+                               data)) {
+                       pr_err("failed to register AEST IRQ %d\n", irq);
+                       return -EINVAL;
+               }
+               return 0;
+       }
+
+       for_each_possible_cpu(cpu) {
+               memcpy(per_cpu_ptr(ppi_data[ppi_idx], cpu), data,
+                      sizeof(struct aest_node_data));
+       }
+
+       if (request_percpu_irq(irq, aest_irq_func, "AEST",
+                              ppi_data[ppi_idx])) {
+               pr_err("failed to register AEST IRQ %d\n", irq);
+               return -EINVAL;
+       }
+
+       /*
+        * Enable only once a handler is installed.
+        * NOTE(review): enable_percpu_irq() acts on the calling CPU only;
+        * confirm how the other CPUs are expected to enable this PPI.
+        */
+       enable_percpu_irq(irq, IRQ_TYPE_NONE);
+
+       /* The slot is consumed only after a successful registration */
+       ppi_idx++;
+
+       return 0;
+}
+
+/*
+ * Register every interrupt listed in @node's interrupt array, passing
+ * @data to each registration.
+ *
+ * All entries are attempted even if one fails.  Returns 0 when every
+ * registration succeeded, -EINVAL otherwise.
+ */
+static int __init aest_init_interrupts(struct aest_type_header *node,
+                                      struct aest_node_data *data)
+{
+       struct aest_interrupt *entry;
+       int n, mode, status = 0;
+
+       entry = ACPI_ADD_PTR(struct aest_interrupt, node,
+                            node->interrupt_offset);
+
+       for (n = 0; n < node->interrupt_size; n++) {
+               /* AEST_INTERRUPT_MODE set means level, clear means edge */
+               if (entry[n].flags & AEST_INTERRUPT_MODE)
+                       mode = ACPI_LEVEL_SENSITIVE;
+               else
+                       mode = ACPI_EDGE_SENSITIVE;
+
+               if (aest_register_gsi(entry[n].gsiv, mode, data))
+                       status = -EINVAL;
+       }
+
+       return status;
+}
+
+/*
+ * Parse the interface structure of @node into @data->interface and, for a
+ * memory mapped interface, claim and map the error record registers.
+ *
+ * Returns 0 on success, -EINVAL for an invalid interface description,
+ * -EEXIST if the register region is already claimed, and -ENOMEM if an
+ * allocation or the mapping fails.
+ */
+static int __init aest_init_interface(struct aest_type_header *node,
+                                      struct aest_node_data *data)
+{
+       struct aest_interface *interface;
+       struct resource *res;
+       size_t size;
+       u32 end_index;
+
+       interface = ACPI_ADD_PTR(struct aest_interface, node,
+                                node->interface_offset);
+
+       if (interface->type > AEST_MEMORY_MAPPED_INTERFACE) {
+               pr_err("invalid interface type: %d\n", interface->type);
+               return -EINVAL;
+       }
+
+       data->interface.type = interface->type;
+
+       /*
+        * Currently SR based handling is done through the architected
+        * discovery exposed through SRs. That may change in the future
+        * if there is supplemental information in the AEST that is
+        * needed.
+        */
+       if (interface->type == AEST_SYSTEM_REG_INTERFACE)
+               return 0;
+
+       /* interface.start/end are u16, so the end index must fit as well */
+       end_index = interface->start_index + interface->num_records;
+       if (!interface->num_records || end_index > U16_MAX) {
+               pr_err("invalid record range: start %u, count %u\n",
+                      interface->start_index, interface->num_records);
+               return -EINVAL;
+       }
+
+       /*
+        * aest_proc() indexes records start_index..end_index-1 from the
+        * mapped base, so the region has to reach the last of them rather
+        * than just span num_records records.
+        */
+       size = (size_t)end_index * sizeof(struct ras_ext_regs);
+
+       res = kzalloc(sizeof(*res), GFP_KERNEL);
+       if (!res)
+               return -ENOMEM;
+
+       res->name = "AEST";
+       res->start = interface->address;
+       /* A resource's end address is inclusive */
+       res->end = res->start + size - 1;
+       res->flags = IORESOURCE_MEM;
+       if (request_resource_conflict(&iomem_resource, res)) {
+               pr_err("unable to request region starting at 0x%llx\n",
+                       res->start);
+               kfree(res);
+               return -EEXIST;
+       }
+
+       data->interface.regs = ioremap(interface->address, size);
+       if (!data->interface.regs) {
+               /* Give the region back instead of leaking the claim */
+               release_resource(res);
+               kfree(res);
+               return -ENOMEM;
+       }
+
+       data->interface.start = interface->start_index;
+       data->interface.end = end_index;
+
+       return 0;
+}
+
+/*
+ * Set up one AEST node: copy its node-specific data, initialise its
+ * register interface and register its interrupts.
+ *
+ * Returns 0 on success, -EINVAL for an unknown node type, -ENOMEM if the
+ * node data cannot be allocated, or the error returned by the interface
+ * or interrupt setup.
+ */
+static int __init aest_init_node(struct aest_type_header *node)
+{
+       struct aest_node_data *data;
+       union aest_node_spec *node_spec;
+       size_t spec_size;
+       int ret;
+
+       /* Check the type before allocating so nothing leaks on bad input */
+       switch (node->type) {
+       case AEST_NODE_TYPE_PROC:
+               spec_size = sizeof(struct aest_proc_data);
+               break;
+       case AEST_NODE_TYPE_MEM:
+               spec_size = sizeof(struct aest_mem_data);
+               break;
+       case AEST_NODE_TYPE_VENDOR:
+               spec_size = sizeof(struct aest_vendor_data);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       data->node_type = node->type;
+
+       node_spec = ACPI_ADD_PTR(union aest_node_spec, node, node->data_offset);
+       memcpy(&data->data, node_spec, spec_size);
+
+       ret = aest_init_interface(node, data);
+       if (ret) {
+               kfree(data);
+               return ret;
+       }
+
+       /*
+        * data is deliberately not freed if this fails: interrupts that did
+        * register successfully keep it as their handler argument.
+        */
+       return aest_init_interrupts(node, data);
+}
+
+/*
+ * Add the number of interrupts of @node whose GSI lies in the range 16-31
+ * (registered as per-CPU interrupts by aest_register_gsi()) to num_ppi.
+ */
+static void aest_count_ppi(struct aest_type_header *node)
+{
+       struct aest_interrupt *entry;
+       int n;
+
+       entry = ACPI_ADD_PTR(struct aest_interrupt, node,
+                            node->interrupt_offset);
+
+       for (n = 0; n < node->interrupt_size; n++) {
+               if (entry[n].gsiv < 16 || entry[n].gsiv >= 32)
+                       continue;
+
+               num_ppi++;
+       }
+}
+
+/*
+ * Locate the AEST and initialise every node in it.
+ *
+ * A first pass validates the node lengths and counts the per-CPU
+ * interrupts so their per-CPU data can be allocated; a second pass sets up
+ * each node.  A node that fails to initialise is logged and skipped.
+ *
+ * Returns 0 on success or when ACPI is disabled, -EINVAL if the table is
+ * missing or malformed, -ENOMEM if the per-CPU data cannot be allocated.
+ */
+int __init acpi_aest_init(void)
+{
+       struct acpi_table_aest *aest;
+       struct aest_type_header *aest_node, *aest_end;
+       int i, ret = 0;
+
+       if (acpi_disabled)
+               return 0;
+
+       if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_AEST, 0, &aest_table)))
+               return -EINVAL;
+
+       aest = (struct acpi_table_aest *)aest_table;
+
+       /* Get the first AEST node */
+       aest_node = ACPI_ADD_PTR(struct aest_type_header, aest,
+                                sizeof(struct acpi_table_aest));
+       /* Pointer to the end of the AEST table */
+       aest_end = ACPI_ADD_PTR(struct aest_type_header, aest,
+                               aest_table->length);
+
+       while (aest_node < aest_end) {
+               u64 remaining = (u64)aest_end - (u64)aest_node;
+
+               /*
+                * The header must fit in what is left of the table, and the
+                * node must be at least a header long (a zero length would
+                * never advance) and must not run past the table end.
+                */
+               if (remaining < sizeof(*aest_node) ||
+                   aest_node->length < sizeof(*aest_node) ||
+                   aest_node->length > remaining) {
+                       pr_err("AEST node pointer overflow, bad table\n");
+                       return -EINVAL;
+               }
+
+               aest_count_ppi(aest_node);
+
+               aest_node = ACPI_ADD_PTR(struct aest_type_header, aest_node,
+                                        aest_node->length);
+       }
+
+       if (num_ppi > AEST_MAX_PPI) {
+               pr_err("Limiting PPI support to %d PPIs\n", AEST_MAX_PPI);
+               num_ppi = AEST_MAX_PPI;
+       }
+
+       ppi_data = kcalloc(num_ppi, sizeof(*ppi_data), GFP_KERNEL);
+       if (!ppi_data) {
+               pr_err("Failed PPI data allocation\n");
+               num_ppi = 0;
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < num_ppi; i++) {
+               ppi_data[i] = alloc_percpu(struct aest_node_data);
+               if (!ppi_data[i]) {
+                       ret = -ENOMEM;
+                       break;
+               }
+       }
+
+       if (ret) {
+               pr_err("Failed percpu allocation\n");
+               /* i is the slot that failed; release the ones before it */
+               while (i--)
+                       free_percpu(ppi_data[i]);
+               kfree(ppi_data);
+               ppi_data = NULL;
+               num_ppi = 0;
+               return ret;
+       }
+
+       aest_node = ACPI_ADD_PTR(struct aest_type_header, aest,
+                                sizeof(struct acpi_table_aest));
+
+       /* Node lengths were validated by the first pass */
+       while (aest_node < aest_end) {
+               ret = aest_init_node(aest_node);
+               if (ret)
+                       pr_err("failed to init node: %d\n", ret);
+
+               aest_node = ACPI_ADD_PTR(struct aest_type_header, aest_node,
+                                        aest_node->length);
+       }
+
+       return 0;
+}
+
+early_initcall(acpi_aest_init);
diff --git a/include/linux/acpi_aest.h b/include/linux/acpi_aest.h
new file mode 100644
index 0000000..376122b
--- /dev/null
+++ b/include/linux/acpi_aest.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef AEST_H
+#define AEST_H
+
+#include <acpi/actbl.h>
+
+#define ACPI_SIG_AEST                  "AEST"  /* ARM Error Source Table */
+
+#define AEST_NODE_TYPE_PROC            0 /* aest_type_header.type: processor node */
+#define AEST_NODE_TYPE_MEM             1 /* aest_type_header.type: memory node */
+#define AEST_NODE_TYPE_VENDOR          2 /* aest_type_header.type: vendor specific node */
+
+#define AEST_SYSTEM_REG_INTERFACE      0x0 /* aest_interface.type: system registers */
+#define AEST_MEMORY_MAPPED_INTERFACE   0x1 /* aest_interface.type: memory mapped records */
+
+#define AEST_INTERRUPT_MODE            BIT(0) /* aest_interrupt.flags: set = level, clear = edge */
+
+#define AEST_MAX_PPI                   4 /* most per-CPU interrupts aest.c will register */
+
+#pragma pack(1)
+
+struct acpi_table_aest { /* fixed part of the table; nodes follow it */
+       struct acpi_table_header header; /* header.length bounds the node walk */
+};
+
+struct aest_type_header { /* header found at the start of every AEST node */
+       u8 type;                /* one of AEST_NODE_TYPE_* */
+       u16 length;             /* node length in bytes; added to reach the next node */
+       u8 reserved;
+       u32 revision;           /* not used by aest.c */
+       u32 data_offset;        /* from node start to the node-specific data */
+       u32 interface_offset;   /* from node start to struct aest_interface */
+       u32 interface_size;     /* not used by aest.c */
+       u32 interrupt_offset;   /* from node start to the first struct aest_interrupt */
+       u32 interrupt_size;     /* number of struct aest_interrupt entries */
+       u64 timestamp_rate;     /* not used by aest.c */
+       u64 timestamp_start;    /* not used by aest.c */
+       u64 countdown_rate;     /* not used by aest.c */
+};
+
+struct aest_proc_data { /* node-specific data of an AEST_NODE_TYPE_PROC node */
+       u32 id;         /* printed as the processor id when an error is logged */
+       u32 level;      /* not used by aest.c; presumably a cache level - TODO confirm */
+       u32 cache_type; /* not used by aest.c */
+};
+
+struct aest_mem_data { /* node-specific data of an AEST_NODE_TYPE_MEM node */
+       u32 domain; /* printed as the memory domain when an error is logged */
+};
+
+struct aest_vendor_data { /* node-specific data of an AEST_NODE_TYPE_VENDOR node */
+       u32 id;   /* printed as the vendor specific source when an error is logged */
+       u32 data; /* not used by aest.c */
+};
+
+struct aest_interface { /* located at aest_type_header.interface_offset */
+       u8 type;         /* AEST_SYSTEM_REG_INTERFACE or AEST_MEMORY_MAPPED_INTERFACE */
+       u8 reserved[3];
+       u32 flags;       /* not used by aest.c */
+       u64 address;     /* physical base of the records; ioremap()ed by aest.c */
+       u16 start_index; /* index of the first error record of this node */
+       u16 num_records; /* number of error records of this node */
+};
+
+struct aest_interrupt { /* one entry of the array at aest_type_header.interrupt_offset */
+       u8 type;        /* not used by aest.c */
+       u16 reserved;
+       u8 flags;       /* AEST_INTERRUPT_MODE selects level or edge triggering */
+       u32 gsiv;       /* GSI that aest.c maps and requests */
+       u8 iort_id[20]; /* not used by aest.c */
+};
+
+#pragma pack()
+
+struct aest_interface_data { /* driver-side copy of a node's interface description */
+       u8 type;   /* AEST_SYSTEM_REG_INTERFACE or AEST_MEMORY_MAPPED_INTERFACE */
+       u16 start; /* first record index to scan */
+       u16 end;   /* one past the last record index to scan */
+       struct ras_ext_regs *regs; /* ioremap()ed records; unset for the system register interface */
+};
+
+union aest_node_spec { /* node-specific data; the valid member follows the node type */
+       struct aest_proc_data proc;     /* AEST_NODE_TYPE_PROC */
+       struct aest_mem_data mem;       /* AEST_NODE_TYPE_MEM */
+       struct aest_vendor_data vendor; /* AEST_NODE_TYPE_VENDOR */
+};
+
+struct aest_node_data { /* per-node state handed to the AEST interrupt handler */
+       u8 node_type;                         /* one of AEST_NODE_TYPE_* */
+       struct aest_interface_data interface; /* how the node's error records are accessed */
+       union aest_node_spec data;            /* copied from the table according to node_type */
+};
+
+#endif /* AEST_H */
-- 
1.8.3.1

Reply via email to