Add error-event support for correctable errors in CRI. On receiving an
interrupt, report one error event to userspace for each component that
has crossed its threshold.

Cc: Michal Wajdeczko <[email protected]>
Signed-off-by: Riana Tauro <[email protected]>
---
v2: add warns for unexpected values from system controller (Michal)
    send an event at most once per component for each interrupt (Raag)
    use correct parameters for get_counter (Sashiko)
---
 drivers/gpu/drm/xe/xe_ras.c | 75 +++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
index 44f4e1a3455b..b71d51285954 100644
--- a/drivers/gpu/drm/xe/xe_ras.c
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -77,6 +77,18 @@ static u8 drm_to_xe_ras_severity(u8 severity)
        }
 }
 
+/* Translate an XE_RAS_SEV_* value into the matching DRM_XE_RAS_ERR_SEV_* value. */
+static u8 xe_to_drm_ras_severity(u8 severity)
+{
+       if (severity == XE_RAS_SEV_CORRECTABLE)
+               return DRM_XE_RAS_ERR_SEV_CORRECTABLE;
+       if (severity == XE_RAS_SEV_UNCORRECTABLE)
+               return DRM_XE_RAS_ERR_SEV_UNCORRECTABLE;
+
+       /* Unrecognised severity: DRM_XE_RAS_ERR_SEV_MAX marks "no mapping" */
+       return DRM_XE_RAS_ERR_SEV_MAX;
+}
+
 static u8 drm_to_xe_ras_component(u8 component)
 {
        switch (component) {
@@ -95,6 +107,24 @@ static u8 drm_to_xe_ras_component(u8 component)
        }
 }
 
+/* Translate an XE_RAS_COMP_* value into the matching DRM_XE_RAS_ERR_COMP_* value. */
+static u8 xe_to_drm_ras_component(u8 component)
+{
+       if (component == XE_RAS_COMP_DEVICE_MEMORY)
+               return DRM_XE_RAS_ERR_COMP_DEVICE_MEMORY;
+       if (component == XE_RAS_COMP_CORE_COMPUTE)
+               return DRM_XE_RAS_ERR_COMP_CORE_COMPUTE;
+       if (component == XE_RAS_COMP_PCIE)
+               return DRM_XE_RAS_ERR_COMP_PCIE;
+       if (component == XE_RAS_COMP_FABRIC)
+               return DRM_XE_RAS_ERR_COMP_FABRIC;
+       if (component == XE_RAS_COMP_SOC_INTERNAL)
+               return DRM_XE_RAS_ERR_COMP_SOC_INTERNAL;
+
+       /* Unrecognised component: DRM_XE_RAS_ERR_COMP_MAX marks "no mapping" */
+       return DRM_XE_RAS_ERR_COMP_MAX;
+}
+
 static int ras_status_to_errno(u32 status)
 {
        switch (status) {
@@ -131,14 +161,41 @@ static inline const char *comp_to_str(u8 component)
        return xe_ras_components[component];
 }
 
+/* Look up the counter for @severity/@component and hand it to xe_drm_ras_event(). */
+static void ras_send_error_event(struct xe_device *xe, u8 severity, u8 component)
+{
+       u8 drm_severity = xe_to_drm_ras_severity(severity);
+       u8 drm_component = xe_to_drm_ras_component(component);
+       u32 value;
+       int ret;
+
+       if (drm_severity == DRM_XE_RAS_ERR_SEV_MAX) {
+               xe_warn(xe, "sysctrl: unexpected severity %u\n", severity);
+               return;
+       }
+
+       if (drm_component == DRM_XE_RAS_ERR_COMP_MAX) {
+               xe_warn(xe, "sysctrl: unexpected component %u\n", component);
+               return;
+       }
+
+       ret = xe_ras_get_counter(xe, drm_severity, drm_component, &value);
+       if (ret)
+               return; /* non-zero return from the counter lookup: send no event */
+
+       xe_drm_ras_event(xe, drm_component, drm_severity, value, GFP_KERNEL);
+}
+
 void xe_ras_counter_threshold_crossed(struct xe_device *xe,
                                      struct xe_sysctrl_event_response 
*response)
 {
        struct xe_ras_threshold_crossed *pending = (void *)&response->data;
        struct xe_ras_error_class *errors = pending->counters;
        u32 id, ncounters = pending->ncounters;
+       u8 sent = 0;
 
        BUILD_BUG_ON(sizeof(response->data) < sizeof(*pending));
+       BUILD_BUG_ON(XE_RAS_COMP_MAX > (BITS_PER_BYTE * sizeof(sent)));
        xe_device_assert_mem_access(xe);
 
        if (!ncounters || ncounters > XE_RAS_NUM_COUNTERS)
@@ -154,6 +211,24 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 
                xe_warn(xe, "[RAS]: %s %s detected\n",
                        comp_to_str(component), sev_to_str(severity));
+
+               if (severity != XE_RAS_SEV_CORRECTABLE) {
+                       xe_warn(xe, "sysctrl: unexpected severity %s (%u)\n", 
sev_to_str(severity),
+                               severity);
+                       continue;
+               }
+
+               if (component >= XE_RAS_COMP_MAX) {
+                       xe_warn(xe, "sysctrl: unexpected component %u\n", 
component);
+                       continue;
+               }
+
+               /* Send event once per component */
+               if (sent & BIT(component))
+                       continue;
+               sent |= BIT(component);
+
+               ras_send_error_event(xe, severity, component);
        }
 }
 
-- 
2.47.1

Reply via email to