The system controller allows getting and setting a per-counter threshold,
which it uses to raise error events to the driver. Get and set it using the
respective mailbox commands.

Signed-off-by: Raag Jadav <[email protected]>
---
v2: Add RAS operation status codes (Riana)
v3: Reuse status codes and uapi mapping from counter series (Riana)
    Access request/response counter using local pointer (Riana)
    Mark unused field as reserved (Riana)
---
 drivers/gpu/drm/xe/xe_ras.c                   | 105 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_ras.h                   |   2 +
 drivers/gpu/drm/xe/xe_ras_types.h             |  51 +++++++++
 drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h |   4 +
 4 files changed, 162 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
index 7cb6fcb1254a..d6f89b429cec 100644
--- a/drivers/gpu/drm/xe/xe_ras.c
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -270,6 +270,111 @@ int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component)
        return 0;
 }
 
+/**
+ * xe_ras_get_threshold() - Get error counter threshold
+ * @xe: Xe device instance
+ * @severity: Error severity to be queried (&enum drm_xe_ras_error_severity)
+ * @component: Error component to be queried (&enum drm_xe_ras_error_component)
+ * @threshold: Output pointer, written with the counter threshold on success only
+ *
+ * Sends XE_SYSCTRL_CMD_GET_THRESHOLD to the system controller for the counter
+ * selected by @severity and @component, and stores the reply in @threshold.
+ *
+ * Return: 0 on success, -EIO on unexpected response length, negative error code on failure.
+ */
+int xe_ras_get_threshold(struct xe_device *xe, u8 severity, u8 component, u32 *threshold)
+{
+       struct xe_ras_get_threshold_response response = {};
+       struct xe_ras_get_threshold_request request = {};
+       struct xe_sysctrl_mailbox_command command = {};
+       struct xe_ras_error_class *counter;
+       size_t len;
+       int ret;
+
+       counter = &request.counter;
+       counter->common.severity = drm_to_xe_ras_severity(severity);
+       counter->common.component = drm_to_xe_ras_component(component);
+
+       xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP, XE_SYSCTRL_CMD_GET_THRESHOLD,
+                                 &request, sizeof(request), &response, sizeof(response));
+
+       guard(xe_pm_runtime)(xe);
+       ret = xe_sysctrl_send_command(&xe->sc, &command, &len);
+       if (ret) {
+               xe_err(xe, "sysctrl: failed to get threshold %d\n", ret);
+               return ret;
+       }
+
+       if (len != sizeof(response)) {
+               xe_err(xe, "sysctrl: unexpected get threshold response length %zu (expected %zu)\n",
+                      len, sizeof(response));
+               return -EIO;
+       }
+
+       counter = &response.counter;
+       *threshold = response.threshold;
+
+       xe_dbg(xe, "[RAS]: get counter threshold %u for %s %s\n", *threshold,
+              comp_to_str(counter->common.component), sev_to_str(counter->common.severity));
+       return 0;
+}
+
+/**
+ * xe_ras_set_threshold() - Set error counter threshold
+ * @xe: Xe device instance
+ * @severity: Error severity to be set (&enum drm_xe_ras_error_severity)
+ * @component: Error component to be set (&enum drm_xe_ras_error_component)
+ * @threshold: Threshold value to request for the counter
+ *
+ * Sends XE_SYSCTRL_CMD_SET_THRESHOLD to the system controller for the counter
+ * selected by @severity and @component, then checks the returned status.
+ *
+ * Return: 0 on success, -EIO on unexpected response length, negative error code on failure.
+ */
+int xe_ras_set_threshold(struct xe_device *xe, u8 severity, u8 component, u32 threshold)
+{
+       struct xe_ras_set_threshold_response response = {};
+       struct xe_ras_set_threshold_request request = {};
+       struct xe_sysctrl_mailbox_command command = {};
+       struct xe_ras_error_class *counter;
+       size_t len;
+       int ret;
+
+       counter = &request.counter;
+       counter->common.severity = drm_to_xe_ras_severity(severity);
+       counter->common.component = drm_to_xe_ras_component(component);
+       request.threshold = threshold;
+
+       xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP, XE_SYSCTRL_CMD_SET_THRESHOLD,
+                                 &request, sizeof(request), &response, sizeof(response));
+
+       guard(xe_pm_runtime)(xe);
+       ret = xe_sysctrl_send_command(&xe->sc, &command, &len);
+       if (ret) {
+               xe_err(xe, "sysctrl: failed to set threshold %d\n", ret);
+               return ret;
+       }
+
+       if (len != sizeof(response)) {
+               xe_err(xe, "sysctrl: unexpected set threshold response length %zu (expected %zu)\n",
+                      len, sizeof(response));
+               return -EIO;
+       }
+
+       ret = ras_status_to_errno(response.status);
+       if (ret) {
+               xe_err(xe, "sysctrl: set threshold command failed with status %#x\n",
+                      response.status);
+               return ret;
+       }
+
+       counter = &response.counter;
+
+       xe_dbg(xe, "[RAS]: set counter threshold %u for %s %s\n", response.threshold,
+              comp_to_str(counter->common.component), sev_to_str(counter->common.severity));
+       return 0;
+}
+
 /**
  * xe_ras_init - Initialize Xe RAS
  * @xe: xe device instance
diff --git a/drivers/gpu/drm/xe/xe_ras.h b/drivers/gpu/drm/xe/xe_ras.h
index ba0b0224df23..1aa43c54b710 100644
--- a/drivers/gpu/drm/xe/xe_ras.h
+++ b/drivers/gpu/drm/xe/xe_ras.h
@@ -15,6 +15,8 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe,
                                      struct xe_sysctrl_event_response *response);
 int xe_ras_get_counter(struct xe_device *xe, u8 severity, u8 component, u32 *value);
 int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component);
+int xe_ras_get_threshold(struct xe_device *xe, u8 severity, u8 component, u32 *threshold);
+int xe_ras_set_threshold(struct xe_device *xe, u8 severity, u8 component, u32 threshold);
 void xe_ras_init(struct xe_device *xe);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ras_types.h b/drivers/gpu/drm/xe/xe_ras_types.h
index c6392435d1c6..8ea817583eed 100644
--- a/drivers/gpu/drm/xe/xe_ras_types.h
+++ b/drivers/gpu/drm/xe/xe_ras_types.h
@@ -121,4 +121,55 @@ struct xe_ras_clear_counter_response {
        /** @reserved1: Reserved for future use */
        u32 reserved1[3];
 } __packed;
+
+/**
+ * struct xe_ras_get_threshold_request - Request payload for XE_SYSCTRL_CMD_GET_THRESHOLD
+ */
+struct xe_ras_get_threshold_request {
+       /** @counter: Error class (severity and component) of the counter to query */
+       struct xe_ras_error_class counter;
+       /** @reserved: Reserved for future use */
+       u32 reserved;
+} __packed;
+
+/**
+ * struct xe_ras_get_threshold_response - Response payload for XE_SYSCTRL_CMD_GET_THRESHOLD
+ */
+struct xe_ras_get_threshold_response {
+       /** @counter: Error class of the counter the response refers to */
+       struct xe_ras_error_class counter;
+       /** @threshold: Threshold value returned to the caller of xe_ras_get_threshold() */
+       u32 threshold;
+       /** @reserved: Reserved for future use */
+       u32 reserved[4];
+} __packed;
+
+/**
+ * struct xe_ras_set_threshold_request - Request payload for XE_SYSCTRL_CMD_SET_THRESHOLD
+ */
+struct xe_ras_set_threshold_request {
+       /** @counter: Error class (severity and component) of the counter to configure */
+       struct xe_ras_error_class counter;
+       /** @threshold: New threshold value requested by the caller */
+       u32 threshold;
+       /** @reserved: Reserved for future use */
+       u32 reserved;
+} __packed;
+
+/**
+ * struct xe_ras_set_threshold_response - Response payload for XE_SYSCTRL_CMD_SET_THRESHOLD
+ */
+struct xe_ras_set_threshold_response {
+       /** @counter: Error class of the counter the response refers to */
+       struct xe_ras_error_class counter;
+       /** @reserved: Reserved */
+       u32 reserved;
+       /** @threshold: Updated threshold value (the driver only logs it) */
+       u32 threshold;
+       /** @status: Operation status, translated by ras_status_to_errno() */
+       u32 status;
+       /** @reserved1: Reserved for future use */
+       u32 reserved1[2];
+} __packed;
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
index 6e3753554510..10f06aa5c4b5 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
@@ -24,11 +24,15 @@ enum xe_sysctrl_group {
  *
  * @XE_SYSCTRL_CMD_GET_COUNTER: Get error counter value
  * @XE_SYSCTRL_CMD_CLEAR_COUNTER: Clear error counter value
+ * @XE_SYSCTRL_CMD_GET_THRESHOLD: Retrieve error threshold
+ * @XE_SYSCTRL_CMD_SET_THRESHOLD: Set error threshold
  * @XE_SYSCTRL_CMD_GET_PENDING_EVENT: Retrieve pending event
  */
 enum xe_sysctrl_gfsp_cmd {
        XE_SYSCTRL_CMD_GET_COUNTER              = 0x03,
        XE_SYSCTRL_CMD_CLEAR_COUNTER            = 0x04,
+       XE_SYSCTRL_CMD_GET_THRESHOLD            = 0x05,
+       XE_SYSCTRL_CMD_SET_THRESHOLD            = 0x06,
        XE_SYSCTRL_CMD_GET_PENDING_EVENT        = 0x07,
 };
 
-- 
2.43.0

Reply via email to