The system controller allows getting/setting a per-counter threshold, which it uses to raise error events to the driver. Get/set it using the respective mailbox commands.
Signed-off-by: Raag Jadav <[email protected]> --- v2: Add RAS operation status codes (Riana) v3: Reuse status codes and uapi mapping from counter series (Riana) Access request/response counter using local pointer (Riana) Mark unused field as reserved (Riana) --- drivers/gpu/drm/xe/xe_ras.c | 105 ++++++++++++++++++ drivers/gpu/drm/xe/xe_ras.h | 2 + drivers/gpu/drm/xe/xe_ras_types.h | 51 +++++++++ drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h | 4 + 4 files changed, 162 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c index 7cb6fcb1254a..d6f89b429cec 100644 --- a/drivers/gpu/drm/xe/xe_ras.c +++ b/drivers/gpu/drm/xe/xe_ras.c @@ -270,6 +270,111 @@ int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component) return 0; } +/** + * xe_ras_get_threshold() - Get error counter threshold + * @xe: Xe device instance + * @severity: Error severity to be queried (&enum drm_xe_ras_error_severity) + * @component: Error component to be queried (&enum drm_xe_ras_error_component) + * @threshold: Pointer to store the retrieved counter threshold (written only on success) + * + * This function retrieves the error threshold of a specific counter based on + * severity and component, using the XE_SYSCTRL_CMD_GET_THRESHOLD mailbox command. + * + * Return: 0 on success, negative error code on failure (-EIO on unexpected response length). 
+ */ +int xe_ras_get_threshold(struct xe_device *xe, u8 severity, u8 component, u32 *threshold) +{ + struct xe_ras_get_threshold_response response = {}; + struct xe_ras_get_threshold_request request = {}; + struct xe_sysctrl_mailbox_command command = {}; + struct xe_ras_error_class *counter; + size_t len; + int ret; + + counter = &request.counter; + counter->common.severity = drm_to_xe_ras_severity(severity); + counter->common.component = drm_to_xe_ras_component(component); + + xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP, XE_SYSCTRL_CMD_GET_THRESHOLD, + &request, sizeof(request), &response, sizeof(response)); + + guard(xe_pm_runtime)(xe); + ret = xe_sysctrl_send_command(&xe->sc, &command, &len); + if (ret) { + xe_err(xe, "sysctrl: failed to get threshold %d\n", ret); + return ret; + } + + if (len != sizeof(response)) { + xe_err(xe, "sysctrl: unexpected get threshold response length %zu (expected %zu)\n", + len, sizeof(response)); + return -EIO; + } + + counter = &response.counter; + *threshold = response.threshold; + + xe_dbg(xe, "[RAS]: get counter threshold %u for %s %s\n", *threshold, + comp_to_str(counter->common.component), sev_to_str(counter->common.severity)); + return 0; +} + +/** + * xe_ras_set_threshold() - Set error counter threshold + * @xe: Xe device instance + * @severity: Error severity to be set (&enum drm_xe_ras_error_severity) + * @component: Error component to be set (&enum drm_xe_ras_error_component) + * @threshold: Counter threshold value to set + * + * This function sets the error threshold of a specific counter based on + * severity and component, using the XE_SYSCTRL_CMD_SET_THRESHOLD mailbox command. + * + * Return: 0 on success, negative error code on failure (-EIO on unexpected response length). 
+ */ +int xe_ras_set_threshold(struct xe_device *xe, u8 severity, u8 component, u32 threshold) +{ + struct xe_ras_set_threshold_response response = {}; + struct xe_ras_set_threshold_request request = {}; + struct xe_sysctrl_mailbox_command command = {}; + struct xe_ras_error_class *counter; + size_t len; + int ret; + + counter = &request.counter; + counter->common.severity = drm_to_xe_ras_severity(severity); + counter->common.component = drm_to_xe_ras_component(component); + request.threshold = threshold; + + xe_sysctrl_create_command(&command, XE_SYSCTRL_GROUP_GFSP, XE_SYSCTRL_CMD_SET_THRESHOLD, + &request, sizeof(request), &response, sizeof(response)); + + guard(xe_pm_runtime)(xe); + ret = xe_sysctrl_send_command(&xe->sc, &command, &len); + if (ret) { + xe_err(xe, "sysctrl: failed to set threshold %d\n", ret); + return ret; + } + + if (len != sizeof(response)) { + xe_err(xe, "sysctrl: unexpected set threshold response length %zu (expected %zu)\n", + len, sizeof(response)); + return -EIO; + } + + ret = ras_status_to_errno(response.status); + if (ret) { + xe_err(xe, "sysctrl: set threshold command failed with status %#x\n", + response.status); + return ret; + } + + counter = &response.counter; + + xe_dbg(xe, "[RAS]: set counter threshold %u for %s %s\n", response.threshold, + comp_to_str(counter->common.component), sev_to_str(counter->common.severity)); + return 0; +} + /** * xe_ras_init - Initialize Xe RAS * @xe: xe device instance diff --git a/drivers/gpu/drm/xe/xe_ras.h b/drivers/gpu/drm/xe/xe_ras.h index ba0b0224df23..1aa43c54b710 100644 --- a/drivers/gpu/drm/xe/xe_ras.h +++ b/drivers/gpu/drm/xe/xe_ras.h @@ -15,6 +15,8 @@ void xe_ras_counter_threshold_crossed(struct xe_device *xe, struct xe_sysctrl_event_response *response); int xe_ras_get_counter(struct xe_device *xe, u8 severity, u8 component, u32 *value); int xe_ras_clear_counter(struct xe_device *xe, u8 severity, u8 component); +int xe_ras_get_threshold(struct xe_device *xe, u8 severity, u8 component, u32 
*threshold); +int xe_ras_set_threshold(struct xe_device *xe, u8 severity, u8 component, u32 threshold); void xe_ras_init(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_ras_types.h b/drivers/gpu/drm/xe/xe_ras_types.h index c6392435d1c6..8ea817583eed 100644 --- a/drivers/gpu/drm/xe/xe_ras_types.h +++ b/drivers/gpu/drm/xe/xe_ras_types.h @@ -121,4 +121,55 @@ struct xe_ras_clear_counter_response { /** @reserved1: Reserved for future use */ u32 reserved1[3]; } __packed; + +/** + * struct xe_ras_get_threshold_request - Request structure for get threshold + */ +struct xe_ras_get_threshold_request { + /** @counter: Counter to get threshold for */ + struct xe_ras_error_class counter; + /** @reserved: Reserved for future use */ + u32 reserved; +} __packed; + +/** + * struct xe_ras_get_threshold_response - Response structure for get threshold + */ +struct xe_ras_get_threshold_response { + /** @counter: Counter ID */ + struct xe_ras_error_class counter; + /** @threshold: Threshold value */ + u32 threshold; + /** @reserved: Reserved for future use */ + u32 reserved[4]; +} __packed; + +/** + * struct xe_ras_set_threshold_request - Request structure for set threshold + */ +struct xe_ras_set_threshold_request { + /** @counter: Counter to set threshold for */ + struct xe_ras_error_class counter; + /** @threshold: Threshold value to set */ + u32 threshold; + /** @reserved: Reserved for future use */ + u32 reserved; +} __packed; + +/** + * struct xe_ras_set_threshold_response - Response structure for set threshold + */ +struct xe_ras_set_threshold_response { + /** @counter: Counter ID */ + struct xe_ras_error_class counter; + /** @reserved: Reserved */ + u32 reserved; + /** @threshold: Updated threshold value */ + u32 threshold; + /** @status: Set threshold operation status */ + u32 status; + /** @reserved1: Reserved for future use */ + u32 reserved1[2]; +} __packed; + #endif diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h 
b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h index 6e3753554510..10f06aa5c4b5 100644 --- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h +++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h @@ -24,11 +24,15 @@ enum xe_sysctrl_group { * * @XE_SYSCTRL_CMD_GET_COUNTER: Get error counter value * @XE_SYSCTRL_CMD_CLEAR_COUNTER: Clear error counter value + * @XE_SYSCTRL_CMD_GET_THRESHOLD: Retrieve error threshold + * @XE_SYSCTRL_CMD_SET_THRESHOLD: Set error threshold * @XE_SYSCTRL_CMD_GET_PENDING_EVENT: Retrieve pending event */ enum xe_sysctrl_gfsp_cmd { XE_SYSCTRL_CMD_GET_COUNTER = 0x03, XE_SYSCTRL_CMD_CLEAR_COUNTER = 0x04, + XE_SYSCTRL_CMD_GET_THRESHOLD = 0x05, + XE_SYSCTRL_CMD_SET_THRESHOLD = 0x06, XE_SYSCTRL_CMD_GET_PENDING_EVENT = 0x07, }; -- 2.43.0
