On Mon, May 11, 2026 at 10:51:38PM +0530, Tauro, Riana wrote:
> On 4/18/2026 2:46 AM, Raag Jadav wrote:
> > System controller allows programming per error threshold value, which
> > it uses to raise error events to the driver. Set it using mailbox
> > command so that it can be programmed by the user.
> > 
> > Signed-off-by: Raag Jadav <[email protected]>
> > ---
> >   drivers/gpu/drm/xe/xe_ras.c                   | 42 +++++++++++++++++++
> >   drivers/gpu/drm/xe/xe_ras.h                   |  1 +
> >   drivers/gpu/drm/xe/xe_ras_types.h             | 28 +++++++++++++
> >   drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h |  2 +
> >   4 files changed, 73 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
> > index 3e93f838aa4a..26e063166c5f 100644
> > --- a/drivers/gpu/drm/xe/xe_ras.c
> > +++ b/drivers/gpu/drm/xe/xe_ras.c
> > @@ -163,3 +163,45 @@ int xe_ras_get_threshold(struct xe_device *xe, u32 
> > severity, u32 component, u32
> >            comp_to_str(counter.common.component), 
> > sev_to_str(counter.common.severity));
> >     return 0;
> >   }
> > +
> > +int xe_ras_set_threshold(struct xe_device *xe, u32 severity, u32 
> > component, u32 threshold)
> > +{
> > +   struct xe_ras_set_threshold_response response = {};
> > +   struct xe_ras_set_threshold_request request = {};
> > +   struct xe_sysctrl_mailbox_command command = {};
> > +   struct xe_ras_error_class counter = {};
> > +   size_t len;
> > +   int ret;
> > +
> > +   counter.common.severity = drm_to_xe_ras_severities[severity];
> > +   counter.common.component = drm_to_xe_ras_components[component];
> > +   request.counter = counter;
> > +   request.threshold = threshold;
> 
> We might need a max check here to avoid invalid values from the user.

We may want to avoid hardcoding it in the driver, since it can potentially
differ per product.

> > +   ras_command_prepare(&command, &request, sizeof(request), &response,
> > +                       sizeof(response), XE_SYSCTRL_CMD_SET_THRESHOLD);
> 
> Nit: command, request, response seems to be a better format

Sure, I'll likely create a separate sysctrl helper.

> > +   guard(xe_pm_runtime)(xe);
> > +   ret = xe_sysctrl_send_command(&xe->sc, &command, &len);
> > +   if (ret) {
> > +           xe_err(xe, "sysctrl: failed to set threshold %d\n", ret);
> > +           return ret;
> > +   }
> > +
> > +   if (len != sizeof(response)) {
> > +           xe_err(xe, "sysctrl: unexpected set threshold response length 
> > %zu (expected %zu)\n",
> > +                  len, sizeof(response));
> > +           return -EIO;
> > +   }
> > +
> > +   if (response.status) {
> > +           xe_err(xe, "sysctrl: set threshold operation failed %#x\n", 
> > response.status);
> 
> Status should be converted to visible error codes. See [PATCH v5 3/6]
> drm/xe/xe_ras: Add helper to clear error counter - Riana Tauro
> 
> <https://lore.kernel.org/intel-xe/[email protected]/>

Coming right up.

Raag

> > +           return -EIO;
> > +   }
> > +
> > +   counter = response.counter;
> > +
> > +   xe_dbg(xe, "[RAS]: Set threshold %u for %s %s\n", response.threshold,
> > +          comp_to_str(counter.common.component), 
> > sev_to_str(counter.common.severity));
> Again, not required. The value should be visible to the user.
> > +   return 0;
> > +}
> > diff --git a/drivers/gpu/drm/xe/xe_ras.h b/drivers/gpu/drm/xe/xe_ras.h
> > index 982bbe61461e..d1f71b1de723 100644
> > --- a/drivers/gpu/drm/xe/xe_ras.h
> > +++ b/drivers/gpu/drm/xe/xe_ras.h
> > @@ -14,5 +14,6 @@ struct xe_sysctrl_event_response;
> >   void xe_ras_counter_threshold_crossed(struct xe_device *xe,
> >                                   struct xe_sysctrl_event_response 
> > *response);
> >   int xe_ras_get_threshold(struct xe_device *xe, u32 severity, u32 
> > component, u32 *threshold);
> > +int xe_ras_set_threshold(struct xe_device *xe, u32 severity, u32 
> > component, u32 threshold);
> >   #endif
> > diff --git a/drivers/gpu/drm/xe/xe_ras_types.h 
> > b/drivers/gpu/drm/xe/xe_ras_types.h
> > index d5da93d65cf5..d7e4a02a661d 100644
> > --- a/drivers/gpu/drm/xe/xe_ras_types.h
> > +++ b/drivers/gpu/drm/xe/xe_ras_types.h
> > @@ -92,4 +92,32 @@ struct xe_ras_get_threshold_response {
> >     u32 reserved[4];
> >   } __packed;
> > +/**
> > + * struct xe_ras_set_threshold_request - Request structure for set 
> > threshold
> > + */
> > +struct xe_ras_set_threshold_request {
> > +   /** @counter: Counter to set threshold for */
> > +   struct xe_ras_error_class counter;
> > +   /** @threshold: Threshold value to set */
> > +   u32 threshold;
> > +   /** @reserved: Reserved for future use */
> > +   u32 reserved;
> > +} __packed;
> > +
> > +/**
> > + * struct xe_ras_set_threshold_response - Response structure for set 
> > threshold
> > + */
> > +struct xe_ras_set_threshold_response {
> > +   /** @counter: Counter id */
> 
> Nit: ID
> 
> > +   struct xe_ras_error_class counter;
> > +   /** @threshold_old: Old threshold value */
> 
> Nit: prev
> 
> Thanks
> Riana
> 
> > +   u32 threshold_old;
> > +   /** @threshold: New threshold value */
> > +   u32 threshold;
> > +   /** @status: Set threshold operation status */
> > +   u32 status;
> > +   /** @reserved: Reserved for future use */
> > +   u32 reserved[2];
> > +} __packed;
> > +
> >   #endif
> > diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h 
> > b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
> > index a1b71218deca..b865768e903b 100644
> > --- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
> > +++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
> > @@ -23,10 +23,12 @@ enum xe_sysctrl_group {
> >    * enum xe_sysctrl_gfsp_cmd - Commands supported by GFSP group
> >    *
> >    * @XE_SYSCTRL_CMD_GET_THRESHOLD: Retrieve error threshold
> > + * @XE_SYSCTRL_CMD_SET_THRESHOLD: Set error threshold
> >    * @XE_SYSCTRL_CMD_GET_PENDING_EVENT: Retrieve pending event
> >    */
> >   enum xe_sysctrl_gfsp_cmd {
> >     XE_SYSCTRL_CMD_GET_THRESHOLD            = 0x05,
> > +   XE_SYSCTRL_CMD_SET_THRESHOLD            = 0x06,
> >     XE_SYSCTRL_CMD_GET_PENDING_EVENT        = 0x07,
> >   };

Reply via email to