On Wed, May 20, 2026 at 12:53:52PM +0530, Tauro, Riana wrote:
> On 5/15/2026 1:58 AM, Raag Jadav wrote:
> > cleanup_node_param() is not registered as a cleanup action in case of
> > counter allocation failure, which leaves the memory of previously set up
> > nodes stale, as it isn't cleaned up on unwind.
>
> It is registered.
>
> ret = assign_node_params(xe, node, i);
> if (ret)
> cleanup_node_param(ras, i);
Is that also true for the params of previously registered nodes (e.g. in
case registration of the second node fails)?
> > Add per node cleanup action which guarantees
> > cleanup on unwind and also simplifies the cleanup logic.
> >
> > Fixes: b40db12b542f ("drm/xe/xe_drm_ras: Add support for XE DRM RAS")
> > Signed-off-by: Raag Jadav <[email protected]>
> > ---
> > drivers/gpu/drm/xe/xe_drm_ras.c | 42 +++++++++++++--------------------
> > 1 file changed, 17 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c
> > b/drivers/gpu/drm/xe/xe_drm_ras.c
> > index 89640ffb1c33..40abde29a26f 100644
> > --- a/drivers/gpu/drm/xe/xe_drm_ras.c
> > +++ b/drivers/gpu/drm/xe/xe_drm_ras.c
> > @@ -131,14 +131,20 @@ static int assign_node_params(struct xe_device *xe,
> > struct drm_ras_node *node,
> > return 0;
> > }
> > -static void cleanup_node_param(struct xe_drm_ras *ras, const enum
> > drm_xe_ras_error_severity severity)
> > +static void cleanup_node_param(struct drm_ras_node *node)
> > {
> > - struct drm_ras_node *node = &ras->node[severity];
> > -
> > kfree(node->device_name);
> > node->device_name = NULL;
> > }
> > +static void cleanup_node(struct drm_device *drm, void *arg)
> > +{
> > + struct drm_ras_node *node = arg;
> > +
> > + drm_ras_node_unregister(node);
> > + cleanup_node_param(node);
> > +}
> > +
> > static int register_nodes(struct xe_device *xe)
> > {
> > struct xe_drm_ras *ras = &xe->ras;
> > @@ -150,13 +156,19 @@ static int register_nodes(struct xe_device *xe)
> > ret = assign_node_params(xe, node, i);
> > if (ret) {
> > - cleanup_node_param(ras, i);
> > + cleanup_node_param(node);
>
> At this point drm_ras node is not registered.
Yes, and I don't believe we're attempting to unregister here :)
> > return ret;
> > }
> > ret = drm_ras_node_register(node);
> > if (ret) {
> > - cleanup_node_param(ras, i);
> > + cleanup_node_param(node);
>
> Ditto
Ditto.
Raag
> > + return ret;
> > + }
> > +
> > + ret = drmm_add_action_or_reset(&xe->drm, cleanup_node, node);
> > + if (ret) {
> > + cleanup_node(&xe->drm, node);
> > return ret;
> > }
> > }
> > @@ -164,20 +176,6 @@ static int register_nodes(struct xe_device *xe)
> > return 0;
> > }
> > -static void xe_drm_ras_unregister_nodes(struct drm_device *device, void
> > *arg)
> > -{
> > - struct xe_device *xe = arg;
> > - struct xe_drm_ras *ras = &xe->ras;
> > - int i;
> > -
> > - for_each_error_severity(i) {
> > - struct drm_ras_node *node = &ras->node[i];
> > -
> > - drm_ras_node_unregister(node);
> > - cleanup_node_param(ras, i);
> > - }
> > -}
> > -
> > /**
> > * xe_drm_ras_init() - Initialize DRM RAS
> > * @xe: xe device instance
> > @@ -204,11 +202,5 @@ int xe_drm_ras_init(struct xe_device *xe)
> > return err;
> > }
> > - err = drmm_add_action_or_reset(&xe->drm, xe_drm_ras_unregister_nodes,
> > xe);
> > - if (err) {
> > - drm_err(&xe->drm, "Failed to add action for Xe DRM RAS
> > (%pe)\n", ERR_PTR(err));
> > - return err;
> > - }
> > -
> > return 0;
> > }