On Wed, May 20, 2026 at 12:53:52PM +0530, Tauro, Riana wrote:
> On 5/15/2026 1:58 AM, Raag Jadav wrote:
> > cleanup_node_param() is not registered in case of counter allocation
> > failure, which results in stale memory of previous node that isn't
> > cleaned up on unwind.
> 
> It is registered.
> 
> ret = assign_node_params(xe, node, i);
> if (ret)
>     cleanup_node_param(ras, i);

Is that also true for the previous node's params (e.g. when the second
node's registration fails)?

> >   Add per node cleanup action which guarantees
> > cleanup on unwind and also simplifies the cleanup logic.
> > 
> > Fixes: b40db12b542f ("drm/xe/xe_drm_ras: Add support for XE DRM RAS")
> > Signed-off-by: Raag Jadav <[email protected]>
> > ---
> >   drivers/gpu/drm/xe/xe_drm_ras.c | 42 +++++++++++++--------------------
> >   1 file changed, 17 insertions(+), 25 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_drm_ras.c b/drivers/gpu/drm/xe/xe_drm_ras.c
> > index 89640ffb1c33..40abde29a26f 100644
> > --- a/drivers/gpu/drm/xe/xe_drm_ras.c
> > +++ b/drivers/gpu/drm/xe/xe_drm_ras.c
> > @@ -131,14 +131,20 @@ static int assign_node_params(struct xe_device *xe, struct drm_ras_node *node,
> >     return 0;
> >   }
> > -static void cleanup_node_param(struct xe_drm_ras *ras, const enum 
> > drm_xe_ras_error_severity severity)
> > +static void cleanup_node_param(struct drm_ras_node *node)
> >   {
> > -   struct drm_ras_node *node = &ras->node[severity];
> > -
> >     kfree(node->device_name);
> >     node->device_name = NULL;
> >   }
> > +static void cleanup_node(struct drm_device *drm, void *arg)
> > +{
> > +   struct drm_ras_node *node = arg;
> > +
> > +   drm_ras_node_unregister(node);
> > +   cleanup_node_param(node);
> > +}
> > +
> >   static int register_nodes(struct xe_device *xe)
> >   {
> >     struct xe_drm_ras *ras = &xe->ras;
> > @@ -150,13 +156,19 @@ static int register_nodes(struct xe_device *xe)
> >             ret = assign_node_params(xe, node, i);
> >             if (ret) {
> > -                   cleanup_node_param(ras, i);
> > +                   cleanup_node_param(node);
> 
> At this point drm_ras node is not registered.

Yes, and I don't believe we're attempting to unregister here :)

> >                     return ret;
> >             }
> >             ret = drm_ras_node_register(node);
> >             if (ret) {
> > -                   cleanup_node_param(ras, i);
> > +                   cleanup_node_param(node);
> 
> Ditto

Ditto.

Raag

> > +                   return ret;
> > +           }
> > +
> > +           ret = drmm_add_action_or_reset(&xe->drm, cleanup_node, node);
> > +           if (ret) {
> > +                   cleanup_node(&xe->drm, node);
> >                     return ret;
> >             }
> >     }
> > @@ -164,20 +176,6 @@ static int register_nodes(struct xe_device *xe)
> >     return 0;
> >   }
> > -static void xe_drm_ras_unregister_nodes(struct drm_device *device, void 
> > *arg)
> > -{
> > -   struct xe_device *xe = arg;
> > -   struct xe_drm_ras *ras = &xe->ras;
> > -   int i;
> > -
> > -   for_each_error_severity(i) {
> > -           struct drm_ras_node *node = &ras->node[i];
> > -
> > -           drm_ras_node_unregister(node);
> > -           cleanup_node_param(ras, i);
> > -   }
> > -}
> > -
> >   /**
> >    * xe_drm_ras_init() - Initialize DRM RAS
> >    * @xe: xe device instance
> > @@ -204,11 +202,5 @@ int xe_drm_ras_init(struct xe_device *xe)
> >             return err;
> >     }
> > -   err = drmm_add_action_or_reset(&xe->drm, xe_drm_ras_unregister_nodes, 
> > xe);
> > -   if (err) {
> > -           drm_err(&xe->drm, "Failed to add action for Xe DRM RAS 
> > (%pe)\n", ERR_PTR(err));
> > -           return err;
> > -   }
> > -
> >     return 0;
> >   }

Reply via email to