On Thu, 2026-06-18 at 19:32 +0000, Alessio Belle wrote:
> *** NOTE: This is an internal email from Imagination Technologies ***
> 
> 
> 
> 
> Hi Brajesh,
> 
> On Thu, 2026-06-11 at 12:47 +0530, Brajesh Gupta wrote:
> > Call sequence of double call:
> > pvr_context_destroy
> >   pvr_context_kill_queues
> >     pvr_queue_kill
> >       drm_sched_entity_destroy
> >         drm_sched_entity_fini // here
> >   pvr_context_put
> >     kref_put(..., pvr_context_release)
> >       pvr_context_destroy_queues
> >         pvr_queue_destroy
> >           drm_sched_entity_fini // here
> > 
> > Call to drm_sched_entity_destroy() from pvr_context_kill_queues() calls
> > drm_sched_entity_flush() + drm_sched_entity_fini().
> > drm_sched_entity_flush() ensures all pending jobs are completed and
> > drm_sched_entity_fini() ensures no further submission is allowed as
> > per expectation from pvr_context_kill_queues(). Drop the extra
> > call to drm_sched_entity_fini().
> > 
> > Stack trace for issue with addition of refcounting for DRM entity
> > stats in commit fd177135f0e6 ("drm/sched: Account entity GPU time"):
> > [  789.490527] ------------[ cut here ]------------
> > [  789.490559] refcount_t: underflow; use-after-free.
> > [  789.490657] WARNING: lib/refcount.c:28 at 
> > refcount_warn_saturate+0xf4/0x144, CPU#0: kworker/u16:1/440
> > [  789.490695] Modules linked in: powervr drm_gpuvm drm_exec gpu_sched 
> > drm_shmem_helper xhci_plat_hcd xhci_hcd dwc3 usbcore usb_common 
> > snd_soc_simple_card snd_soc_simple_card_utils sa2ul sha512 sha256 dwc3_am62 
> > sha1 authenc rti_wdt libsha512 at24 sch_fq_codel fuse dm_mod ipv6
> > [  789.490798] CPU: 0 UID: 0 PID: 440 Comm: kworker/u16:1 Not tainted 
> > 7.0.0-rc7-02049-g5e2c0700091b #22 PREEMPT
> > [  789.490809] Hardware name: Texas Instruments AM625 SK (DT)
> > [  789.490815] Workqueue: powervr-sched pvr_queue_fence_release_work 
> > [powervr]
> > [  789.490868] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS 
> > BTYPE=--)
> > [  789.490876] pc : refcount_warn_saturate+0xf4/0x144
> > [  789.490884] lr : refcount_warn_saturate+0xf4/0x144
> > [  789.490892] sp : ffff8000822cbcc0
> > [  789.490895] x29: ffff8000822cbcc0 x28: 0000000000000000 x27: 
> > 0000000000000000
> > [  789.490909] x26: 0000000000000000 x25: ffff800081b1e338 x24: 
> > ffff000004541405
> > [  789.490922] x23: ffff000004bea950 x22: ffff00000042e400 x21: 
> > ffff000007123e30
> > [  789.490935] x20: ffff000007123000 x19: ffff000007a80d50 x18: 
> > fffffffffffe7768
> > [  789.490948] x17: 74736574202c6e6f x16: 697461746e656d65 x15: 
> > ffff800081b269f0
> > [  789.490962] x14: 0000000000000030 x13: ffff800081b26a70 x12: 
> > 0000000000000211
> > [  789.490975] x11: 00000000000000c0 x10: 0000000000000b50 x9 : 
> > ffff8000822cbb30
> > [  789.490988] x8 : ffff0000014e7bb0 x7 : ffff00007725e780 x6 : 
> > 0000000372a05f49
> > [  789.491001] x5 : 0000000000000000 x4 : 0000000000000001 x3 : 
> > 0000000000000010
> > [  789.491013] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 
> > ffff0000014e7000
> > [  789.491027] Call trace:
> > [  789.491032]  refcount_warn_saturate+0xf4/0x144 (P)
> > [  789.491043]  drm_sched_entity_fini+0x164/0x18c [gpu_sched]
> > [  789.491081]  pvr_queue_destroy+0x64/0x134 [powervr]
> > [  789.491110]  pvr_context_destroy_queues+0x34/0x64 [powervr]
> > [  789.491138]  pvr_context_release+0x70/0xac [powervr]
> > [  789.491166]  pvr_context_put.part.0+0x5c/0x7c [powervr]
> > [  789.491193]  pvr_context_put+0x14/0x24 [powervr]
> > [  789.491221]  pvr_queue_fence_release_work+0x20/0x38 [powervr]
> > [  789.491249]  process_one_work+0x160/0x4c4
> > [  789.491264]  worker_thread+0x188/0x310
> > [  789.491276]  kthread+0x130/0x13c
> > [  789.491287]  ret_from_fork+0x10/0x20
> > [  789.491300] ---[ end trace 0000000000000000 ]---
> > 
> > Signed-off-by: Brajesh Gupta <[email protected]>
> > Fixes: eaf01ee5ba28 ("drm/imagination: Implement job submission and 
> > scheduling")
> > References: fd177135f0e6 ("drm/sched: Account entity GPU time")
> > ---
> > Changes in v3:
> > - Fixed a typo.
> > - Handled missing memory leak for RENDER_CONTEXT.
> > - Link to v2: 
> > https://lore.kernel.org/r/[email protected]
> > 
> > Changes in v2:
> > - Fixed memory leak identified in following error path handling of 
> > pvr_context_create():
> > - pvr_context_create()
> > -   ...
> > -   err_destroy_queues:
> > -     pvr_context_destroy_queues()
> > -       pvr_queue_destroy()
> > - Link to v1: 
> > https://lore.kernel.org/r/[email protected]
> > ---
> >  drivers/gpu/drm/imagination/pvr_context.c | 41 
> > +++++++++++++++++++++++++------
> >  drivers/gpu/drm/imagination/pvr_queue.c   |  4 ---
> >  2 files changed, 33 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/imagination/pvr_context.c 
> > b/drivers/gpu/drm/imagination/pvr_context.c
> > index eba4694400b5..e2e30908dfce 100644
> > --- a/drivers/gpu/drm/imagination/pvr_context.c
> > +++ b/drivers/gpu/drm/imagination/pvr_context.c
> > @@ -161,22 +161,47 @@ ctx_fw_data_init(void *cpu_ptr, void *priv)
> >  /**
> >   * pvr_context_destroy_queues() - Destroy all queues attached to a context.
> >   * @ctx: Context to destroy queues on.
> > + * @queue_entity_fini: Corresponding queues of context to be released for
> > + *                     failure path in context creation.
> >   *
> >   * Should be called when the last reference to a context object is dropped.
> >   * It releases all resources attached to the queues bound to this context.
> >   */
> > -static void pvr_context_destroy_queues(struct pvr_context *ctx)
> > +static void pvr_context_destroy_queues(struct pvr_context *ctx, bool 
> > queue_entity_fini)
> >  {
> >       switch (ctx->type) {
> >       case DRM_PVR_CTX_TYPE_RENDER:
> > -             pvr_queue_destroy(ctx->queues.fragment);
> > -             pvr_queue_destroy(ctx->queues.geometry);
> > +             if (ctx->queues.fragment) {
> > +                     if (queue_entity_fini)
> > +                             
> > drm_sched_entity_fini(&ctx->queues.fragment->entity);
> > +
> > +                     pvr_queue_destroy(ctx->queues.fragment);
> > +             }
> > +
> > +             if (ctx->queues.geometry) {
> > +                     if (queue_entity_fini)
> > +                             
> > drm_sched_entity_fini(&ctx->queues.geometry->entity);
> > +
> > +                     pvr_queue_destroy(ctx->queues.geometry);
> > +             }
> 
> Would it make sense to push the new flag queue_entity_fini into
> pvr_queue_destroy() and keep calling drm_sched_entity_fini() from there but
> conditionally, to avoid all the new ifs around here?
Yes, will update.
> 
> A potential alternative, skimming through the scheduler internals, could be to
> replace drm_sched_entity_destroy() in pvr_queue_kill() with
> drm_sched_entity_flush() + drm_sched_entity_kill() (not fini()), just not
> entirely sure that is expected usage given that the commit message from [1]
> seems to suggest kill() is to be used in place of flush() in general (calling
> both might not hurt).
> 
> Also if this needs backporting, which is unclear since the bug didn't seem to 
> do
> anything visible until the referenced issue, you'd have to go with the current
> fix for the backports, since drm_sched_entity_kill() hasn't made it to any
> kernel release yet.
Will keep existing logic for now. Handle this case specifically for newer kernel
in a separate patch.
> 
> [1] 
> https://lore.kernel.org/dri-devel/[email protected]/
> 
> Thanks,
> Alessio
> 
> >               break;
> > +
> >       case DRM_PVR_CTX_TYPE_COMPUTE:
> > -             pvr_queue_destroy(ctx->queues.compute);
> > +             if (ctx->queues.compute) {
> > +                     if (queue_entity_fini)
> > +                             
> > drm_sched_entity_fini(&ctx->queues.compute->entity);
> > +
> > +                     pvr_queue_destroy(ctx->queues.compute);
> > +             }
> >               break;
> > +
> >       case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
> > -             pvr_queue_destroy(ctx->queues.transfer);
> > +             if (ctx->queues.transfer) {
> > +                     if (queue_entity_fini)
> > +                             
> > drm_sched_entity_fini(&ctx->queues.transfer->entity);
> > +
> > +                     pvr_queue_destroy(ctx->queues.transfer);
> > +             }
> >               break;
> >       }
> >  }
> > @@ -240,7 +265,7 @@ static int pvr_context_create_queues(struct pvr_context 
> > *ctx,
> >       return -EINVAL;
> > 
> >  err_destroy_queues:
> > -     pvr_context_destroy_queues(ctx);
> > +     pvr_context_destroy_queues(ctx, true);
> >       return err;
> >  }
> > 
> > @@ -349,7 +374,7 @@ int pvr_context_create(struct pvr_file *pvr_file, 
> > struct drm_pvr_ioctl_create_co
> >       pvr_fw_object_destroy(ctx->fw_obj);
> > 
> >  err_destroy_queues:
> > -     pvr_context_destroy_queues(ctx);
> > +     pvr_context_destroy_queues(ctx, true);
> > 
> >  err_free_ctx_id:
> >       /*
> > @@ -384,7 +409,7 @@ pvr_context_release(struct kref *ref_count)
> >       spin_unlock(&pvr_dev->ctx_list_lock);
> > 
> >       xa_erase(&pvr_dev->ctx_ids, ctx->ctx_id);
> > -     pvr_context_destroy_queues(ctx);
> > +     pvr_context_destroy_queues(ctx, false);
> >       pvr_fw_object_destroy(ctx->fw_obj);
> >       kfree(ctx->data);
> >       pvr_vm_context_put(ctx->vm_ctx);
> > diff --git a/drivers/gpu/drm/imagination/pvr_queue.c 
> > b/drivers/gpu/drm/imagination/pvr_queue.c
> > index 7ed60e1c1a86..15cd8adc7528 100644
> > --- a/drivers/gpu/drm/imagination/pvr_queue.c
> > +++ b/drivers/gpu/drm/imagination/pvr_queue.c
> > @@ -1445,15 +1445,11 @@ void pvr_queue_kill(struct pvr_queue *queue)
> >   */
> >  void pvr_queue_destroy(struct pvr_queue *queue)
> >  {
> > -     if (!queue)
> > -             return;
> > -
> >       mutex_lock(&queue->ctx->pvr_dev->queues.lock);
> >       list_del_init(&queue->node);
> >       mutex_unlock(&queue->ctx->pvr_dev->queues.lock);
> > 
> >       drm_sched_fini(&queue->scheduler);
> > -     drm_sched_entity_fini(&queue->entity);
> > 
> >       if (WARN_ON(queue->last_queued_job_scheduled_fence))
> >               dma_fence_put(queue->last_queued_job_scheduled_fence);
> > 
> > ---
> > base-commit: 61de054a772a1feda6364931ab1baf9038abf1c8
> > change-id: 20260610-b4-sched_fix-ac3b920f475b
> > 
> > Best regards,
> 
- Brajesh

Reply via email to