On Tue, Aug 26, 2025 at 5:57 PM David Wu <david...@amd.com> wrote:
>
> On 2025-08-26 17:20, Alex Deucher wrote:
>
> > On Tue, Aug 26, 2025 at 5:11 PM David Wu <david...@amd.com> wrote:
> >> On 2025-08-26 16:58, Alex Deucher wrote:
> >>
> >> On Tue, Aug 26, 2025 at 3:48 PM David (Ming Qiang) Wu <david....@amd.com>
> >> wrote:
> >>
> >> to_amdgpu_fence() could return a NULL pointer, which needs
> >> to be checked before dereferencing.
> >>
> >> I don't think any of these cases could ever be NULL. The amdgpu_fence
> >> is a container for the dma_fence, so it will always be present. See
> >> struct amdgpu_fence.
> >>
> >> hmmm... but the function could return NULL based on base.ops - see below.
> >> If that should never happen, then we should remove the checking. I doubt
> >> we will ever return NULL, but someone knowledgeable should please fix it
> >> properly. This patch only adds protection just in case.
> > If you look at amdgpu_fence_emit(), the fences will only be created
> > with either amdgpu_job_fence_ops or amdgpu_fence_ops, so there is no
> > way it will be NULL. It's a false positive.
> My point is that there is no guarantee for the future, even though it is
> a false positive now; the original code is not confident either that the
> condition can never be met. I think there is no harm in protecting it.
> Or, better, I can come up with another patch to remove the checking -
> I feel that leaving the NULL return unhandled is not acceptable (at
> least to silence Coverity).
> Alex - please suggest either removing the checking or doing nothing.
> I am fine either way.
Ideally we'd clean up the fence handling so that we don't need separate
fence ops for the job and non-job cases. Then we could remove the checks
in to_amdgpu_fence(). At this point we could remove the checks anyway;
I think they were mostly added on the off chance someone lifts that
helper out of amdgpu_fence.c and tries to use it for other cases.

Alex

> David
> > Alex
> >
> >> static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
> >> {
> >> 	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
> >>
> >> 	if (__f->base.ops == &amdgpu_fence_ops ||
> >> 	    __f->base.ops == &amdgpu_job_fence_ops)
> >> 		return __f;
> >>
> >> 	return NULL;
> >> }
> >>
> >> Alex
> >>
> >> Signed-off-by: David (Ming Qiang) Wu <david....@amd.com>
> >> ---
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 22 +++++++++++++++-------
> >>  1 file changed, 15 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> >> index 2d58aefbd68a7..1432b64d379ef 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> >> @@ -160,7 +160,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
> >>  		}
> >>  	}
> >>
> >> -	to_amdgpu_fence(fence)->start_timestamp = ktime_get();
> >> +	am_fence = to_amdgpu_fence(fence);
> >> +	if (am_fence)
> >> +		am_fence->start_timestamp = ktime_get();
> >>
> >>  	/* This function can't be called concurrently anyway, otherwise
> >>  	 * emitting the fence would mess up the hardware ring buffer.
> >> @@ -387,6 +389,7 @@ u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
> >>  	struct amdgpu_fence_driver *drv = &ring->fence_drv;
> >>  	struct dma_fence *fence;
> >>  	uint32_t last_seq, sync_seq;
> >> +	struct amdgpu_fence *f;
> >>
> >>  	last_seq = atomic_read(&ring->fence_drv.last_seq);
> >>  	sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
> >> @@ -399,8 +402,8 @@ u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
> >>  	if (!fence)
> >>  		return 0;
> >>
> >> -	return ktime_us_delta(ktime_get(),
> >> -			      to_amdgpu_fence(fence)->start_timestamp);
> >> +	f = to_amdgpu_fence(fence);
> >> +	return f ? ktime_us_delta(ktime_get(), f->start_timestamp) : 0;
> >>  }
> >>
> >>  /**
> >> @@ -417,13 +420,16 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
> >>  {
> >>  	struct amdgpu_fence_driver *drv = &ring->fence_drv;
> >>  	struct dma_fence *fence;
> >> +	struct amdgpu_fence *f;
> >>
> >>  	seq &= drv->num_fences_mask;
> >>  	fence = drv->fences[seq];
> >>  	if (!fence)
> >>  		return;
> >>
> >> -	to_amdgpu_fence(fence)->start_timestamp = timestamp;
> >> +	f = to_amdgpu_fence(fence);
> >> +	if (f)
> >> +		f->start_timestamp = timestamp;
> >>  }
> >>
> >>  /**
> >> @@ -827,7 +833,8 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
> >>
> >>  static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
> >>  {
> >> -	return (const char *)to_amdgpu_fence(f)->ring->name;
> >> +	struct amdgpu_fence *am_f = to_amdgpu_fence(f);
> >> +	return (const char *)(am_f ? am_f->ring->name : "");
> >>  }
> >>
> >>  static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
> >> @@ -847,8 +854,9 @@ static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
> >>   */
> >>  static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
> >>  {
> >> -	if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
> >> -		amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
> >> +	struct amdgpu_fence *am_f = to_amdgpu_fence(f);
> >> +	if (am_f && !timer_pending(&am_f->ring->fence_drv.fallback_timer))
> >> +		amdgpu_fence_schedule_fallback(am_f->ring);
> >>
> >>  	return true;
> >>  }
> >> --
> >> 2.43.0
> >>