On 27.08.25 18:03, Alex Deucher wrote:
> Decouple the amdgpu fence from the amdgpu_job structure.
> This lets us clean up the separate fence ops for the embedded
> fence and other fences.  This also allows us to allocate the
> vm fence up front when we allocate the job.
> 
> Cc: david....@amd.com
> Cc: christian.koe...@amd.com
> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 111 +++-----------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c      |  12 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  36 ++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h     |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h    |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      |   6 +-
>  8 files changed, 58 insertions(+), 117 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index f81608330a3d0..7ea3cb6491b1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1902,7 +1902,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
> amdgpu_ring *ring)
>                       continue;
>               }
>               job = to_amdgpu_job(s_job);
> -             if (preempted && (&job->hw_fence.base) == fence)
> +             if (preempted && (&job->hw_fence->base) == fence)
>                       /* mark the job as preempted */
>                       job->preemption_status |= AMDGPU_IB_PREEMPTED;
>       }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e117494e80547..efd86f095323a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -6494,7 +6494,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
>        *
>        * job->base holds a reference to parent fence
>        */
> -     if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
> +     if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
>               job_signaled = true;
>               dev_info(adev->dev, "Guilty job already signaled, skipping HW 
> reset");
>               goto skip_hw_reset;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 2d58aefbd68a7..0afe2427c25cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -45,16 +45,11 @@
>   * Cast helper
>   */
>  static const struct dma_fence_ops amdgpu_fence_ops;
> -static const struct dma_fence_ops amdgpu_job_fence_ops;
>  static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
>  {
>       struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
>  
> -     if (__f->base.ops == &amdgpu_fence_ops ||
> -         __f->base.ops == &amdgpu_job_fence_ops)
> -             return __f;
> -
> -     return NULL;
> +     return __f;
>  }
>  
>  /**
> @@ -110,35 +105,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
> dma_fence **f,
>  {

While at it I would also drop the "struct dma_fence **f" parameter.

>       struct amdgpu_device *adev = ring->adev;
>       struct dma_fence *fence;
> -     struct amdgpu_fence *am_fence;
>       struct dma_fence __rcu **ptr;
>       uint32_t seq;
>       int r;
>  
> -     if (!af) {
> -             /* create a separate hw fence */
> -             am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
> -             if (!am_fence)
> -                     return -ENOMEM;
> -     } else {
> -             am_fence = af;
> -     }
> -     fence = &am_fence->base;
> -     am_fence->ring = ring;
> +     fence = &af->base;
> +     af->ring = ring;
>  
>       seq = ++ring->fence_drv.sync_seq;
> -     am_fence->seq = seq;
> -     if (af) {
> -             dma_fence_init(fence, &amdgpu_job_fence_ops,
> -                            &ring->fence_drv.lock,
> -                            adev->fence_context + ring->idx, seq);
> -             /* Against remove in amdgpu_job_{free, free_cb} */
> -             dma_fence_get(fence);
> -     } else {
> -             dma_fence_init(fence, &amdgpu_fence_ops,
> -                            &ring->fence_drv.lock,
> -                            adev->fence_context + ring->idx, seq);
> -     }
> +     af->seq = seq;
> +     dma_fence_init(fence, &amdgpu_fence_ops,
> +                    &ring->fence_drv.lock,
> +                    adev->fence_context + ring->idx, seq);
>  
>       amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>                              seq, flags | AMDGPU_FENCE_FLAG_INT);
> @@ -683,18 +661,22 @@ void amdgpu_fence_driver_clear_job_fences(struct 
> amdgpu_ring *ring)

Please remove that function entirely.

As far as I can see it is unnecessary now, and the whole concept was always a 
completely broken idea since it signals fences in an incorrect order.

>       for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
>               ptr = &ring->fence_drv.fences[i];
>               old = rcu_dereference_protected(*ptr, 1);
> -             if (old && old->ops == &amdgpu_job_fence_ops) {
> +             if (old) {
> +                     struct amdgpu_fence *af;
>                       struct amdgpu_job *job;
>  
>                       /* For non-scheduler bad job, i.e. failed ib test, we 
> need to signal
>                        * it right here or we won't be able to track them in 
> fence_drv
>                        * and they will remain unsignaled during sa_bo free.
>                        */
> -                     job = container_of(old, struct amdgpu_job, 
> hw_fence.base);
> -                     if (!job->base.s_fence && !dma_fence_is_signaled(old))
> -                             dma_fence_signal(old);
> -                     RCU_INIT_POINTER(*ptr, NULL);
> -                     dma_fence_put(old);
> +                     af = container_of(old, struct amdgpu_fence, base);
> +                     job = af->job;
> +                     if (job) {
> +                             if (!job->base.s_fence && 
> !dma_fence_is_signaled(old))
> +                                     dma_fence_signal(old);
> +                             RCU_INIT_POINTER(*ptr, NULL);
> +                             dma_fence_put(old);
> +                     }
>               }
>       }
>  }
> @@ -830,13 +812,6 @@ static const char *amdgpu_fence_get_timeline_name(struct 
> dma_fence *f)
>       return (const char *)to_amdgpu_fence(f)->ring->name;
>  }
>  
> -static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
> -{
> -     struct amdgpu_job *job = container_of(f, struct amdgpu_job, 
> hw_fence.base);
> -
> -     return (const char *)to_amdgpu_ring(job->base.sched)->name;
> -}
> -
>  /**
>   * amdgpu_fence_enable_signaling - enable signalling on fence
>   * @f: fence
> @@ -853,23 +828,6 @@ static bool amdgpu_fence_enable_signaling(struct 
> dma_fence *f)
>       return true;
>  }
>  
> -/**
> - * amdgpu_job_fence_enable_signaling - enable signalling on job fence
> - * @f: fence
> - *
> - * This is the simliar function with amdgpu_fence_enable_signaling above, it
> - * only handles the job embedded fence.
> - */
> -static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
> -{
> -     struct amdgpu_job *job = container_of(f, struct amdgpu_job, 
> hw_fence.base);
> -
> -     if 
> (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
> -             amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
> -
> -     return true;
> -}
> -
>  /**
>   * amdgpu_fence_free - free up the fence memory
>   *
> @@ -885,21 +843,6 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
>       kfree(to_amdgpu_fence(f));
>  }
>  
> -/**
> - * amdgpu_job_fence_free - free up the job with embedded fence
> - *
> - * @rcu: RCU callback head
> - *
> - * Free up the job with embedded fence after the RCU grace period.
> - */
> -static void amdgpu_job_fence_free(struct rcu_head *rcu)
> -{
> -     struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
> -
> -     /* free job if fence has a parent job */
> -     kfree(container_of(f, struct amdgpu_job, hw_fence.base));
> -}
> -
>  /**
>   * amdgpu_fence_release - callback that fence can be freed
>   *
> @@ -913,19 +856,6 @@ static void amdgpu_fence_release(struct dma_fence *f)
>       call_rcu(&f->rcu, amdgpu_fence_free);
>  }
>  
> -/**
> - * amdgpu_job_fence_release - callback that job embedded fence can be freed
> - *
> - * @f: fence
> - *
> - * This is the simliar function with amdgpu_fence_release above, it
> - * only handles the job embedded fence.
> - */
> -static void amdgpu_job_fence_release(struct dma_fence *f)
> -{
> -     call_rcu(&f->rcu, amdgpu_job_fence_free);
> -}
> -
>  static const struct dma_fence_ops amdgpu_fence_ops = {
>       .get_driver_name = amdgpu_fence_get_driver_name,
>       .get_timeline_name = amdgpu_fence_get_timeline_name,
> @@ -933,13 +863,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
>       .release = amdgpu_fence_release,
>  };
>  
> -static const struct dma_fence_ops amdgpu_job_fence_ops = {
> -     .get_driver_name = amdgpu_fence_get_driver_name,
> -     .get_timeline_name = amdgpu_job_fence_get_timeline_name,
> -     .enable_signaling = amdgpu_job_fence_enable_signaling,
> -     .release = amdgpu_job_fence_release,
> -};
> -
>  /*
>   * Fence debugfs
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 7d9bcb72e8dd3..192ed1bd2ec63 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -128,7 +128,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
> int num_ibs,
>       struct amdgpu_device *adev = ring->adev;
>       struct amdgpu_ib *ib = &ibs[0];
>       struct dma_fence *tmp = NULL;
> -     struct amdgpu_fence *af;
> +     struct amdgpu_fence *af, *vm_af;
>       bool need_ctx_switch;
>       struct amdgpu_vm *vm;
>       uint64_t fence_ctx;
> @@ -154,12 +154,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> unsigned int num_ibs,
>               csa_va = job->csa_va;
>               gds_va = job->gds_va;
>               init_shadow = job->init_shadow;
> -             af = &job->hw_fence;
> +             af = job->hw_fence;
>               /* Save the context of the job for reset handling.
>                * The driver needs this so it can skip the ring
>                * contents for guilty contexts.
>                */
>               af->context = job->base.s_fence ? 
> job->base.s_fence->finished.context : 0;
> +             vm_af = job->hw_vm_fence;
> +             /* the vm fence is also part of the job's context */
> +             vm_af->context = job->base.s_fence ? 
> job->base.s_fence->finished.context : 0;

We already have the fence_ctx around as a local variable, but at the moment it 
uses job->base.s_fence->scheduled.context.

Should be trivial to switch that to job->base.s_fence->finished.context so that 
we don't need to duplicate the "job->base.s_fence ? " part multiple times.

>       } else {
>               vm = NULL;
>               fence_ctx = 0;
> @@ -167,7 +170,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> unsigned int num_ibs,
>               csa_va = 0;
>               gds_va = 0;
>               init_shadow = false;
> -             af = NULL;
> +             af = kzalloc(sizeof(*af), GFP_NOWAIT);
> +             if (!af)
> +                     return -ENOMEM;
> +             vm_af = NULL;
>       }
>  
>       if (!ring->sched.ready) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 311e97c96c4e0..4189a2b8cb674 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -138,7 +138,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
> drm_sched_job *s_job)
>                  ring->funcs->reset) {
>               dev_err(adev->dev, "Starting %s ring reset\n",
>                       s_job->sched->name);
> -             r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
> +             r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
>               if (!r) {
>                       atomic_inc(&ring->adev->gpu_reset_counter);
>                       dev_err(adev->dev, "Ring %s reset succeeded\n",
> @@ -185,6 +185,8 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct 
> amdgpu_vm *vm,
>                    struct drm_sched_entity *entity, void *owner,
>                    unsigned int num_ibs, struct amdgpu_job **job)
>  {
> +     struct amdgpu_fence *af;
> +
>       if (num_ibs == 0)
>               return -EINVAL;
>  
> @@ -192,6 +194,23 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct 
> amdgpu_vm *vm,
>       if (!*job)
>               return -ENOMEM;
>  
> +     af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
> +     if (!af) {
> +             kfree(*job);
> +             return -ENOMEM;
> +     }

> +     /* assign the job so we can clean up in 
> amdgpu_fence_driver_clear_job_fences() */
> +     af->job = *job;

That becomes unnecessary when we remove amdgpu_fence_driver_clear_job_fences().

> +     (*job)->hw_fence = af;
> +
> +     af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
> +     if (!af) {
> +             kfree((*job)->hw_fence);
> +             kfree(*job);
> +             return -ENOMEM;
> +     }

Not a must have, but goto cleanup would probably look nicer.

> +     (*job)->hw_vm_fence = af;
> +
>       (*job)->vm = vm;
>  
>       amdgpu_sync_create(&(*job)->explicit_sync);
> @@ -251,8 +270,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
>       /* Check if any fences where initialized */
>       if (job->base.s_fence && job->base.s_fence->finished.ops)
>               f = &job->base.s_fence->finished;
> -     else if (job->hw_fence.base.ops)
> -             f = &job->hw_fence.base;
> +     else if (job->hw_fence)
> +             f = &job->hw_fence->base;
>       else
>               f = NULL;
>  
> @@ -268,11 +287,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job 
> *s_job)
>  
>       amdgpu_sync_free(&job->explicit_sync);
>  
> -     /* only put the hw fence if has embedded fence */
> -     if (!job->hw_fence.base.ops)
> -             kfree(job);
> -     else
> -             dma_fence_put(&job->hw_fence.base);
> +     kfree(job);
>  }
>  
>  void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
> @@ -301,10 +316,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
>       if (job->gang_submit != &job->base.s_fence->scheduled)
>               dma_fence_put(job->gang_submit);
>  
> -     if (!job->hw_fence.base.ops)
> -             kfree(job);
> -     else
> -             dma_fence_put(&job->hw_fence.base);
> +     kfree(job);
>  }
>  
>  struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> index 931fed8892cc1..077b2414a24b7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> @@ -48,7 +48,8 @@ struct amdgpu_job {
>       struct drm_sched_job    base;
>       struct amdgpu_vm        *vm;
>       struct amdgpu_sync      explicit_sync;
> -     struct amdgpu_fence     hw_fence;
> +     struct amdgpu_fence     *hw_fence;
> +     struct amdgpu_fence     *hw_vm_fence;
>       struct dma_fence        *gang_submit;
>       uint32_t                preamble_status;
>       uint32_t                preemption_status;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 7670f5d82b9e4..ceadd7e4bdb58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -143,6 +143,9 @@ struct amdgpu_fence {
>       struct amdgpu_ring              *ring;
>       ktime_t                         start_timestamp;
>  
> +     /* store the job for cleanup */
> +     struct amdgpu_job *job;
> +

That can be dropped as well.

Regards,
Christian.

>       /* wptr for the fence for resets */
>       u64                             wptr;
>       /* fence context for resets */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index bf42246a3db2f..a287718506aa6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -772,7 +772,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>       bool cleaner_shader_needed = false;
>       bool pasid_mapping_needed = false;
>       struct dma_fence *fence = NULL;
> -     struct amdgpu_fence *af;
>       unsigned int patch;
>       int r;
>  
> @@ -835,12 +834,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>       }
>  
>       if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
> -             r = amdgpu_fence_emit(ring, &fence, NULL, 0);
> +             r = amdgpu_fence_emit(ring, &fence, job->hw_vm_fence, 0);
>               if (r)
>                       return r;
> -             /* this is part of the job's context */
> -             af = container_of(fence, struct amdgpu_fence, base);
> -             af->context = job->base.s_fence ? 
> job->base.s_fence->finished.context : 0;
>       }
>  
>       if (vm_flush_needed) {

Reply via email to