Re: [PATCH] drm/amdgpu: clean up and unify hw fence handling

David Wu Thu, 28 Aug 2025 13:46:28 -0700

On 2025-08-28 16:26, Alex Deucher wrote:

On Thu, Aug 28, 2025 at 1:38 PM David Wu <david...@amd.com> wrote:

On 2025-08-28 10:05, Alex Deucher wrote:


Decouple the amdgpu fence from the amdgpu_job structure.
This lets us clean up the separate fence ops for the embedded
fence and other fences.  This also allows us to allocate the
vm fence up front when we allocate the job.

Cc: david....@amd.com
Cc: christian.koe...@amd.com
Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
---

v2: Additional cleanup suggested by Christian

  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   7 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 140 ++------------------
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c      |  22 +--
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  41 ++++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h     |   3 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h    |   7 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      |   7 +-
  8 files changed, 64 insertions(+), 165 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f81608330a3d0..7ea3cb6491b1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1902,7 +1902,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
   continue;
   }
   job = to_amdgpu_job(s_job);
- if (preempted && (&job->hw_fence.base) == fence)
+ if (preempted && (&job->hw_fence->base) == fence)
   /* mark the job as preempted */
   job->preemption_status |= AMDGPU_IB_PREEMPTED;
   }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7783272a79302..add272fa31288 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5798,11 +5798,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
   if (!amdgpu_ring_sched_ready(ring))
   continue;

- /* Clear job fence from fence drv to avoid force_completion
- * leave NULL and vm flush fence in fence drv
- */
- amdgpu_fence_driver_clear_job_fences(ring);
-
   /* after all hw jobs are reset, hw fence is meaningless, so force_completion 
*/
   amdgpu_fence_driver_force_completion(ring);
   }
@@ -6526,7 +6521,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
   *
   * job->base holds a reference to parent fence
   */
- if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
+ if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
   job_signaled = true;
   dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
   goto skip_hw_reset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 2d58aefbd68a7..1355fee0e978d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -45,16 +45,11 @@
   * Cast helper
   */
  static const struct dma_fence_ops amdgpu_fence_ops;
-static const struct dma_fence_ops amdgpu_job_fence_ops;
  static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
  {
   struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);

- if (__f->base.ops == &amdgpu_fence_ops ||
-    __f->base.ops == &amdgpu_job_fence_ops)
- return __f;
-
- return NULL;
+ return __f;
  }

  /**
@@ -98,51 +93,33 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
   * amdgpu_fence_emit - emit a fence on the requested ring
   *
   * @ring: ring the fence is associated with
- * @f: resulting fence object
   * @af: amdgpu fence input
   * @flags: flags to pass into the subordinate .emit_fence() call
   *
   * Emits a fence command on the requested ring (all asics).
   * Returns 0 on success, -ENOMEM on failure.
   */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
-      struct amdgpu_fence *af, unsigned int flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+      unsigned int flags)
  {
   struct amdgpu_device *adev = ring->adev;
   struct dma_fence *fence;
- struct amdgpu_fence *am_fence;
   struct dma_fence __rcu **ptr;
   uint32_t seq;
   int r;

- if (!af) {
- /* create a separate hw fence */
- am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
- if (!am_fence)
- return -ENOMEM;
- } else {
- am_fence = af;
- }
- fence = &am_fence->base;
- am_fence->ring = ring;
+ fence = &af->base;
+ af->ring = ring;

   seq = ++ring->fence_drv.sync_seq;
- am_fence->seq = seq;
- if (af) {
- dma_fence_init(fence, &amdgpu_job_fence_ops,
-       &ring->fence_drv.lock,
-       adev->fence_context + ring->idx, seq);
- /* Against remove in amdgpu_job_{free, free_cb} */
- dma_fence_get(fence);
- } else {
- dma_fence_init(fence, &amdgpu_fence_ops,
-       &ring->fence_drv.lock,
-       adev->fence_context + ring->idx, seq);
- }
+ af->seq = seq;
+ dma_fence_init(fence, &amdgpu_fence_ops,
+       &ring->fence_drv.lock,
+       adev->fence_context + ring->idx, seq);


It seems we are missing a dma_fence_get(fence) somewhere, as I got the
following error:

[    8.317720] ------------[ cut here ]------------
[    8.317723] refcount_t: underflow; use-after-free.
[    8.317734] WARNING: CPU: 18 PID: 752 at lib/refcount.c:28 
refcount_warn_saturate+0xf7/0x150
[    8.317743] Modules linked in: amdgpu(E) amdxcp drm_ttm_helper ttm drm_exec 
gpu_sched drm_suballoc_helper video drm_panel_backlight_quirks cec rc_core 
drm_buddy drm_display_helper drm_client_lib drm_kms_helper nvme drm igb ahci 
nvme_core dca i2c_algo_bit libahci wmi hid_generic usbhid hid
[    8.317786] CPU: 18 UID: 0 PID: 752 Comm: kworker/u256:2 Tainted: G          
  E      6.14.0+ #61
[    8.317790] Tainted: [E]=UNSIGNED_MODULE
[    8.317792] Hardware name: Gigabyte Technology Co., Ltd. X399 DESIGNARE 
EX/X399 DESIGNARE EX-CF, BIOS F11 10/04/2018
[    8.317795] Workqueue: sdma1 drm_sched_run_job_work [gpu_sched]
[    8.317804] RIP: 0010:refcount_warn_saturate+0xf7/0x150
[    8.317808] Code: eb 9e 0f b6 1d 92 c1 ba 01 80 fb 01 0f 87 32 6e 7c ff 83 e3 01 
75 89 48 c7 c7 f0 c7 1e a2 c6 05 76 c1 ba 01 01 e8 69 93 8f ff <0f> 0b e9 6f ff 
ff ff 0f b6 1d 64 c1 ba 01 80 fb 01 0f 87 ef 6d 7c
...

For posterity, the problem is here:
-       else if (job->hw_fence.base.ops)
-               f = &job->hw_fence.base;
+       else if (job->hw_fence)
+               f = &job->hw_fence->base;

We need to check if the fence is initialized (ops is present), not
just whether the fence is allocated.

Thanks for the info 🙂 I will test the new patch and report back...

Alex

   amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
         seq, flags | AMDGPU_FENCE_FLAG_INT);
- amdgpu_fence_save_wptr(fence);
+ amdgpu_fence_save_wptr(af);
   pm_runtime_get_noresume(adev_to_drm(adev)->dev);
   ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
   if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -167,8 +144,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
   */
   rcu_assign_pointer(*ptr, dma_fence_get(fence));

- *f = fence;
-
   return 0;
  }

@@ -669,36 +644,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device 
*adev)
   }
  }

-/**
- * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
- *
- * @ring: fence of the ring to be cleared
- *
- */
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
-{
- int i;
- struct dma_fence *old, **ptr;
-
- for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
- ptr = &ring->fence_drv.fences[i];
- old = rcu_dereference_protected(*ptr, 1);
- if (old && old->ops == &amdgpu_job_fence_ops) {
- struct amdgpu_job *job;
-
- /* For non-scheduler bad job, i.e. failed ib test, we need to signal
- * it right here or we won't be able to track them in fence_drv
- * and they will remain unsignaled during sa_bo free.
- */
- job = container_of(old, struct amdgpu_job, hw_fence.base);
- if (!job->base.s_fence && !dma_fence_is_signaled(old))
- dma_fence_signal(old);
- RCU_INIT_POINTER(*ptr, NULL);
- dma_fence_put(old);
- }
- }
-}
-
  /**
   * amdgpu_fence_driver_set_error - set error code on fences
   * @ring: the ring which contains the fences
@@ -765,11 +710,9 @@ void amdgpu_fence_driver_guilty_force_completion(struct 
amdgpu_fence *fence)
   amdgpu_fence_process(fence->ring);
  }

-void amdgpu_fence_save_wptr(struct dma_fence *fence)
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
  {
- struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, 
base);
-
- am_fence->wptr = am_fence->ring->wptr;
+ af->wptr = af->ring->wptr;
  }

  static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
@@ -830,13 +773,6 @@ static const char *amdgpu_fence_get_timeline_name(struct 
dma_fence *f)
   return (const char *)to_amdgpu_fence(f)->ring->name;
  }

-static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
-{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
- return (const char *)to_amdgpu_ring(job->base.sched)->name;
-}
-
  /**
   * amdgpu_fence_enable_signaling - enable signalling on fence
   * @f: fence
@@ -853,23 +789,6 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence 
*f)
   return true;
  }

-/**
- * amdgpu_job_fence_enable_signaling - enable signalling on job fence
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_enable_signaling above, it
- * only handles the job embedded fence.
- */
-static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
-{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
- if 
(!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
-
- return true;
-}
-
  /**
   * amdgpu_fence_free - free up the fence memory
   *
@@ -885,21 +804,6 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
   kfree(to_amdgpu_fence(f));
  }

-/**
- * amdgpu_job_fence_free - free up the job with embedded fence
- *
- * @rcu: RCU callback head
- *
- * Free up the job with embedded fence after the RCU grace period.
- */
-static void amdgpu_job_fence_free(struct rcu_head *rcu)
-{
- struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
-
- /* free job if fence has a parent job */
- kfree(container_of(f, struct amdgpu_job, hw_fence.base));
-}
-
  /**
   * amdgpu_fence_release - callback that fence can be freed
   *
@@ -913,19 +817,6 @@ static void amdgpu_fence_release(struct dma_fence *f)
   call_rcu(&f->rcu, amdgpu_fence_free);
  }

-/**
- * amdgpu_job_fence_release - callback that job embedded fence can be freed
- *
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_release above, it
- * only handles the job embedded fence.
- */
-static void amdgpu_job_fence_release(struct dma_fence *f)
-{
- call_rcu(&f->rcu, amdgpu_job_fence_free);
-}
-
  static const struct dma_fence_ops amdgpu_fence_ops = {
   .get_driver_name = amdgpu_fence_get_driver_name,
   .get_timeline_name = amdgpu_fence_get_timeline_name,
@@ -933,13 +824,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
   .release = amdgpu_fence_release,
  };

-static const struct dma_fence_ops amdgpu_job_fence_ops = {
- .get_driver_name = amdgpu_fence_get_driver_name,
- .get_timeline_name = amdgpu_job_fence_get_timeline_name,
- .enable_signaling = amdgpu_job_fence_enable_signaling,
- .release = amdgpu_job_fence_release,
-};
-
  /*
   * Fence debugfs
   */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7d9bcb72e8dd3..71215aeb1b6f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -128,7 +128,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
   struct amdgpu_device *adev = ring->adev;
   struct amdgpu_ib *ib = &ibs[0];
   struct dma_fence *tmp = NULL;
- struct amdgpu_fence *af;
+ struct amdgpu_fence *af, *vm_af;
   bool need_ctx_switch;
   struct amdgpu_vm *vm;
   uint64_t fence_ctx;
@@ -148,18 +148,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
   /* ring tests don't use a job */
   if (job) {
   vm = job->vm;
- fence_ctx = job->base.s_fence ?
- job->base.s_fence->scheduled.context : 0;
+ fence_ctx = job->base.s_fence ? job->base.s_fence->finished.context : 0;
   shadow_va = job->shadow_va;
   csa_va = job->csa_va;
   gds_va = job->gds_va;
   init_shadow = job->init_shadow;
- af = &job->hw_fence;
+ af = job->hw_fence;
   /* Save the context of the job for reset handling.
   * The driver needs this so it can skip the ring
   * contents for guilty contexts.
   */
- af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+ af->context = fence_ctx;
+ vm_af = job->hw_vm_fence;
+ /* the vm fence is also part of the job's context */
+ vm_af->context = fence_ctx;

I think vm_af is not needed - the code above can simply be:

job->hw_vm_fence->context = fence_ctx;

   } else {
   vm = NULL;
   fence_ctx = 0;
@@ -167,7 +169,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
   csa_va = 0;
   gds_va = 0;
   init_shadow = false;
- af = NULL;
+ af = kzalloc(sizeof(*af), GFP_NOWAIT);
+ if (!af)
+ return -ENOMEM;
+ vm_af = NULL;

vm_af can be removed here as well.

   }

   if (!ring->sched.ready) {
@@ -289,7 +294,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
   amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
   }

- r = amdgpu_fence_emit(ring, f, af, fence_flags);
+ r = amdgpu_fence_emit(ring, af, fence_flags);
   if (r) {
   dev_err(adev->dev, "failed to emit fence (%d)\n", r);
   if (job && job->vmid)
@@ -297,6 +302,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
   amdgpu_ring_undo(ring);
   return r;
   }
+ *f = &af->base;

   if (ring->funcs->insert_end)
   ring->funcs->insert_end(ring);
@@ -317,7 +323,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
int num_ibs,
   * fence so we know what rings contents to backup
   * after we reset the queue.
   */
- amdgpu_fence_save_wptr(*f);
+ amdgpu_fence_save_wptr(af);

   amdgpu_ring_ib_end(ring);
   amdgpu_ring_commit(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 311e97c96c4e0..9a78fe01efa3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -138,7 +138,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
     ring->funcs->reset) {
   dev_err(adev->dev, "Starting %s ring reset\n",
   s_job->sched->name);
- r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
+ r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
   if (!r) {
   atomic_inc(&ring->adev->gpu_reset_counter);
   dev_err(adev->dev, "Ring %s reset succeeded\n",
@@ -185,6 +185,9 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
       struct drm_sched_entity *entity, void *owner,
       unsigned int num_ibs, struct amdgpu_job **job)
  {
+ struct amdgpu_fence *af;
+ int r;
+
   if (num_ibs == 0)
   return -EINVAL;

@@ -192,6 +195,20 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
   if (!*job)
   return -ENOMEM;

+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_job;
+ }
+ (*job)->hw_fence = af;
+
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_fence;
+ }
+ (*job)->hw_vm_fence = af;
+
   (*job)->vm = vm;

   amdgpu_sync_create(&(*job)->explicit_sync);
@@ -202,6 +219,13 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
   return 0;

   return drm_sched_job_init(&(*job)->base, entity, 1, owner);
+
+err_fence:
+ kfree((*job)->hw_fence);
+err_job:
+ kfree(*job);
+
+ return r;
  }

  int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
@@ -251,8 +275,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
   /* Check if any fences where initialized */
   if (job->base.s_fence && job->base.s_fence->finished.ops)
   f = &job->base.s_fence->finished;
- else if (job->hw_fence.base.ops)
- f = &job->hw_fence.base;
+ else if (job->hw_fence)
+ f = &job->hw_fence->base;
   else
   f = NULL;

@@ -268,11 +292,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)

   amdgpu_sync_free(&job->explicit_sync);

- /* only put the hw fence if has embedded fence */
- if (!job->hw_fence.base.ops)
- kfree(job);
- else
- dma_fence_put(&job->hw_fence.base);
+ kfree(job);
  }

  void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -301,10 +321,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
   if (job->gang_submit != &job->base.s_fence->scheduled)
   dma_fence_put(job->gang_submit);

- if (!job->hw_fence.base.ops)
- kfree(job);
- else
- dma_fence_put(&job->hw_fence.base);
+ kfree(job);
  }

  struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 931fed8892cc1..077b2414a24b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -48,7 +48,8 @@ struct amdgpu_job {
   struct drm_sched_job    base;
   struct amdgpu_vm *vm;
   struct amdgpu_sync explicit_sync;
- struct amdgpu_fence hw_fence;
+ struct amdgpu_fence *hw_fence;
+ struct amdgpu_fence *hw_vm_fence;
   struct dma_fence *gang_submit;
   uint32_t preamble_status;
   uint32_t                preemption_status;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7670f5d82b9e4..901f8bd375212 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -152,11 +152,10 @@ struct amdgpu_fence {

  extern const struct drm_sched_backend_ops amdgpu_sched_ops;

-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
  void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
  void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
  void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence);
-void amdgpu_fence_save_wptr(struct dma_fence *fence);
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af);

  int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
  int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -166,8 +165,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device 
*adev);
  void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
  int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
  void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
-      struct amdgpu_fence *af, unsigned int flags);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+      unsigned int flags);
  int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
        uint32_t timeout);
  bool amdgpu_fence_process(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bf42246a3db2f..7d1a363ad6878 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -772,7 +772,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
   bool cleaner_shader_needed = false;
   bool pasid_mapping_needed = false;
   struct dma_fence *fence = NULL;
- struct amdgpu_fence *af;
   unsigned int patch;
   int r;

@@ -835,13 +834,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
   }

   if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
- r = amdgpu_fence_emit(ring, &fence, NULL, 0);
+ r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
   if (r)
   return r;
- /* this is part of the job's context */
- af = container_of(fence, struct amdgpu_fence, base);
- af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
   }
+ fence = &job->hw_vm_fence->base;

   if (vm_flush_needed) {
   mutex_lock(&id_mgr->lock);

Re: [PATCH] drm/amdgpu: clean up and unify hw fence handling

Reply via email to