On Tue, 10 Feb 2026 11:01:59 +0100
"Christian König" <[email protected]> wrote:

> Implement per-fence spinlocks, allowing implementations to not give an
> external spinlock to protect the fence internal statei. Instead a spinlock
> embedded into the fence structure itself is used in this case.
> 
> Shared spinlocks have the problem that implementations need to guarantee
> that the lock live at least as long all fences referencing them.
> 
> Using a per-fence spinlock allows completely decoupling spinlock producer
> and consumer life times, simplifying the handling in most use cases.
> 
> v2: improve naming, coverage and function documentation
> v3: fix one additional locking in the selftests
> v4: separate out some changes to make the patch smaller,
>     fix one amdgpu crash found by CI systems
> 
> Signed-off-by: Christian König <[email protected]>
> ---
>  drivers/dma-buf/dma-fence.c             | 21 ++++++++++++++++-----
>  drivers/dma-buf/sync_debug.h            |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 +-
>  drivers/gpu/drm/drm_crtc.c              |  2 +-
>  drivers/gpu/drm/drm_writeback.c         |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_fence.c |  3 ++-
>  drivers/gpu/drm/qxl/qxl_release.c       |  3 ++-
>  drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   |  3 ++-
>  drivers/gpu/drm/xe/xe_hw_fence.c        |  3 ++-
>  include/linux/dma-fence.h               | 19 +++++++++++++------
>  10 files changed, 41 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 56aa59867eaa..1833889e7466 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -343,7 +343,6 @@ void __dma_fence_might_wait(void)
>  }
>  #endif
>  
> -
>  /**
>   * dma_fence_signal_timestamp_locked - signal completion of a fence
>   * @fence: the fence to signal
> @@ -1067,7 +1066,6 @@ static void
>  __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
>                spinlock_t *lock, u64 context, u64 seqno, unsigned long flags)
>  {
> -     BUG_ON(!lock);
>       BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
>  
>       kref_init(&fence->refcount);
> @@ -1078,10 +1076,15 @@ __dma_fence_init(struct dma_fence *fence, const 
> struct dma_fence_ops *ops,
>        */
>       RCU_INIT_POINTER(fence->ops, ops);
>       INIT_LIST_HEAD(&fence->cb_list);
> -     fence->lock = lock;
>       fence->context = context;
>       fence->seqno = seqno;
>       fence->flags = flags | BIT(DMA_FENCE_FLAG_INITIALIZED_BIT);
> +     if (lock) {
> +             fence->extern_lock = lock;
> +     } else {
> +             spin_lock_init(&fence->inline_lock);
> +             fence->flags |= BIT(DMA_FENCE_FLAG_INLINE_LOCK_BIT);

Hm, does this even make a different in term of instructions to check for
a bit instead of extern_lock == NULL? If not, I'd be in favor of
killing this redundancy and checking extern_lock against NULL in
dma_fence_spinlock().

> +     }
>       fence->error = 0;
>  
>       trace_dma_fence_init(fence);
> @@ -1091,7 +1094,7 @@ __dma_fence_init(struct dma_fence *fence, const struct 
> dma_fence_ops *ops,
>   * dma_fence_init - Initialize a custom fence.
>   * @fence: the fence to initialize
>   * @ops: the dma_fence_ops for operations on this fence
> - * @lock: the irqsafe spinlock to use for locking this fence
> + * @lock: optional irqsafe spinlock to use for locking this fence
>   * @context: the execution context this fence is run on
>   * @seqno: a linear increasing sequence number for this context
>   *
> @@ -1101,6 +1104,10 @@ __dma_fence_init(struct dma_fence *fence, const struct 
> dma_fence_ops *ops,
>   *
>   * context and seqno are used for easy comparison between fences, allowing
>   * to check which fence is later by simply using dma_fence_later().
> + *
> + * It is strongly discouraged to provide an external lock. This is only 
> allowed
> + * for legacy use cases when multiple fences need to be prevented from
> + * signaling out of order.
>   */
>  void
>  dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
> @@ -1114,7 +1121,7 @@ EXPORT_SYMBOL(dma_fence_init);
>   * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support.
>   * @fence: the fence to initialize
>   * @ops: the dma_fence_ops for operations on this fence
> - * @lock: the irqsafe spinlock to use for locking this fence
> + * @lock: optional irqsafe spinlock to use for locking this fence
>   * @context: the execution context this fence is run on
>   * @seqno: a linear increasing sequence number for this context
>   *
> @@ -1124,6 +1131,10 @@ EXPORT_SYMBOL(dma_fence_init);
>   *
>   * Context and seqno are used for easy comparison between fences, allowing
>   * to check which fence is later by simply using dma_fence_later().
> + *
> + * It is strongly discouraged to provide an external lock. This is only 
> allowed
> + * for legacy use cases when multiple fences need to be prevented from
> + * signaling out of order.
>   */
>  void
>  dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops,
> diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
> index 02af347293d0..c49324505b20 100644
> --- a/drivers/dma-buf/sync_debug.h
> +++ b/drivers/dma-buf/sync_debug.h
> @@ -47,7 +47,7 @@ struct sync_timeline {
>  
>  static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence)
>  {
> -     return container_of(fence->lock, struct sync_timeline, lock);
> +     return container_of(fence->extern_lock, struct sync_timeline, lock);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 139642eacdd0..d5c41e24fb51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -638,7 +638,7 @@ static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm 
> *vm)
>        * sure that the dma_fence structure isn't freed up.
>        */
>       rcu_read_lock();
> -     lock = vm->last_tlb_flush->lock;
> +     lock = dma_fence_spinlock(vm->last_tlb_flush);
>       rcu_read_unlock();
>  
>       spin_lock_irqsave(lock, flags);
> diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> index a7797d260f1e..17472915842f 100644
> --- a/drivers/gpu/drm/drm_crtc.c
> +++ b/drivers/gpu/drm/drm_crtc.c
> @@ -159,7 +159,7 @@ static const struct dma_fence_ops drm_crtc_fence_ops;
>  static struct drm_crtc *fence_to_crtc(struct dma_fence *fence)
>  {
>       BUG_ON(fence->ops != &drm_crtc_fence_ops);
> -     return container_of(fence->lock, struct drm_crtc, fence_lock);
> +     return container_of(fence->extern_lock, struct drm_crtc, fence_lock);
>  }
>  
>  static const char *drm_crtc_fence_get_driver_name(struct dma_fence *fence)
> diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
> index 95b8a2e4bda6..624a4e8b6c99 100644
> --- a/drivers/gpu/drm/drm_writeback.c
> +++ b/drivers/gpu/drm/drm_writeback.c
> @@ -81,7 +81,7 @@
>   *   From userspace, this property will always read as zero.
>   */
>  
> -#define fence_to_wb_connector(x) container_of(x->lock, \
> +#define fence_to_wb_connector(x) container_of(x->extern_lock, \
>                                             struct drm_writeback_connector, \
>                                             fence_lock)
>  
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
> b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 4a193b7d6d9e..c282c94138b2 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -41,7 +41,8 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy;
>  static inline struct nouveau_fence_chan *
>  nouveau_fctx(struct nouveau_fence *fence)
>  {
> -     return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
> +     return container_of(fence->base.extern_lock, struct nouveau_fence_chan,
> +                         lock);
>  }
>  
>  static bool
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c 
> b/drivers/gpu/drm/qxl/qxl_release.c
> index 06b0b2aa7953..37d4ae0faf0d 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -62,7 +62,8 @@ static long qxl_fence_wait(struct dma_fence *fence, bool 
> intr,
>       struct qxl_device *qdev;
>       unsigned long cur, end = jiffies + timeout;
>  
> -     qdev = container_of(fence->lock, struct qxl_device, release_lock);
> +     qdev = container_of(fence->extern_lock, struct qxl_device,
> +                         release_lock);
>  
>       if (!wait_event_timeout(qdev->release_event,
>                               (dma_fence_is_signaled(fence) ||
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c 
> b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index 85795082fef9..d251eec57df9 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -47,7 +47,8 @@ struct vmw_event_fence_action {
>  static struct vmw_fence_manager *
>  fman_from_fence(struct vmw_fence_obj *fence)
>  {
> -     return container_of(fence->base.lock, struct vmw_fence_manager, lock);
> +     return container_of(fence->base.extern_lock, struct vmw_fence_manager,
> +                         lock);
>  }
>  
>  static void vmw_fence_obj_destroy(struct dma_fence *f)
> diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c 
> b/drivers/gpu/drm/xe/xe_hw_fence.c
> index ae8ed15b64c5..14720623ad00 100644
> --- a/drivers/gpu/drm/xe/xe_hw_fence.c
> +++ b/drivers/gpu/drm/xe/xe_hw_fence.c
> @@ -124,7 +124,8 @@ static struct xe_hw_fence *to_xe_hw_fence(struct 
> dma_fence *fence);
>  
>  static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
>  {
> -     return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock);
> +     return container_of(fence->dma.extern_lock, struct xe_hw_fence_irq,
> +                         lock);
>  }
>  
>  static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> index 88c842fc35d5..6eabbb1c471c 100644
> --- a/include/linux/dma-fence.h
> +++ b/include/linux/dma-fence.h
> @@ -34,7 +34,8 @@ struct seq_file;
>   * @ops: dma_fence_ops associated with this fence
>   * @rcu: used for releasing fence with kfree_rcu
>   * @cb_list: list of all callbacks to call
> - * @lock: spin_lock_irqsave used for locking
> + * @extern_lock: external spin_lock_irqsave used for locking
> + * @inline_lock: alternative internal spin_lock_irqsave used for locking
>   * @context: execution context this fence belongs to, returned by
>   *           dma_fence_context_alloc()
>   * @seqno: the sequence number of this fence inside the execution context,
> @@ -49,6 +50,7 @@ struct seq_file;
>   * of the time.
>   *
>   * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized
> + * DMA_FENCE_FLAG_INLINE_LOCK_BIT - use inline spinlock instead of external 
> one
>   * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
>   * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
>   * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
> @@ -66,7 +68,10 @@ struct seq_file;
>   * been completed, or never called at all.
>   */
>  struct dma_fence {
> -     spinlock_t *lock;
> +     union {
> +             spinlock_t *extern_lock;
> +             spinlock_t inline_lock;
> +     };
>       const struct dma_fence_ops __rcu *ops;
>       /*
>        * We clear the callback list on kref_put so that by the time we
> @@ -100,6 +105,7 @@ struct dma_fence {
>  
>  enum dma_fence_flag_bits {
>       DMA_FENCE_FLAG_INITIALIZED_BIT,
> +     DMA_FENCE_FLAG_INLINE_LOCK_BIT,
>       DMA_FENCE_FLAG_SEQNO64_BIT,
>       DMA_FENCE_FLAG_SIGNALED_BIT,
>       DMA_FENCE_FLAG_TIMESTAMP_BIT,
> @@ -381,11 +387,12 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep)
>   * dma_fence_spinlock - return pointer to the spinlock protecting the fence
>   * @fence: the fence to get the lock from
>   *
> - * Return the pointer to the extern lock.
> + * Return either the pointer to the embedded or the external spin lock.
>   */
>  static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
>  {
> -     return fence->lock;
> +     return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ?
> +             &fence->inline_lock : fence->extern_lock;
>  }
>  
>  /**
> @@ -396,7 +403,7 @@ static inline spinlock_t *dma_fence_spinlock(struct 
> dma_fence *fence)
>   * Lock the fence, preventing it from changing to the signaled state.
>   */
>  #define dma_fence_lock_irqsave(fence, flags) \
> -     spin_lock_irqsave(fence->lock, flags)
> +     spin_lock_irqsave(dma_fence_spinlock(fence), flags)
>  
>  /**
>   * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
> @@ -406,7 +413,7 @@ static inline spinlock_t *dma_fence_spinlock(struct 
> dma_fence *fence)
>   * Unlock the fence, allowing it to change it's state to signaled again.
>   */
>  #define dma_fence_unlock_irqrestore(fence, flags)    \
> -     spin_unlock_irqrestore(fence->lock, flags)
> +     spin_unlock_irqrestore(dma_fence_spinlock(fence), flags)
>  
>  /**
>   * dma_fence_assert_held - lockdep assertion that fence is locked

Reply via email to