On Fri, 13 Feb 2026 15:27:33 +0100
Boris Brezillon <[email protected]> wrote:

> On Tue, 10 Feb 2026 11:01:59 +0100
> "Christian König" <[email protected]> wrote:
> 
> > Implement per-fence spinlocks, allowing implementations to not give an
> > external spinlock to protect the fence internal state. Instead a spinlock
> > embedded into the fence structure itself is used in this case.
> > 
> > Shared spinlocks have the problem that implementations need to guarantee
> > that the lock lives at least as long as all fences referencing it.
> > 
> > Using a per-fence spinlock allows completely decoupling spinlock producer
> > and consumer life times, simplifying the handling in most use cases.
> > 
> > v2: improve naming, coverage and function documentation
> > v3: fix one additional locking in the selftests
> > v4: separate out some changes to make the patch smaller,
> >     fix one amdgpu crash found by CI systems
> > 
> > Signed-off-by: Christian König <[email protected]>
> > ---
> >  drivers/dma-buf/dma-fence.c             | 21 ++++++++++++++++-----
> >  drivers/dma-buf/sync_debug.h            |  2 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 +-
> >  drivers/gpu/drm/drm_crtc.c              |  2 +-
> >  drivers/gpu/drm/drm_writeback.c         |  2 +-
> >  drivers/gpu/drm/nouveau/nouveau_fence.c |  3 ++-
> >  drivers/gpu/drm/qxl/qxl_release.c       |  3 ++-
> >  drivers/gpu/drm/vmwgfx/vmwgfx_fence.c   |  3 ++-
> >  drivers/gpu/drm/xe/xe_hw_fence.c        |  3 ++-
> >  include/linux/dma-fence.h               | 19 +++++++++++++------
> >  10 files changed, 41 insertions(+), 19 deletions(-)
> > 
> > diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> > index 56aa59867eaa..1833889e7466 100644
> > --- a/drivers/dma-buf/dma-fence.c
> > +++ b/drivers/dma-buf/dma-fence.c
> > @@ -343,7 +343,6 @@ void __dma_fence_might_wait(void)
> >  }
> >  #endif
> >  
> > -
> >  /**
> >   * dma_fence_signal_timestamp_locked - signal completion of a fence
> >   * @fence: the fence to signal
> > @@ -1067,7 +1066,6 @@ static void
> >  __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
> >              spinlock_t *lock, u64 context, u64 seqno, unsigned long flags)
> >  {
> > -   BUG_ON(!lock);
> >     BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
> >  
> >     kref_init(&fence->refcount);
> > @@ -1078,10 +1076,15 @@ __dma_fence_init(struct dma_fence *fence, const 
> > struct dma_fence_ops *ops,
> >      */
> >     RCU_INIT_POINTER(fence->ops, ops);
> >     INIT_LIST_HEAD(&fence->cb_list);
> > -   fence->lock = lock;
> >     fence->context = context;
> >     fence->seqno = seqno;
> >     fence->flags = flags | BIT(DMA_FENCE_FLAG_INITIALIZED_BIT);
> > +   if (lock) {
> > +           fence->extern_lock = lock;
> > +   } else {
> > +           spin_lock_init(&fence->inline_lock);
> > +           fence->flags |= BIT(DMA_FENCE_FLAG_INLINE_LOCK_BIT);  
> 
> Hm, does this even make a difference in terms of instructions to check for
> a bit instead of extern_lock == NULL? If not, I'd be in favor of
> killing this redundancy and checking extern_lock against NULL in
> dma_fence_spinlock().

Scratch that, I didn't notice {extern,inline}_lock were under a union
(which makes sense). Looks all good to me.

Reviewed-by: Boris Brezillon <[email protected]>

> 
> > +   }
> >     fence->error = 0;
> >  
> >     trace_dma_fence_init(fence);
> > @@ -1091,7 +1094,7 @@ __dma_fence_init(struct dma_fence *fence, const 
> > struct dma_fence_ops *ops,
> >   * dma_fence_init - Initialize a custom fence.
> >   * @fence: the fence to initialize
> >   * @ops: the dma_fence_ops for operations on this fence
> > - * @lock: the irqsafe spinlock to use for locking this fence
> > + * @lock: optional irqsafe spinlock to use for locking this fence
> >   * @context: the execution context this fence is run on
> >   * @seqno: a linear increasing sequence number for this context
> >   *
> > @@ -1101,6 +1104,10 @@ __dma_fence_init(struct dma_fence *fence, const 
> > struct dma_fence_ops *ops,
> >   *
> >   * context and seqno are used for easy comparison between fences, allowing
> >   * to check which fence is later by simply using dma_fence_later().
> > + *
> > + * It is strongly discouraged to provide an external lock. This is only 
> > allowed
> > + * for legacy use cases when multiple fences need to be prevented from
> > + * signaling out of order.
> >   */
> >  void
> >  dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
> > @@ -1114,7 +1121,7 @@ EXPORT_SYMBOL(dma_fence_init);
> >   * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support.
> >   * @fence: the fence to initialize
> >   * @ops: the dma_fence_ops for operations on this fence
> > - * @lock: the irqsafe spinlock to use for locking this fence
> > + * @lock: optional irqsafe spinlock to use for locking this fence
> >   * @context: the execution context this fence is run on
> >   * @seqno: a linear increasing sequence number for this context
> >   *
> > @@ -1124,6 +1131,10 @@ EXPORT_SYMBOL(dma_fence_init);
> >   *
> >   * Context and seqno are used for easy comparison between fences, allowing
> >   * to check which fence is later by simply using dma_fence_later().
> > + *
> > + * It is strongly discouraged to provide an external lock. This is only 
> > allowed
> > + * for legacy use cases when multiple fences need to be prevented from
> > + * signaling out of order.
> >   */
> >  void
> >  dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops,
> > diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
> > index 02af347293d0..c49324505b20 100644
> > --- a/drivers/dma-buf/sync_debug.h
> > +++ b/drivers/dma-buf/sync_debug.h
> > @@ -47,7 +47,7 @@ struct sync_timeline {
> >  
> >  static inline struct sync_timeline *dma_fence_parent(struct dma_fence 
> > *fence)
> >  {
> > -   return container_of(fence->lock, struct sync_timeline, lock);
> > +   return container_of(fence->extern_lock, struct sync_timeline, lock);
> >  }
> >  
> >  /**
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > index 139642eacdd0..d5c41e24fb51 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > @@ -638,7 +638,7 @@ static inline uint64_t amdgpu_vm_tlb_seq(struct 
> > amdgpu_vm *vm)
> >      * sure that the dma_fence structure isn't freed up.
> >      */
> >     rcu_read_lock();
> > -   lock = vm->last_tlb_flush->lock;
> > +   lock = dma_fence_spinlock(vm->last_tlb_flush);
> >     rcu_read_unlock();
> >  
> >     spin_lock_irqsave(lock, flags);
> > diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> > index a7797d260f1e..17472915842f 100644
> > --- a/drivers/gpu/drm/drm_crtc.c
> > +++ b/drivers/gpu/drm/drm_crtc.c
> > @@ -159,7 +159,7 @@ static const struct dma_fence_ops drm_crtc_fence_ops;
> >  static struct drm_crtc *fence_to_crtc(struct dma_fence *fence)
> >  {
> >     BUG_ON(fence->ops != &drm_crtc_fence_ops);
> > -   return container_of(fence->lock, struct drm_crtc, fence_lock);
> > +   return container_of(fence->extern_lock, struct drm_crtc, fence_lock);
> >  }
> >  
> >  static const char *drm_crtc_fence_get_driver_name(struct dma_fence *fence)
> > diff --git a/drivers/gpu/drm/drm_writeback.c 
> > b/drivers/gpu/drm/drm_writeback.c
> > index 95b8a2e4bda6..624a4e8b6c99 100644
> > --- a/drivers/gpu/drm/drm_writeback.c
> > +++ b/drivers/gpu/drm/drm_writeback.c
> > @@ -81,7 +81,7 @@
> >   * From userspace, this property will always read as zero.
> >   */
> >  
> > -#define fence_to_wb_connector(x) container_of(x->lock, \
> > +#define fence_to_wb_connector(x) container_of(x->extern_lock, \
> >                                           struct drm_writeback_connector, \
> >                                           fence_lock)
> >  
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
> > b/drivers/gpu/drm/nouveau/nouveau_fence.c
> > index 4a193b7d6d9e..c282c94138b2 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> > @@ -41,7 +41,8 @@ static const struct dma_fence_ops 
> > nouveau_fence_ops_legacy;
> >  static inline struct nouveau_fence_chan *
> >  nouveau_fctx(struct nouveau_fence *fence)
> >  {
> > -   return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
> > +   return container_of(fence->base.extern_lock, struct nouveau_fence_chan,
> > +                       lock);
> >  }
> >  
> >  static bool
> > diff --git a/drivers/gpu/drm/qxl/qxl_release.c 
> > b/drivers/gpu/drm/qxl/qxl_release.c
> > index 06b0b2aa7953..37d4ae0faf0d 100644
> > --- a/drivers/gpu/drm/qxl/qxl_release.c
> > +++ b/drivers/gpu/drm/qxl/qxl_release.c
> > @@ -62,7 +62,8 @@ static long qxl_fence_wait(struct dma_fence *fence, bool 
> > intr,
> >     struct qxl_device *qdev;
> >     unsigned long cur, end = jiffies + timeout;
> >  
> > -   qdev = container_of(fence->lock, struct qxl_device, release_lock);
> > +   qdev = container_of(fence->extern_lock, struct qxl_device,
> > +                       release_lock);
> >  
> >     if (!wait_event_timeout(qdev->release_event,
> >                             (dma_fence_is_signaled(fence) ||
> > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c 
> > b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> > index 85795082fef9..d251eec57df9 100644
> > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> > @@ -47,7 +47,8 @@ struct vmw_event_fence_action {
> >  static struct vmw_fence_manager *
> >  fman_from_fence(struct vmw_fence_obj *fence)
> >  {
> > -   return container_of(fence->base.lock, struct vmw_fence_manager, lock);
> > +   return container_of(fence->base.extern_lock, struct vmw_fence_manager,
> > +                       lock);
> >  }
> >  
> >  static void vmw_fence_obj_destroy(struct dma_fence *f)
> > diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c 
> > b/drivers/gpu/drm/xe/xe_hw_fence.c
> > index ae8ed15b64c5..14720623ad00 100644
> > --- a/drivers/gpu/drm/xe/xe_hw_fence.c
> > +++ b/drivers/gpu/drm/xe/xe_hw_fence.c
> > @@ -124,7 +124,8 @@ static struct xe_hw_fence *to_xe_hw_fence(struct 
> > dma_fence *fence);
> >  
> >  static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
> >  {
> > -   return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock);
> > +   return container_of(fence->dma.extern_lock, struct xe_hw_fence_irq,
> > +                       lock);
> >  }
> >  
> >  static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
> > diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
> > index 88c842fc35d5..6eabbb1c471c 100644
> > --- a/include/linux/dma-fence.h
> > +++ b/include/linux/dma-fence.h
> > @@ -34,7 +34,8 @@ struct seq_file;
> >   * @ops: dma_fence_ops associated with this fence
> >   * @rcu: used for releasing fence with kfree_rcu
> >   * @cb_list: list of all callbacks to call
> > - * @lock: spin_lock_irqsave used for locking
> > + * @extern_lock: external spin_lock_irqsave used for locking
> > + * @inline_lock: alternative internal spin_lock_irqsave used for locking
> >   * @context: execution context this fence belongs to, returned by
> >   *           dma_fence_context_alloc()
> >   * @seqno: the sequence number of this fence inside the execution context,
> > @@ -49,6 +50,7 @@ struct seq_file;
> >   * of the time.
> >   *
> >   * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized
> > + * DMA_FENCE_FLAG_INLINE_LOCK_BIT - use inline spinlock instead of 
> > external one
> >   * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
> >   * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
> >   * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been 
> > called
> > @@ -66,7 +68,10 @@ struct seq_file;
> >   * been completed, or never called at all.
> >   */
> >  struct dma_fence {
> > -   spinlock_t *lock;
> > +   union {
> > +           spinlock_t *extern_lock;
> > +           spinlock_t inline_lock;
> > +   };
> >     const struct dma_fence_ops __rcu *ops;
> >     /*
> >      * We clear the callback list on kref_put so that by the time we
> > @@ -100,6 +105,7 @@ struct dma_fence {
> >  
> >  enum dma_fence_flag_bits {
> >     DMA_FENCE_FLAG_INITIALIZED_BIT,
> > +   DMA_FENCE_FLAG_INLINE_LOCK_BIT,
> >     DMA_FENCE_FLAG_SEQNO64_BIT,
> >     DMA_FENCE_FLAG_SIGNALED_BIT,
> >     DMA_FENCE_FLAG_TIMESTAMP_BIT,
> > @@ -381,11 +387,12 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu 
> > **fencep)
> >   * dma_fence_spinlock - return pointer to the spinlock protecting the fence
> >   * @fence: the fence to get the lock from
> >   *
> > - * Return the pointer to the extern lock.
> > + * Return either the pointer to the embedded or the external spin lock.
> >   */
> >  static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
> >  {
> > -   return fence->lock;
> > +   return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ?
> > +           &fence->inline_lock : fence->extern_lock;
> >  }
> >  
> >  /**
> > @@ -396,7 +403,7 @@ static inline spinlock_t *dma_fence_spinlock(struct 
> > dma_fence *fence)
> >   * Lock the fence, preventing it from changing to the signaled state.
> >   */
> >  #define dma_fence_lock_irqsave(fence, flags)       \
> > -   spin_lock_irqsave(fence->lock, flags)
> > +   spin_lock_irqsave(dma_fence_spinlock(fence), flags)
> >  
> >  /**
> >   * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
> > @@ -406,7 +413,7 @@ static inline spinlock_t *dma_fence_spinlock(struct 
> > dma_fence *fence)
> >   * Unlock the fence, allowing it to change it's state to signaled again.
> >   */
> >  #define dma_fence_unlock_irqrestore(fence, flags)  \
> > -   spin_unlock_irqrestore(fence->lock, flags)
> > +   spin_unlock_irqrestore(dma_fence_spinlock(fence), flags)
> >  
> >  /**
> >   * dma_fence_assert_held - lockdep assertion that fence is locked  
> 

Reply via email to