On 6/8/26 17:41, Philipp Stanner wrote:
> On Mon, 2026-06-08 at 17:35 +0200, Christian König wrote:
>> On 6/8/26 16:24, Philipp Stanner wrote:
>>> The dma_fence backend_ops can access a fence. Hereby, a driver callback
>>> will be running which likely will access driver specific data through
>>> container_of(). If now, simultaneously, a driver signals the fence and
>>> afterwards expects to run a driver specific destructor (using the same
>>> data accessed through container_of()), there can be a race.
>>>
>>> A driver very likely trusts that once it has signaled a fence, no one
>>> will be accessing it anymore. Moreover, it might already want to free up
>>> resources, making UAF bugs possible.
>>>
>>> The race occurs because there are only pragmatic checks for the signaled
>>> flag of a fence, without taking the fence lock. RCU guards exist, but
>>> their purpose is to guard accesses through the backend_ops callbacks
>>> against the driver (which implements the TEXT segment these callbacks
>>> live in) from unloading.
>>>
>>> Proper synchronization can be ensured by taking the fence lock. RCU is
>>> still simultaneously required to guard against the unload.
>>>
>>> Fix the races by taking the lock for all non-deprecated backend_ops
>>> callbacks.
>>
>> That sounds like the fundamentally wrong approach to me.
>>
>> The lock protects the dma_fence signaling state and *NOT* any driver
>> state, so it should not be used to protect any driver state.
>>
>> Drivers need to make sure that they protect their driver state with
>> separate lock and don't rely on the dma_fence lock for this. This is
>> actually the core of why we want to deprecate the shared dma_fence
>> spinlock.
> 
> It's not so much about protecting data, it's about correctness:
> 
> A driver that calls
> 
> dma_fence_signal(f)
> 
> expects that after signalling, no callback will be running into the
> driver again.

No, that is not even remotely correct.

That's why we need the RCU grace period to make sure that nobody is referencing 
the driver stuff any more.

DMA fence destruction has to wait for an RCU grace period for exactly the same 
reason as well.

If we want to cleanup I would start there. And then eventually stop calling 
callbacks with the fence lock held and only hold the RCU read side.

Regards,
Christian.

> It's a fix synchronization point.
> 
> Only the fence lock can grant such synchronization.
> 
> Positive effects would be:
> 
> 1. Drivers can do their cleanup immediately, without having to wait for
> a grace period
> 
> 2. Drivers could be sure that their driver_fence data, allocated
> together with fence and accessed through container_of(fence), is not
> being accessed anymore.
> 
> 
> I see only advantages. Safer, faster. :)
> 
> P.
> 
>>
>> Regards,
>> Christian.
>>
>>>
>>> Conveniently, this also fixes a race where backend_ops->set_deadline()
>>> might try to set a deadline for an already signaled fence.
>>>
>>> Suggested-by: Danilo Krummrich <[email protected]>
>>> Signed-off-by: Philipp Stanner <[email protected]>
>>> ---
>>> We discovered this problem through our Rust abstractions, but it can
>>> also occur in C.
>>>
>>> The by far cleanest solution seems to be to use the fence lock. This RFC
>>> serves to discuss whether there is anything preventing that.
>>>
>>> (Patch so far just compile tested, to have some groundlayer for the
>>> rough idea, to discuss it first)
>>> ---
>>>  drivers/dma-buf/dma-fence.c | 39 ++++++++++++++++++++++++++++---------
>>>  include/linux/dma-fence.h   | 17 ++++++++++++----
>>>  2 files changed, 43 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>>> index c7ea1e75d38a..b74f02f3cca8 100644
>>> --- a/drivers/dma-buf/dma-fence.c
>>> +++ b/drivers/dma-buf/dma-fence.c
>>> @@ -629,7 +629,8 @@ EXPORT_SYMBOL(dma_fence_free);
>>>  static bool __dma_fence_enable_signaling(struct dma_fence *fence)
>>>  {
>>>     const struct dma_fence_ops *ops;
>>> -   bool was_set;
>>> +   bool was_set, success;
>>> +   unsigned long flags;
>>>  
>>>     dma_fence_assert_held(fence);
>>>  
>>> @@ -644,7 +645,10 @@ static bool __dma_fence_enable_signaling(struct 
>>> dma_fence *fence)
>>>     if (!was_set && ops && ops->enable_signaling) {
>>>             trace_dma_fence_enable_signal(fence);
>>>  
>>> -           if (!ops->enable_signaling(fence)) {
>>> +           dma_fence_lock_irqsave(fence, flags);
>>> +           success = ops->enable_signaling(fence);
>>> +           dma_fence_unlock_irqrestore(fence, flags);
>>> +           if (!success) {
>>>                     rcu_read_unlock();
>>>                     dma_fence_signal_locked(fence);
>>>                     return false;
>>> @@ -1020,11 +1024,20 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout);
>>>  void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
>>>  {
>>>     const struct dma_fence_ops *ops;
>>> +   unsigned long flags;
>>>  
>>>     rcu_read_lock();
>>>     ops = rcu_dereference(fence->ops);
>>> -   if (ops && ops->set_deadline && !dma_fence_is_signaled(fence))
>>> +   if (!ops || !ops->set_deadline) {
>>> +           rcu_read_unlock();
>>> +           return;
>>> +   }
>>> +
>>> +   dma_fence_lock_irqsave(fence, flags);
>>> +   if (!dma_fence_is_signaled_locked(fence))
>>>             ops->set_deadline(fence, deadline);
>>> +
>>> +   dma_fence_unlock_irqrestore(fence, flags);
>>>     rcu_read_unlock();
>>>  }
>>>  EXPORT_SYMBOL(dma_fence_set_deadline);
>>> @@ -1166,14 +1179,18 @@ EXPORT_SYMBOL(dma_fence_init64);
>>>   */
>>>  const char __rcu *dma_fence_driver_name(struct dma_fence *fence)
>>>  {
>>> +   const char __rcu *name = "detached-driver";
>>>     const struct dma_fence_ops *ops;
>>> +   unsigned long flags;
>>>  
>>>     /* RCU protection is required for safe access to returned string */
>>>     ops = rcu_dereference(fence->ops);
>>> +   dma_fence_lock_irqsave(fence, flags);
>>>     if (!dma_fence_test_signaled_flag(fence))
>>> -           return (const char __rcu *)ops->get_driver_name(fence);
>>> -   else
>>> -           return (const char __rcu *)"detached-driver";
>>> +           name = ops->get_driver_name(fence);
>>> +   dma_fence_unlock_irqrestore(fence, flags);
>>> +
>>> +   return name;
>>>  }
>>>  EXPORT_SYMBOL(dma_fence_driver_name);
>>>  
>>> @@ -1199,13 +1216,17 @@ EXPORT_SYMBOL(dma_fence_driver_name);
>>>   */
>>>  const char __rcu *dma_fence_timeline_name(struct dma_fence *fence)
>>>  {
>>> +   const char __rcu *name = "signaled-timeline";
>>>     const struct dma_fence_ops *ops;
>>> +   unsigned long flags;
>>>  
>>>     /* RCU protection is required for safe access to returned string */
>>>     ops = rcu_dereference(fence->ops);
>>> +   dma_fence_lock_irqsave(fence, flags);
>>>     if (!dma_fence_test_signaled_flag(fence))
>>> -           return (const char __rcu *)ops->get_driver_name(fence);
>>> -   else
>>> -           return (const char __rcu *)"signaled-timeline";
>>> +           name = ops->get_driver_name(fence);
>>> +   dma_fence_unlock_irqrestore(fence, flags);
>>> +
>>> +   return name;
>>>  }
>>>  EXPORT_SYMBOL(dma_fence_timeline_name);
>>> diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
>>> index b52ab692b22e..b93c3f7f69fb 100644
>>> --- a/include/linux/dma-fence.h
>>> +++ b/include/linux/dma-fence.h
>>> @@ -547,20 +547,29 @@ static inline bool
>>>  dma_fence_is_signaled(struct dma_fence *fence)
>>>  {
>>>     const struct dma_fence_ops *ops;
>>> +   unsigned long flags;
>>> +   bool signaled;
>>>  
>>>     if (dma_fence_test_signaled_flag(fence))
>>>             return true;
>>>  
>>>     rcu_read_lock();
>>>     ops = rcu_dereference(fence->ops);
>>> -   if (ops && ops->signaled && ops->signaled(fence)) {
>>> +   if (!ops || !ops->signaled) {
>>>             rcu_read_unlock();
>>> -           dma_fence_signal(fence);
>>> -           return true;
>>> +           return false;
>>>     }
>>> +
>>> +   dma_fence_lock_irqsave(fence, flags);
>>> +   signaled = ops->signaled(fence);
>>> +
>>> +   if (signaled)
>>> +           dma_fence_signal_locked(fence);
>>> +
>>> +   dma_fence_unlock_irqrestore(fence, flags);
>>>     rcu_read_unlock();
>>>  
>>> -   return false;
>>> +   return signaled;
>>>  }
>>>  
>>>  /**

Reply via email to