On 1/29/26 16:16, Tvrtko Ursulin wrote:
> 
> On 29/01/2026 12:53, Christian König wrote:
>> When the memory allocated by userspace isn't sufficient for all the
>> fences then just wait on them instead of returning an error.
> 
> Hmm..
> 
>> Signed-off-by: Christian König <[email protected]>
>> ---
>>   .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 52 ++++++++++---------
>>   1 file changed, 28 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> index ee8a5fbbd53b..d059712741fb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> @@ -734,7 +734,7 @@ amdgpu_userq_wait_count_fences(struct drm_file *filp,
>>               num_fences++;
>>       }
>>   -    wait_info->num_fences = num_fences;
>> +    wait_info->num_fences = min(num_fences, USHRT_MAX);
> 
> Oh it is actually a weakness in the uapi with wait_info->num_fences being 
> u16. I did not pick that from the commit message straight away.

No, that's not a weakness. It just doesn't make sense to return so many 
addr/value pairs.

> De-dup the array when over the uapi limit, and retry?

No, giving back addr/value pairs to userspace is just an optimization; it isn't 
required for technical correctness.

We could wait for everything inside the kernel, but that inserts a CPU bubble 
when submissions from multiple applications depend on each other.

> Is it userq fences or other fences that cause the spill in practice? If the 
> former then the patch adds a kernel wait where there wasn't one before so 
> de-duping more aggressively could maybe limit that path.

This is actually not a practical issue for performance. We just need to make 
sure that userspace can't abuse the API to kill X/Wayland — for example by 
handing the display server a BO with tons of fences on it.

Thanks for the comments,
Christian.

> 
> Regards,
> 
> Tvrtko
> 
>>       r = 0;
>>     error_unlock:
>> @@ -743,6 +743,19 @@ amdgpu_userq_wait_count_fences(struct drm_file *filp,
>>       return r;
>>   }
>>   +static int
>> +amdgpu_userq_wait_add_fence(struct drm_amdgpu_userq_wait *wait_info,
>> +                struct dma_fence **fences, unsigned int *num_fences,
>> +                struct dma_fence *fence)
>> +{
>> +    /* As a fallback in case userspace didn't allocate enough space */
>> +    if (*num_fences >= wait_info->num_fences)
>> +        return dma_fence_wait(fence, true);
>> +
>> +    fences[(*num_fences)++] = dma_fence_get(fence);
>> +    return 0;
>> +}
>> +
>>   static int
>>   amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
>>                       struct drm_amdgpu_userq_wait *wait_info,
>> @@ -786,12 +799,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file 
>> *filp,
>>               goto free_fences;
>>             dma_fence_unwrap_for_each(f, &iter, fence) {
>> -            if (num_fences >= wait_info->num_fences) {
>> -                r = -EINVAL;
>> +            r = amdgpu_userq_wait_add_fence(wait_info, fences,
>> +                            &num_fences, f);
>> +            if (r)
>>                   goto free_fences;
>> -            }
>> -
>> -            fences[num_fences++] = dma_fence_get(f);
>>           }
>>             dma_fence_put(fence);
>> @@ -808,12 +819,11 @@ amdgpu_userq_wait_return_fence_info(struct drm_file 
>> *filp,
>>           if (r)
>>               goto free_fences;
>>   -        if (num_fences >= wait_info->num_fences) {
>> -            r = -EINVAL;
>> +        r = amdgpu_userq_wait_add_fence(wait_info, fences,
>> +                        &num_fences, f);
>> +        if (r)
>>               goto free_fences;
>> -        }
>>   -        fences[num_fences++] = fence;
>>           dma_fence_put(fence);
>>       }
>>   @@ -844,13 +854,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file 
>> *filp,
>>             dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
>>                       DMA_RESV_USAGE_READ, fence) {
>> -            if (num_fences >= wait_info->num_fences) {
>> -                r = -EINVAL;
>> -                goto error_unlock;
>> -            }
>> -
>> -            fences[num_fences++] = fence;
>> -            dma_fence_get(fence);
>> +            r = amdgpu_userq_wait_add_fence(wait_info, fences,
>> +                            &num_fences, f);
>> +            if (r)
>> +                goto free_fences;
>>           }
>>       }
>>   @@ -861,13 +868,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file 
>> *filp,
>>             dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
>>                       DMA_RESV_USAGE_WRITE, fence) {
>> -            if (num_fences >= wait_info->num_fences) {
>> -                r = -EINVAL;
>> -                goto error_unlock;
>> -            }
>> -
>> -            fences[num_fences++] = fence;
>> -            dma_fence_get(fence);
>> +            r = amdgpu_userq_wait_add_fence(wait_info, fences,
>> +                            &num_fences, f);
>> +            if (r)
>> +                goto free_fences;
>>           }
>>       }
>>   
> 

Reply via email to