On 1/28/26 04:31, Wang, Beyond wrote:
> [AMD Official Use Only - AMD Internal Distribution Only]
> 
> 
> I see.
> 
> "Additional to that BO lists are not really used that much any more, OpenGL 
> Mesa has completely dropped them IIRC and RADV isn't using them either."
> 
> I suppose this is a corner case: I'm facing a Vulkan app that makes heavy use 
> of VK_EXT_external_memory_host. BOs of this kind are created through 
> amdgpu_gem_userptr_ioctl and cannot be VM_ALWAYS_VALID, so the UMD has to 
> pass all of them to the KMD on each command buffer submission. That makes 
> amdgpu_cs_parser_bos take much longer (~1.5ms on strix1), which is why I'm 
> trying to reuse the bo_list handle instead of letting the KMD create a 
> temporary bo_list on each submission.
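
(For other readers: the import in question is VK_EXT_external_memory_host's
host-pointer import, which on amdgpu lands in amdgpu_gem_userptr_ioctl. A
minimal sketch; variable names are hypothetical and error handling omitted:

	/* host_mem/size must satisfy minImportedHostPointerAlignment */
	VkImportMemoryHostPointerInfoEXT import = {
		.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
		.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
		.pHostPointer = host_mem,
	};
	VkMemoryAllocateInfo alloc_info = {
		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
		.pNext = &import,
		.allocationSize = size,
		.memoryTypeIndex = mem_type,
	};
	/* each such allocation becomes a userptr BO in the KMD */
	vkAllocateMemory(device, &alloc_info, NULL, &memory);

)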

Yeah that is completely expected and can't be optimized much.
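
The time goes into the HMM work which has to be redone for every userptr BO
on every submission. Condensed from the CS path your patch touches, the
per-submission flow is roughly:

	/* in amdgpu_cs_parser_bos(): */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		e->range = amdgpu_hmm_range_alloc(NULL);	   /* allocate  */
		r = amdgpu_ttm_tt_get_user_pages(e->bo, e->range); /* page walk */
	}

	/* in amdgpu_cs_submit(), after pushing the job: */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		r |= !amdgpu_hmm_range_valid(e->range);	/* pages still valid? */
		amdgpu_hmm_range_free(e->range);	/* and freed again    */
		e->range = NULL;
	}

Your patch only removes the allocation and the free; the page walk and the
validity check have to stay, so the ~1.5ms won't shrink much.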

As far as I can see there really isn't much you can do except avoiding the 
use of userptrs.

This functionality is only really made for temporary uploads and can't be used 
as a performant replacement for buffer allocations.
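
If the data has to start out in host memory, a one-time copy into a regular
GEM allocation is usually the better trade: a normal BO can be created
VM_ALWAYS_VALID and then never needs to be re-validated per CS. A minimal
userspace sketch of the two paths (raw ioctl interface, error handling
omitted; the exact flag choice is an assumption):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/amdgpu_drm.h>

	/* Userptr BO: wraps existing host pages, so the kernel must redo
	 * the HMM page walk on every command submission. */
	static int create_userptr_bo(int fd, void *host_mem, uint64_t size,
				     uint32_t *handle)
	{
		struct drm_amdgpu_gem_userptr args = {
			.addr  = (uintptr_t)host_mem,
			.size  = size,
			.flags = AMDGPU_GEM_USERPTR_REGISTER |
				 AMDGPU_GEM_USERPTR_VALIDATE,
		};

		if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_USERPTR, &args))
			return -1;
		*handle = args.handle;
		return 0;
	}

	/* Regular GTT BO: driver-managed pages; with VM_ALWAYS_VALID it is
	 * bound to one VM and doesn't need to be in a BO list at all. */
	static int create_gtt_bo(int fd, uint64_t size, uint32_t *handle)
	{
		union drm_amdgpu_gem_create args = {
			.in.bo_size      = size,
			.in.alignment    = 4096,
			.in.domains      = AMDGPU_GEM_DOMAIN_GTT,
			.in.domain_flags = AMDGPU_GEM_CREATE_VM_ALWAYS_VALID,
		};

		if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args))
			return -1;
		*handle = args.out.handle;
		return 0;
	}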

Regards,
Christian.

> 
> Any other suggestions would be greatly appreciated.
> 
> Thanks,
> Beyond
> ------------------------------------------------------------------------
> *From:* Koenig, Christian <[email protected]>
> *Sent:* Tuesday, January 27, 2026 7:47 PM
> *To:* Wang, Beyond <[email protected]>; [email protected] 
> <[email protected]>
> *Cc:* Deucher, Alexander <[email protected]>; Jin, Jason(Yong) 
> <[email protected]>
> *Subject:* Re: [PATCH] drm/amdgpu: Avoid redundant allocate/free when reusing 
> a bo_list handle
> 
> On 1/27/26 04:12, Wang, Beyond wrote:
>> [Public]
>>
>>
>> When a bo_list handle is reused across multiple command submissions,
>> reusing the already allocated HMM range structures avoids redundant
>> allocate/free operations on each submission.
>> Doing it this way benefits the amdgpu_cs_parser_bos time, especially for
>> a large bo_list.
> 
> And that creates a massive security issue, so clear NAK to that.
> 
> That we have the HMM range in the BO list at all is extremely questionable to 
> begin with, but it wasn't doable any other way in the past.
> 
> In addition to that, BO lists are not really used that much any more; OpenGL 
> Mesa has completely dropped them IIRC and RADV isn't using them either.
> 
> Regards,
> Christian.
> 
>>
>> Signed-off-by: Wang, Beyond <[email protected]>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c |  4 +++-
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 16 +++++++++-------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c     | 19 +++++++++++++++++++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h     |  2 ++
>>  4 files changed, 33 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> index 66fb37b64388..9c662369d292 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> @@ -51,8 +51,10 @@ static void amdgpu_bo_list_free(struct kref *ref)
>>                            refcount);
>>     struct amdgpu_bo_list_entry *e;
>>
>> -   amdgpu_bo_list_for_each_entry(e, list)
>> +   amdgpu_bo_list_for_each_entry(e, list) {
>> +       amdgpu_hmm_range_free(e->range);
>>         amdgpu_bo_unref(&e->bo);
>> +   }
>>     call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
>>  }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index ecdfe6cb36cc..fc195fa2c0c0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -891,9 +891,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>         bool userpage_invalidated = false;
>>         struct amdgpu_bo *bo = e->bo;
>>
>> -       e->range = amdgpu_hmm_range_alloc(NULL);
>> -       if (unlikely(!e->range))
>> -           return -ENOMEM;
>> +       if (!e->range) {
>> +           e->range = amdgpu_hmm_range_alloc(NULL);
>> +           if (unlikely(!e->range))
>> +               return -ENOMEM;
>> +       } else {
>> +           amdgpu_hmm_range_reset(e->range);
>> +       }
>>
>>         r = amdgpu_ttm_tt_get_user_pages(bo, e->range);
>>         if (r)
>> @@ -995,8 +999,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>
>>  out_free_user_pages:
>>     amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>> -       amdgpu_hmm_range_free(e->range);
>> -       e->range = NULL;
>> +       amdgpu_hmm_range_reset(e->range);
>>     }
>>     mutex_unlock(&p->bo_list->bo_list_mutex);
>>     return r;
>> @@ -1327,8 +1330,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>     r = 0;
>>     amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>         r |= !amdgpu_hmm_range_valid(e->range);
>> -       amdgpu_hmm_range_free(e->range);
>> -       e->range = NULL;
>> +       amdgpu_hmm_range_reset(e->range);
>>     }
>>     if (r) {
>>         r = -EAGAIN;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
>> index 90d26d820bac..5b72ea5a3db7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
>> @@ -273,6 +273,25 @@ struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
>>     return range;
>>  }
>>
>> +/**
>> + * amdgpu_hmm_range_reset - reset an AMDGPU HMM range
>> + * @range: pointer to the range object to reset
>> + *
>> + * Free the hmm_pfns associated with range, but keep the allocated struct range
>> + * for reuse, in order to avoid repeated allocation/free overhead when the same
>> + * bo_list handle is reused across multiple command submissions.
>> + *
>> + * Return: void
>> + */
>> +void amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range)
>> +{
>> +   if (!range)
>> +       return;
>> +
>> +   kvfree(range->hmm_range.hmm_pfns);
>> +   range->hmm_range.hmm_pfns = NULL;
>> +}
>> +
>>  /**
>>   * amdgpu_hmm_range_free - release an AMDGPU HMM range
>>   * @range: pointer to the range object to free
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
>> index 140bc9cd57b4..558f3f22c617 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
>> @@ -44,6 +44,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
>>  #if defined(CONFIG_HMM_MIRROR)
>>  bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range);
>>  struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo);
>> +void amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range);
>>  void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range);
>>  int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
>>  void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
>> @@ -67,6 +68,7 @@ static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *
>>     return NULL;
>>  }
>>
>> +static inline void amdgpu_hmm_range_reset(struct amdgpu_hmm_range *range) {}
>>  static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {}
>>  #endif
>>
>> --
>> 2.43.0
> 
