Re: [PATCH] drm/amdgpu: implement per-process MES context

Christian König Tue, 26 May 2026 04:18:35 -0700

On 5/26/26 13:16, Zhu, Lingshan wrote:
> On 5/26/2026 4:46 PM, Christian König wrote:
> 
>> On 5/26/26 09:54, Zhu, Lingshan wrote:
>>> On 5/26/2026 3:02 AM, Christian König wrote:
>>>
>>>> On 5/25/26 10:23, Zhu Lingshan wrote:
>>>>> MES process context is a process-level page
>>>>> where process specific context is saved for
>>>>> MES scheduler.
>>>>>
>>>>> However, current user-queue code path assigns
>>>>> fw_obj of a queue to MES process_context_addr
>>>>> when adding the queue to MES.
>>>>>
>>>>> This means every new queue from the same process
>>>>> would replace the previous process context address
>>>>> with that queue's fw_obj address.
>>>>> What's worse is, when user space frees a queue,
>>>>> its fw_obj will be freed as well, causing MES
>>>>> working on a NULL page pointer.
>>>>>
>>>>> This issue leads to inconsistency and crash
>>>>> in the scheduler.
>>>>>
>>>>> This commit allocates a process-level page for
>>>>> MES process contexts for a process other than queue-level
>>>>>
>>>>> Signed-off-by: Zhu Lingshan <[email protected]>
>>>>> ---
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  |  5 +++
>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |  1 +
>>>>>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 48 ++++++++++++++++------
>>>>>  3 files changed, 42 insertions(+), 12 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>>>> index 38e310a8694d..0c4d6f80616e 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>>>> @@ -1225,6 +1225,11 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr 
>>>>> *userq_mgr)
>>>>>    */
>>>>>   cancel_work_sync(&userq_mgr->reset_work);
>>>>>  
>>>>> + if (userq_mgr->proc_ctx_obj.obj)
>>>> Please drop that check it is unecessary.
>>> sure, I can drop this in V2.
>>>
>>>>> +         amdgpu_bo_free_kernel(&userq_mgr->proc_ctx_obj.obj,
>>>>> +                               &userq_mgr->proc_ctx_obj.gpu_addr,
>>>>> +                               &userq_mgr->proc_ctx_obj.cpu_ptr);
>>>>> +
>>>>>   mutex_destroy(&userq_mgr->userq_mutex);
>>>>>  }
>>>>>  
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>>>> index 28cfc6682333..fe85234e58b3 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>>>> @@ -127,6 +127,7 @@ struct amdgpu_userq_mgr {
>>>>>   struct amdgpu_device            *adev;
>>>>>   struct delayed_work             resume_work;
>>>>>   struct drm_file                 *file;
>>>>> + struct amdgpu_userq_obj         proc_ctx_obj;
>>>>>  
>>>>>   /**
>>>>>    * @reset_work:
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>>>> index e9189f07c6dc..3022025bc2ec 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>>>> @@ -133,8 +133,8 @@ static int mes_userq_map(struct amdgpu_usermode_queue 
>>>>> *queue)
>>>>>   queue_input.gang_quantum = 10000;
>>>>>   queue_input.paging = false;
>>>>>  
>>>>> - queue_input.process_context_addr = ctx->gpu_addr;
>>>>> - queue_input.gang_context_addr = ctx->gpu_addr + 
>>>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>>>> + queue_input.process_context_addr = uq_mgr->proc_ctx_obj.gpu_addr;
>>>>> + queue_input.gang_context_addr = ctx->gpu_addr;
>>>>>   queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
>>>>>   queue_input.gang_global_priority_level = 
>>>>> convert_to_mes_priority(queue->priority);
>>>>>  
>>>>> @@ -169,7 +169,7 @@ static int mes_userq_unmap(struct 
>>>>> amdgpu_usermode_queue *queue)
>>>>>  
>>>>>   memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
>>>>>   queue_input.doorbell_offset = queue->doorbell_index;
>>>>> - queue_input.gang_context_addr = ctx->gpu_addr + 
>>>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>>>> + queue_input.gang_context_addr = ctx->gpu_addr;
>>>>>  
>>>>>   amdgpu_mes_lock(&adev->mes);
>>>>>   r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
>>>>> @@ -186,12 +186,8 @@ static int mes_userq_create_ctx_space(struct 
>>>>> amdgpu_userq_mgr *uq_mgr,
>>>>>   struct amdgpu_userq_obj *ctx = &queue->fw_obj;
>>>>>   int r, size;
>>>>>  
>>>>> - /*
>>>>> -  * The FW expects at least one page space allocated for
>>>>> -  * process ctx and gang ctx each. Create an object
>>>>> -  * for the same.
>>>>> -  */
>>>>> - size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
>>>>> + /* The FW expects at least one page space allocated for gang ctx. */
>>>>> + size = AMDGPU_USERQ_GANG_CTX_SZ;
>>>>>   r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0,
>>>>>                               AMDGPU_GEM_DOMAIN_GTT,
>>>>>                               &ctx->obj, &ctx->gpu_addr,
>>>>> @@ -257,6 +253,27 @@ static int mes_userq_detect_and_reset(struct 
>>>>> amdgpu_device *adev,
>>>>>   return r;
>>>>>  }
>>>>>  
>>>>> +static int mes_userq_create_proc_ctx_space(struct amdgpu_userq_mgr 
>>>>> *uq_mgr)
>>>>> +{
>>>>> + int r = 0;
>>>>> +
>>>>> + mutex_lock(&uq_mgr->userq_mutex);
>>>> Clear NAK. We can't allocate anything while holding that lock.
>>>>
>>>> Please add a different lock to protected the buffer or just 
>>>> oportunistically allocate it with CMPXCHG().
>>> I will introduce a different lock in V2.
>>>
>>>>> + if (!uq_mgr->proc_ctx_obj.obj) {
>>>> Please drop that check, amdgpu_bo_create_kernel() should already take care 
>>>> of that.
>>> I think we still need this check, because although 
>>> amdgpu_bo_create_kernel() checks (!*bo_ptr), but:
>>> 1) it does not immediately return if bo_ptr is valid. It only skips 
>>> re-creating the bo,
>>> it still calls amdgpu_bo_reserve(), amdgpu_bo_pin(), 
>>> amdgpu_ttm_alloc_gart(), and amdgpu_bo_kmap()
>>> on every invocation.
>>>
>>> 2) it calls memset() unconditionally on every invocation.
>>>
>>> So I think this check is still necessary, and another thing, do you think
>>> amdgpu_bo_create_kernel() should immediately return if *bo_ptr is not NULL?
>>> It looks like this deserve a fix.
>> Good point, IIRC we added this handling to make it easier to re-create 
>> kernel buffers after suspend/resume.
>>
>> I'm not sure if any code path is actually still using this since we found 
>> that for a lot of use cases you need to keep the FW buffers at the same 
>> location even after suspend/resume.
>>
>> Anyway just add an if and comment why it is necessary.
>>
>>
>>> Thanks
>>> Lingshan
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> +         r = amdgpu_bo_create_kernel(uq_mgr->adev, 
>>>>> AMDGPU_USERQ_PROC_CTX_SZ,
>>>>> +                                     0, AMDGPU_GEM_DOMAIN_GTT,
>>>>> +                                     &uq_mgr->proc_ctx_obj.obj,
>>>>> +                                     &uq_mgr->proc_ctx_obj.gpu_addr,
>>>>> +                                     &uq_mgr->proc_ctx_obj.cpu_ptr);
>>>>> +
>>>>> +         if (!r)
>>>>> +                 memset(uq_mgr->proc_ctx_obj.cpu_ptr, 0, 
>>>>> AMDGPU_USERQ_PROC_CTX_SZ);
>> When amdgpu_bo_create_kernel() does that the memset here can probably be 
>> dropped.
> 
> oops, I missed this comment.
> 
> amdgpu_bo_create_reserved() sets the struct amdgpu_bo_param all zero by
> memset(&bp, 0, sizeof(bp)), and amdgpu_bo_create() kvzalloc a struct 
> amdgpu_bo.
> However I think we need to set the BO all zero, just like what 
> mes_userq_create_ctx_space() does.

Yeah those are just the housekeeping structures. If you need the BO content to 
be zeroed out then you indeed need to do that manually.

Regards,
Christian.

> 
> Thanks
> Lingshan  
> 
>> Regards,
>> Christian.
>>
>>>>> + }
>>>>> +
>>>>> + mutex_unlock(&uq_mgr->userq_mutex);
>>>>> +
>>>>> + return r;
>>>>> +}
>>>>> +
>>>>>  static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
>>>>>                           struct drm_amdgpu_userq_in *args_in)
>>>>>  {
>>>>> @@ -429,7 +446,14 @@ static int mes_userq_mqd_create(struct 
>>>>> amdgpu_usermode_queue *queue,
>>>>>           goto free_mqd;
>>>>>   }
>>>>>  
>>>>> - /* Create BO for FW operations */
>>>>> + /* Create per-process MES process context BO */
>>>>> + r = mes_userq_create_proc_ctx_space(uq_mgr);
>>>>> + if (r) {
>>>>> +         DRM_ERROR("Failed to allocate MES process context space bo, 
>>>>> error: %d\n", r);
>>>>> +         goto free_mqd;
>>>>> + }
>>>>> +
>>>>> + /* Create BO of a gang for FW operations */
>>>>>   r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
>>>>>   if (r) {
>>>>>           DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
>>>>> @@ -492,7 +516,7 @@ static int mes_userq_preempt(struct 
>>>>> amdgpu_usermode_queue *queue)
>>>>>   *fence_ptr = 0;
>>>>>  
>>>>>   memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
>>>>> - queue_input.gang_context_addr = ctx->gpu_addr + 
>>>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>>>> + queue_input.gang_context_addr = ctx->gpu_addr;
>>>>>   queue_input.suspend_fence_addr = fence_gpu_addr;
>>>>>   queue_input.suspend_fence_value = 1;
>>>>>   amdgpu_mes_lock(&adev->mes);
>>>>> @@ -529,7 +553,7 @@ static int mes_userq_restore(struct 
>>>>> amdgpu_usermode_queue *queue)
>>>>>           return 0;
>>>>>  
>>>>>   memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
>>>>> - queue_input.gang_context_addr = ctx->gpu_addr + 
>>>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>>>> + queue_input.gang_context_addr = ctx->gpu_addr;
>>>>>  
>>>>>   amdgpu_mes_lock(&adev->mes);
>>>>>   r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
Re: [PATCH] drm/amdgpu: implement per-process MES context

Reply via email to