Re: [PATCH] drm/amdgpu: implement per-process MES context

Christian König Tue, 26 May 2026 01:46:41 -0700

On 5/26/26 09:54, Zhu, Lingshan wrote:
> On 5/26/2026 3:02 AM, Christian König wrote:
> 
>> On 5/25/26 10:23, Zhu Lingshan wrote:
>>> MES process context is a process-level page
>>> where process specific context is saved for
>>> MES scheduler.
>>>
>>> However, current user-queue code path assigns
>>> fw_obj of a queue to MES process_context_addr
>>> when adding the queue to MES.
>>>
>>> This means every new queue from the same process
>>> would replace the previous process context address
>>> with that queue's fw_obj address.
>>> What's worse is, when user space frees a queue,
>>> its fw_obj will be freed as well, causing MES
>>> working on a NULL page pointer.
>>>
>>> This issue leads to inconsistency and crash
>>> in the scheduler.
>>>
>>> This commit allocates a process-level page for
>>> MES process contexts for a process other than queue-level
>>>
>>> Signed-off-by: Zhu Lingshan <[email protected]>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  |  5 +++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |  1 +
>>>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 48 ++++++++++++++++------
>>>  3 files changed, 42 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>> index 38e310a8694d..0c4d6f80616e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>> @@ -1225,6 +1225,11 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr 
>>> *userq_mgr)
>>>      */
>>>     cancel_work_sync(&userq_mgr->reset_work);
>>>  
>>> +   if (userq_mgr->proc_ctx_obj.obj)
>> Please drop that check it is unecessary.
> 
> sure, I can drop this in V2.
> 
>>> +           amdgpu_bo_free_kernel(&userq_mgr->proc_ctx_obj.obj,
>>> +                                 &userq_mgr->proc_ctx_obj.gpu_addr,
>>> +                                 &userq_mgr->proc_ctx_obj.cpu_ptr);
>>> +
>>>     mutex_destroy(&userq_mgr->userq_mutex);
>>>  }
>>>  
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>> index 28cfc6682333..fe85234e58b3 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>> @@ -127,6 +127,7 @@ struct amdgpu_userq_mgr {
>>>     struct amdgpu_device            *adev;
>>>     struct delayed_work             resume_work;
>>>     struct drm_file                 *file;
>>> +   struct amdgpu_userq_obj         proc_ctx_obj;
>>>  
>>>     /**
>>>      * @reset_work:
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c 
>>> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>> index e9189f07c6dc..3022025bc2ec 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>> @@ -133,8 +133,8 @@ static int mes_userq_map(struct amdgpu_usermode_queue 
>>> *queue)
>>>     queue_input.gang_quantum = 10000;
>>>     queue_input.paging = false;
>>>  
>>> -   queue_input.process_context_addr = ctx->gpu_addr;
>>> -   queue_input.gang_context_addr = ctx->gpu_addr + 
>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>> +   queue_input.process_context_addr = uq_mgr->proc_ctx_obj.gpu_addr;
>>> +   queue_input.gang_context_addr = ctx->gpu_addr;
>>>     queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
>>>     queue_input.gang_global_priority_level = 
>>> convert_to_mes_priority(queue->priority);
>>>  
>>> @@ -169,7 +169,7 @@ static int mes_userq_unmap(struct amdgpu_usermode_queue 
>>> *queue)
>>>  
>>>     memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
>>>     queue_input.doorbell_offset = queue->doorbell_index;
>>> -   queue_input.gang_context_addr = ctx->gpu_addr + 
>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>> +   queue_input.gang_context_addr = ctx->gpu_addr;
>>>  
>>>     amdgpu_mes_lock(&adev->mes);
>>>     r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
>>> @@ -186,12 +186,8 @@ static int mes_userq_create_ctx_space(struct 
>>> amdgpu_userq_mgr *uq_mgr,
>>>     struct amdgpu_userq_obj *ctx = &queue->fw_obj;
>>>     int r, size;
>>>  
>>> -   /*
>>> -    * The FW expects at least one page space allocated for
>>> -    * process ctx and gang ctx each. Create an object
>>> -    * for the same.
>>> -    */
>>> -   size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
>>> +   /* The FW expects at least one page space allocated for gang ctx. */
>>> +   size = AMDGPU_USERQ_GANG_CTX_SZ;
>>>     r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0,
>>>                                 AMDGPU_GEM_DOMAIN_GTT,
>>>                                 &ctx->obj, &ctx->gpu_addr,
>>> @@ -257,6 +253,27 @@ static int mes_userq_detect_and_reset(struct 
>>> amdgpu_device *adev,
>>>     return r;
>>>  }
>>>  
>>> +static int mes_userq_create_proc_ctx_space(struct amdgpu_userq_mgr *uq_mgr)
>>> +{
>>> +   int r = 0;
>>> +
>>> +   mutex_lock(&uq_mgr->userq_mutex);
>> Clear NAK. We can't allocate anything while holding that lock.
>>
>> Please add a different lock to protected the buffer or just oportunistically 
>> allocate it with CMPXCHG().
> 
> I will introduce a different lock in V2.
> 
>>> +   if (!uq_mgr->proc_ctx_obj.obj) {
>> Please drop that check, amdgpu_bo_create_kernel() should already take care 
>> of that.
> 
> I think we still need this check, because although amdgpu_bo_create_kernel() 
> checks (!*bo_ptr), but:
> 1) it does not immediately return if bo_ptr is valid. It only skips 
> re-creating the bo,
> it still calls amdgpu_bo_reserve(), amdgpu_bo_pin(), amdgpu_ttm_alloc_gart(), 
> and amdgpu_bo_kmap()
> on every invocation.
> 
> 2) it calls memset() unconditionally on every invocation.
> 
> So I think this check is still necessary, and another thing, do you think
> amdgpu_bo_create_kernel() should immediately return if *bo_ptr is not NULL?
> It looks like this deserve a fix.


Good point, IIRC we added this handling to make it easier to re-create kernel 
buffers after suspend/resume.

I'm not sure if any code path is actually still using this since we found that 
for a lot of use cases you need to keep the FW buffers at the same location 
even after suspend/resume.

Anyway just add an if and comment why it is necessary.


> 
> Thanks
> Lingshan
> 
>> Regards,
>> Christian.
>>
>>> +           r = amdgpu_bo_create_kernel(uq_mgr->adev, 
>>> AMDGPU_USERQ_PROC_CTX_SZ,
>>> +                                       0, AMDGPU_GEM_DOMAIN_GTT,
>>> +                                       &uq_mgr->proc_ctx_obj.obj,
>>> +                                       &uq_mgr->proc_ctx_obj.gpu_addr,
>>> +                                       &uq_mgr->proc_ctx_obj.cpu_ptr);
>>> +
>>> +           if (!r)
>>> +                   memset(uq_mgr->proc_ctx_obj.cpu_ptr, 0, 
>>> AMDGPU_USERQ_PROC_CTX_SZ);

When amdgpu_bo_create_kernel() does that the memset here can probably be 
dropped.

Regards,
Christian.

>>> +   }
>>> +
>>> +   mutex_unlock(&uq_mgr->userq_mutex);
>>> +
>>> +   return r;
>>> +}
>>> +
>>>  static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
>>>                             struct drm_amdgpu_userq_in *args_in)
>>>  {
>>> @@ -429,7 +446,14 @@ static int mes_userq_mqd_create(struct 
>>> amdgpu_usermode_queue *queue,
>>>             goto free_mqd;
>>>     }
>>>  
>>> -   /* Create BO for FW operations */
>>> +   /* Create per-process MES process context BO */
>>> +   r = mes_userq_create_proc_ctx_space(uq_mgr);
>>> +   if (r) {
>>> +           DRM_ERROR("Failed to allocate MES process context space bo, 
>>> error: %d\n", r);
>>> +           goto free_mqd;
>>> +   }
>>> +
>>> +   /* Create BO of a gang for FW operations */
>>>     r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
>>>     if (r) {
>>>             DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
>>> @@ -492,7 +516,7 @@ static int mes_userq_preempt(struct 
>>> amdgpu_usermode_queue *queue)
>>>     *fence_ptr = 0;
>>>  
>>>     memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
>>> -   queue_input.gang_context_addr = ctx->gpu_addr + 
>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>> +   queue_input.gang_context_addr = ctx->gpu_addr;
>>>     queue_input.suspend_fence_addr = fence_gpu_addr;
>>>     queue_input.suspend_fence_value = 1;
>>>     amdgpu_mes_lock(&adev->mes);
>>> @@ -529,7 +553,7 @@ static int mes_userq_restore(struct 
>>> amdgpu_usermode_queue *queue)
>>>             return 0;
>>>  
>>>     memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
>>> -   queue_input.gang_context_addr = ctx->gpu_addr + 
>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>> +   queue_input.gang_context_addr = ctx->gpu_addr;
>>>  
>>>     amdgpu_mes_lock(&adev->mes);
>>>     r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);

Re: [PATCH] drm/amdgpu: implement per-process MES context

Reply via email to