Re: [PATCH] drm/amdgpu: implement per-process MES context

Zhu, Lingshan Tue, 26 May 2026 04:17:07 -0700

On 5/26/2026 4:46 PM, Christian König wrote:

> On 5/26/26 09:54, Zhu, Lingshan wrote:
>> On 5/26/2026 3:02 AM, Christian König wrote:
>>
>>> On 5/25/26 10:23, Zhu Lingshan wrote:
>>>> MES process context is a process-level page
>>>> where process specific context is saved for
>>>> MES scheduler.
>>>>
>>>> However, current user-queue code path assigns
>>>> fw_obj of a queue to MES process_context_addr
>>>> when adding the queue to MES.
>>>>
>>>> This means every new queue from the same process
>>>> would replace the previous process context address
>>>> with that queue's fw_obj address.
>>>> What's worse is, when user space frees a queue,
>>>> its fw_obj will be freed as well, causing MES
>>>> working on a NULL page pointer.
>>>>
>>>> This issue leads to inconsistency and crash
>>>> in the scheduler.
>>>>
>>>> This commit allocates a process-level page for
>>>> MES process contexts for a process other than queue-level
>>>>
>>>> Signed-off-by: Zhu Lingshan <[email protected]>
>>>> ---
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  |  5 +++
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |  1 +
>>>>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 48 ++++++++++++++++------
>>>>  3 files changed, 42 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>>> index 38e310a8694d..0c4d6f80616e 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
>>>> @@ -1225,6 +1225,11 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr 
>>>> *userq_mgr)
>>>>     */
>>>>    cancel_work_sync(&userq_mgr->reset_work);
>>>>  
>>>> +  if (userq_mgr->proc_ctx_obj.obj)
>>> Please drop that check it is unecessary.
>> sure, I can drop this in V2.
>>
>>>> +          amdgpu_bo_free_kernel(&userq_mgr->proc_ctx_obj.obj,
>>>> +                                &userq_mgr->proc_ctx_obj.gpu_addr,
>>>> +                                &userq_mgr->proc_ctx_obj.cpu_ptr);
>>>> +
>>>>    mutex_destroy(&userq_mgr->userq_mutex);
>>>>  }
>>>>  
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>>> index 28cfc6682333..fe85234e58b3 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
>>>> @@ -127,6 +127,7 @@ struct amdgpu_userq_mgr {
>>>>    struct amdgpu_device            *adev;
>>>>    struct delayed_work             resume_work;
>>>>    struct drm_file                 *file;
>>>> +  struct amdgpu_userq_obj         proc_ctx_obj;
>>>>  
>>>>    /**
>>>>     * @reset_work:
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>>> index e9189f07c6dc..3022025bc2ec 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>>>> @@ -133,8 +133,8 @@ static int mes_userq_map(struct amdgpu_usermode_queue 
>>>> *queue)
>>>>    queue_input.gang_quantum = 10000;
>>>>    queue_input.paging = false;
>>>>  
>>>> -  queue_input.process_context_addr = ctx->gpu_addr;
>>>> -  queue_input.gang_context_addr = ctx->gpu_addr + 
>>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>>> +  queue_input.process_context_addr = uq_mgr->proc_ctx_obj.gpu_addr;
>>>> +  queue_input.gang_context_addr = ctx->gpu_addr;
>>>>    queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
>>>>    queue_input.gang_global_priority_level = 
>>>> convert_to_mes_priority(queue->priority);
>>>>  
>>>> @@ -169,7 +169,7 @@ static int mes_userq_unmap(struct 
>>>> amdgpu_usermode_queue *queue)
>>>>  
>>>>    memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
>>>>    queue_input.doorbell_offset = queue->doorbell_index;
>>>> -  queue_input.gang_context_addr = ctx->gpu_addr + 
>>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>>> +  queue_input.gang_context_addr = ctx->gpu_addr;
>>>>  
>>>>    amdgpu_mes_lock(&adev->mes);
>>>>    r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
>>>> @@ -186,12 +186,8 @@ static int mes_userq_create_ctx_space(struct 
>>>> amdgpu_userq_mgr *uq_mgr,
>>>>    struct amdgpu_userq_obj *ctx = &queue->fw_obj;
>>>>    int r, size;
>>>>  
>>>> -  /*
>>>> -   * The FW expects at least one page space allocated for
>>>> -   * process ctx and gang ctx each. Create an object
>>>> -   * for the same.
>>>> -   */
>>>> -  size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
>>>> +  /* The FW expects at least one page space allocated for gang ctx. */
>>>> +  size = AMDGPU_USERQ_GANG_CTX_SZ;
>>>>    r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0,
>>>>                                AMDGPU_GEM_DOMAIN_GTT,
>>>>                                &ctx->obj, &ctx->gpu_addr,
>>>> @@ -257,6 +253,27 @@ static int mes_userq_detect_and_reset(struct 
>>>> amdgpu_device *adev,
>>>>    return r;
>>>>  }
>>>>  
>>>> +static int mes_userq_create_proc_ctx_space(struct amdgpu_userq_mgr 
>>>> *uq_mgr)
>>>> +{
>>>> +  int r = 0;
>>>> +
>>>> +  mutex_lock(&uq_mgr->userq_mutex);
>>> Clear NAK. We can't allocate anything while holding that lock.
>>>
>>> Please add a different lock to protected the buffer or just 
>>> oportunistically allocate it with CMPXCHG().
>> I will introduce a different lock in V2.
>>
>>>> +  if (!uq_mgr->proc_ctx_obj.obj) {
>>> Please drop that check, amdgpu_bo_create_kernel() should already take care 
>>> of that.
>> I think we still need this check, because although amdgpu_bo_create_kernel() 
>> checks (!*bo_ptr), but:
>> 1) it does not immediately return if bo_ptr is valid. It only skips 
>> re-creating the bo,
>> it still calls amdgpu_bo_reserve(), amdgpu_bo_pin(), 
>> amdgpu_ttm_alloc_gart(), and amdgpu_bo_kmap()
>> on every invocation.
>>
>> 2) it calls memset() unconditionally on every invocation.
>>
>> So I think this check is still necessary, and another thing, do you think
>> amdgpu_bo_create_kernel() should immediately return if *bo_ptr is not NULL?
>> It looks like this deserve a fix.
> Good point, IIRC we added this handling to make it easier to re-create kernel 
> buffers after suspend/resume.
>
> I'm not sure if any code path is actually still using this since we found 
> that for a lot of use cases you need to keep the FW buffers at the same 
> location even after suspend/resume.
>
> Anyway just add an if and comment why it is necessary.
>
>
>> Thanks
>> Lingshan
>>
>>> Regards,
>>> Christian.
>>>
>>>> +          r = amdgpu_bo_create_kernel(uq_mgr->adev, 
>>>> AMDGPU_USERQ_PROC_CTX_SZ,
>>>> +                                      0, AMDGPU_GEM_DOMAIN_GTT,
>>>> +                                      &uq_mgr->proc_ctx_obj.obj,
>>>> +                                      &uq_mgr->proc_ctx_obj.gpu_addr,
>>>> +                                      &uq_mgr->proc_ctx_obj.cpu_ptr);
>>>> +
>>>> +          if (!r)
>>>> +                  memset(uq_mgr->proc_ctx_obj.cpu_ptr, 0, 
>>>> AMDGPU_USERQ_PROC_CTX_SZ);
> When amdgpu_bo_create_kernel() does that the memset here can probably be 
> dropped.


oops, I missed this comment.

amdgpu_bo_create_reserved() sets the struct amdgpu_bo_param all zero by
memset(&bp, 0, sizeof(bp)), and amdgpu_bo_create() kvzalloc a struct amdgpu_bo.
However I think we need to set the BO all zero, just like what 
mes_userq_create_ctx_space() does.

Thanks
Lingshan  

>
> Regards,
> Christian.
>
>>>> +  }
>>>> +
>>>> +  mutex_unlock(&uq_mgr->userq_mutex);
>>>> +
>>>> +  return r;
>>>> +}
>>>> +
>>>>  static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
>>>>                            struct drm_amdgpu_userq_in *args_in)
>>>>  {
>>>> @@ -429,7 +446,14 @@ static int mes_userq_mqd_create(struct 
>>>> amdgpu_usermode_queue *queue,
>>>>            goto free_mqd;
>>>>    }
>>>>  
>>>> -  /* Create BO for FW operations */
>>>> +  /* Create per-process MES process context BO */
>>>> +  r = mes_userq_create_proc_ctx_space(uq_mgr);
>>>> +  if (r) {
>>>> +          DRM_ERROR("Failed to allocate MES process context space bo, 
>>>> error: %d\n", r);
>>>> +          goto free_mqd;
>>>> +  }
>>>> +
>>>> +  /* Create BO of a gang for FW operations */
>>>>    r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
>>>>    if (r) {
>>>>            DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
>>>> @@ -492,7 +516,7 @@ static int mes_userq_preempt(struct 
>>>> amdgpu_usermode_queue *queue)
>>>>    *fence_ptr = 0;
>>>>  
>>>>    memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
>>>> -  queue_input.gang_context_addr = ctx->gpu_addr + 
>>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>>> +  queue_input.gang_context_addr = ctx->gpu_addr;
>>>>    queue_input.suspend_fence_addr = fence_gpu_addr;
>>>>    queue_input.suspend_fence_value = 1;
>>>>    amdgpu_mes_lock(&adev->mes);
>>>> @@ -529,7 +553,7 @@ static int mes_userq_restore(struct 
>>>> amdgpu_usermode_queue *queue)
>>>>            return 0;
>>>>  
>>>>    memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
>>>> -  queue_input.gang_context_addr = ctx->gpu_addr + 
>>>> AMDGPU_USERQ_PROC_CTX_SZ;
>>>> +  queue_input.gang_context_addr = ctx->gpu_addr;
>>>>  
>>>>    amdgpu_mes_lock(&adev->mes);
>>>>    r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);

Re: [PATCH] drm/amdgpu: implement per-process MES context

Reply via email to