AMD General

Gentle ping, Christian and Alex

-----Original Message-----
From: Zhu, Lingshan <[email protected]>
Sent: Monday, June 1, 2026 2:41 PM
To: Deucher, Alexander <[email protected]>; Koenig, Christian 
<[email protected]>
Cc: [email protected]; Huang, Ray <[email protected]>; Zhu, 
Lingshan <[email protected]>
Subject: [PATCH V3] drm/amdgpu: implement per-process MES context

MES process context is a process-level page where process specific context is 
saved for MES scheduler.

However, current user-queue code path assigns fw_obj of a queue to MES 
process_context_addr when adding the queue to MES.

This means every new queue from the same process would replace the previous 
process context address with that queue's fw_obj address.
What's worse is, when user space frees a queue, its fw_obj will be freed as 
well, causing MES working on a NULL page pointer.

This issue leads to inconsistency and crash in the scheduler.

This commit allocates a process-level page for MES process contexts for a 
process other than queue-level

Signed-off-by: Zhu Lingshan <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c  |  6 +++  
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h  |  2 +  
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 51 +++++++++++++++++-----
 3 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 38e310a8694d..951d5da850be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -1172,6 +1172,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr 
*userq_mgr, struct drm_file *f
        xa_init_flags(&userq_mgr->userq_xa, XA_FLAGS_ALLOC);
        userq_mgr->adev = adev;
        userq_mgr->file = file_priv;
+       mutex_init(&userq_mgr->proc_ctx_lock);

        INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
        INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work); @@ 
-1225,6 +1226,11 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr 
*userq_mgr)
         */
        cancel_work_sync(&userq_mgr->reset_work);

+       amdgpu_bo_free_kernel(&userq_mgr->proc_ctx_obj.obj,
+                             &userq_mgr->proc_ctx_obj.gpu_addr,
+                             &userq_mgr->proc_ctx_obj.cpu_ptr);
+
+       mutex_destroy(&userq_mgr->proc_ctx_lock);
        mutex_destroy(&userq_mgr->userq_mutex);
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 28cfc6682333..a5867ffe6988 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -127,6 +127,8 @@ struct amdgpu_userq_mgr {
        struct amdgpu_device            *adev;
        struct delayed_work             resume_work;
        struct drm_file                 *file;
+       struct mutex                    proc_ctx_lock;
+       struct amdgpu_userq_obj         proc_ctx_obj;

        /**
         * @reset_work:
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index e9189f07c6dc..6c8a44cee34b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -133,8 +133,8 @@ static int mes_userq_map(struct amdgpu_usermode_queue 
*queue)
        queue_input.gang_quantum = 10000;
        queue_input.paging = false;

-       queue_input.process_context_addr = ctx->gpu_addr;
-       queue_input.gang_context_addr = ctx->gpu_addr + 
AMDGPU_USERQ_PROC_CTX_SZ;
+       queue_input.process_context_addr = uq_mgr->proc_ctx_obj.gpu_addr;
+       queue_input.gang_context_addr = ctx->gpu_addr;
        queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
        queue_input.gang_global_priority_level = 
convert_to_mes_priority(queue->priority);

@@ -169,7 +169,7 @@ static int mes_userq_unmap(struct amdgpu_usermode_queue 
*queue)

        memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
        queue_input.doorbell_offset = queue->doorbell_index;
-       queue_input.gang_context_addr = ctx->gpu_addr + 
AMDGPU_USERQ_PROC_CTX_SZ;
+       queue_input.gang_context_addr = ctx->gpu_addr;

        amdgpu_mes_lock(&adev->mes);
        r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); @@ 
-186,12 +186,8 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr 
*uq_mgr,
        struct amdgpu_userq_obj *ctx = &queue->fw_obj;
        int r, size;

-       /*
-        * The FW expects at least one page space allocated for
-        * process ctx and gang ctx each. Create an object
-        * for the same.
-        */
-       size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
+       /* The FW expects at least one page space allocated for gang ctx. */
+       size = AMDGPU_USERQ_GANG_CTX_SZ;
        r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0,
                                    AMDGPU_GEM_DOMAIN_GTT,
                                    &ctx->obj, &ctx->gpu_addr,
@@ -257,6 +253,30 @@ static int mes_userq_detect_and_reset(struct amdgpu_device 
*adev,
        return r;
 }

+static int mes_userq_create_proc_ctx_space(struct amdgpu_userq_mgr
+*uq_mgr) {
+       int r = 0;
+
+       mutex_lock(&uq_mgr->proc_ctx_lock);
+       /* This check is a necessary because amdgpu_bo_create_kernel()
+        * calls helpers like amdgpu_bo_pin() and memset() unconditionally
+        */
+       if (!uq_mgr->proc_ctx_obj.obj) {
+               r = amdgpu_bo_create_kernel(uq_mgr->adev, 
AMDGPU_USERQ_PROC_CTX_SZ,
+                                           0, AMDGPU_GEM_DOMAIN_GTT,
+                                           &uq_mgr->proc_ctx_obj.obj,
+                                           &uq_mgr->proc_ctx_obj.gpu_addr,
+                                           &uq_mgr->proc_ctx_obj.cpu_ptr);
+
+               if (!r)
+                       memset(uq_mgr->proc_ctx_obj.cpu_ptr, 0, 
AMDGPU_USERQ_PROC_CTX_SZ);
+       }
+
+       mutex_unlock(&uq_mgr->proc_ctx_lock);
+
+       return r;
+}
+
 static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
                                struct drm_amdgpu_userq_in *args_in)  { @@ 
-429,7 +449,14 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue 
*queue,
                goto free_mqd;
        }

-       /* Create BO for FW operations */
+       /* Create per-process MES process context BO */
+       r = mes_userq_create_proc_ctx_space(uq_mgr);
+       if (r) {
+               DRM_ERROR("Failed to allocate MES process context space bo, 
error: %d\n", r);
+               goto free_mqd;
+       }
+
+       /* Create BO of a gang for FW operations */
        r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
        if (r) {
                DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); @@ 
-492,7 +519,7 @@ static int mes_userq_preempt(struct amdgpu_usermode_queue 
*queue)
        *fence_ptr = 0;

        memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input));
-       queue_input.gang_context_addr = ctx->gpu_addr + 
AMDGPU_USERQ_PROC_CTX_SZ;
+       queue_input.gang_context_addr = ctx->gpu_addr;
        queue_input.suspend_fence_addr = fence_gpu_addr;
        queue_input.suspend_fence_value = 1;
        amdgpu_mes_lock(&adev->mes);
@@ -529,7 +556,7 @@ static int mes_userq_restore(struct amdgpu_usermode_queue 
*queue)
                return 0;

        memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input));
-       queue_input.gang_context_addr = ctx->gpu_addr + 
AMDGPU_USERQ_PROC_CTX_SZ;
+       queue_input.gang_context_addr = ctx->gpu_addr;

        amdgpu_mes_lock(&adev->mes);
        r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input);
--
2.54.0

Reply via email to