AMD General Gentle ping, Christian and Alex
-----Original Message----- From: Zhu, Lingshan <[email protected]> Sent: Monday, June 1, 2026 2:41 PM To: Deucher, Alexander <[email protected]>; Koenig, Christian <[email protected]> Cc: [email protected]; Huang, Ray <[email protected]>; Zhu, Lingshan <[email protected]> Subject: [PATCH V3] drm/amdgpu: implement per-process MES context MES process context is a process-level page where process specific context is saved for MES scheduler. However, current user-queue code path assigns fw_obj of a queue to MES process_context_addr when adding the queue to MES. This means every new queue from the same process would replace the previous process context address with that queue's fw_obj address. What's worse is, when user space frees a queue, its fw_obj will be freed as well, causing MES working on a NULL page pointer. This issue leads to inconsistency and crash in the scheduler. This commit allocates a process-level page for MES process contexts for a process other than queue-level Signed-off-by: Zhu Lingshan <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 6 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 + drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 51 +++++++++++++++++----- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 38e310a8694d..951d5da850be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -1172,6 +1172,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f xa_init_flags(&userq_mgr->userq_xa, XA_FLAGS_ALLOC); userq_mgr->adev = adev; userq_mgr->file = file_priv; + mutex_init(&userq_mgr->proc_ctx_lock); INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work); @@ -1225,6 +1226,11 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) */ cancel_work_sync(&userq_mgr->reset_work); + amdgpu_bo_free_kernel(&userq_mgr->proc_ctx_obj.obj, + &userq_mgr->proc_ctx_obj.gpu_addr, + &userq_mgr->proc_ctx_obj.cpu_ptr); + + mutex_destroy(&userq_mgr->proc_ctx_lock); mutex_destroy(&userq_mgr->userq_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 28cfc6682333..a5867ffe6988 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -127,6 +127,8 @@ struct amdgpu_userq_mgr { struct amdgpu_device *adev; struct delayed_work resume_work; struct drm_file *file; + struct mutex proc_ctx_lock; + struct amdgpu_userq_obj proc_ctx_obj; /** * @reset_work: diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index e9189f07c6dc..6c8a44cee34b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -133,8 +133,8 @@ static int mes_userq_map(struct amdgpu_usermode_queue *queue) queue_input.gang_quantum = 10000; queue_input.paging = false; - queue_input.process_context_addr = ctx->gpu_addr; - queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.process_context_addr = uq_mgr->proc_ctx_obj.gpu_addr; + queue_input.gang_context_addr = ctx->gpu_addr; queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority); @@ -169,7 +169,7 @@ static int mes_userq_unmap(struct amdgpu_usermode_queue *queue) memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); queue_input.doorbell_offset = queue->doorbell_index; - queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.gang_context_addr = ctx->gpu_addr; amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); @@ -186,12 +186,8 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *ctx = &queue->fw_obj; int r, size; - /* - * The FW expects at least one page space allocated for - * process ctx and gang ctx each. Create an object - * for the same. - */ - size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; + /* The FW expects at least one page space allocated for gang ctx. */ + size = AMDGPU_USERQ_GANG_CTX_SZ; r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0, AMDGPU_GEM_DOMAIN_GTT, &ctx->obj, &ctx->gpu_addr, @@ -257,6 +253,30 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev, return r; } +static int mes_userq_create_proc_ctx_space(struct amdgpu_userq_mgr +*uq_mgr) { + int r = 0; + + mutex_lock(&uq_mgr->proc_ctx_lock); + /* This check is a necessary because amdgpu_bo_create_kernel() + * calls helpers like amdgpu_bo_pin() and memset() unconditionally + */ + if (!uq_mgr->proc_ctx_obj.obj) { + r = amdgpu_bo_create_kernel(uq_mgr->adev, AMDGPU_USERQ_PROC_CTX_SZ, + 0, AMDGPU_GEM_DOMAIN_GTT, + &uq_mgr->proc_ctx_obj.obj, + &uq_mgr->proc_ctx_obj.gpu_addr, + &uq_mgr->proc_ctx_obj.cpu_ptr); + + if (!r) + memset(uq_mgr->proc_ctx_obj.cpu_ptr, 0, AMDGPU_USERQ_PROC_CTX_SZ); + } + + mutex_unlock(&uq_mgr->proc_ctx_lock); + + return r; +} + static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, struct drm_amdgpu_userq_in *args_in) { @@ -429,7 +449,14 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, goto free_mqd; } - /* Create BO for FW operations */ + /* Create per-process MES process context BO */ + r = mes_userq_create_proc_ctx_space(uq_mgr); + if (r) { + DRM_ERROR("Failed to allocate MES process context space bo, error: %d\n", r); + goto free_mqd; + } + + /* Create BO of a gang for FW operations */ r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user); if (r) { DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); @@ -492,7 +519,7 @@ static int mes_userq_preempt(struct amdgpu_usermode_queue *queue) *fence_ptr = 0; memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input)); - queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.gang_context_addr = ctx->gpu_addr; queue_input.suspend_fence_addr = fence_gpu_addr; queue_input.suspend_fence_value = 1; amdgpu_mes_lock(&adev->mes); @@ -529,7 +556,7 @@ static int mes_userq_restore(struct amdgpu_usermode_queue *queue) return 0; memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input)); - queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.gang_context_addr = ctx->gpu_addr; amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input); -- 2.54.0
