Fix two issues in amdgpu_gfx_run_cleaner_shader_job(): 1. IB overflow: The indirect buffer (IB) is allocated with a hardcoded size of 64 bytes, but the initialization loop writes (align_mask + 1) dwords into it. On modern GFX rings with align_mask = 0xff, this writes 256 dwords (1024 bytes), overflowing the 64-byte allocation and corrupting memory.
2. Scheduler entity leak: The drm_sched_entity is not cleaned up on the error path after amdgpu_job_alloc_with_ib() fails. Fix by: - Dynamically calculating IB size based on ring->funcs->align_mask - Adding drm_sched_entity_destroy() to the error path Cc: [email protected] Fixes: d361ad5d2fc0 ("drm/amdgpu: Add sysfs interface for running cleaner shader") Fixes: 256576ed6895 ("drm/amdgpu: give each kernel job a unique id") Fixes: 559a285816af ("drm/amdgpu: Replace 'amdgpu_job_submit_direct' with 'drm_sched_entity' in cleaner shader") Signed-off-by: Wentao Liang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b8ca876694ff..b50ec1a5c645 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1651,6 +1651,7 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) struct amdgpu_job *job; struct amdgpu_ib *ib; void *owner; + unsigned int ib_size; int i, r; /* Initialize the scheduler entity */ @@ -1658,7 +1659,7 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) &sched, 1, NULL); if (r) { dev_err(adev->dev, "Failed setting up GFX kernel entity.\n"); - goto err; + return r; } /* @@ -1668,8 +1669,15 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) */ owner = (void *)(unsigned long)atomic_inc_return(&counter); + /* + * Allocate IB with enough space for align_mask + 1 dwords. + * The initialization loop below writes exactly this many dwords. + * Each dword is 4 bytes. 
+ */ + ib_size = (ring->funcs->align_mask + 1) * sizeof(uint32_t); + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, - 64, 0, &job, + ib_size, 0, &job, AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER); if (r) goto err; @@ -1686,8 +1694,6 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) f = amdgpu_job_submit(job); r = dma_fence_wait(f, false); - if (r) - goto err; dma_fence_put(f); @@ -1696,6 +1702,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) return 0; err: + /* Clean up the scheduler entity */ + drm_sched_entity_destroy(&entity); return r; } -- 2.39.5 (Apple Git-154)
