Fix two issues in amdgpu_gfx_run_cleaner_shader_job():

1. IB buffer overflow: The indirect buffer is hardcoded to 64 bytes,
   but the initialization loop writes up to (align_mask + 1) dwords.
   On modern GFX rings with align_mask = 0xff, this writes 1024 bytes,
   overflowing the 64-byte allocation and corrupting memory.

2. Scheduler entity leak: The drm_sched_entity is not cleaned up on
   the error path after amdgpu_job_alloc_with_ib() fails.

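Fix by:
- Dynamically calculating IB size based on ring->funcs->align_mask
- Adding drm_sched_entity_destroy() to the error path
- Returning directly when setting up the scheduler entity fails, so the
  new error path never destroys an entity that was not initialized
- Dropping the early "goto err" after dma_fence_wait(), so that
  dma_fence_put() is also reached when the wait returns an error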

Cc: [email protected]
Fixes: d361ad5d2fc0 ("drm/amdgpu: Add sysfs interface for running cleaner shader")
Fixes: 256576ed6895 ("drm/amdgpu: give each kernel job a unique id")
Fixes: 559a285816af ("drm/amdgpu: Replace 'amdgpu_job_submit_direct' with 'drm_sched_entity' in cleaner shader")
Signed-off-by: Wentao Liang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b8ca876694ff..b50ec1a5c645 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1651,6 +1651,7 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        void *owner;
+       unsigned int ib_size;
        int i, r;
 
        /* Initialize the scheduler entity */
@@ -1658,7 +1659,7 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
                                  &sched, 1, NULL);
        if (r) {
                dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
-               goto err;
+               return r;
        }
 
        /*
@@ -1668,8 +1669,15 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
         */
        owner = (void *)(unsigned long)atomic_inc_return(&counter);
 
+       /*
+        * Allocate IB with enough space for align_mask + 1 dwords.
+        * The initialization loop below writes exactly this many dwords.
+        * Each dword is 4 bytes.
+        */
+       ib_size = (ring->funcs->align_mask + 1) * sizeof(uint32_t);
+
        r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
-                                    64, 0, &job,
+                                    ib_size, 0, &job,
                                     AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER);
        if (r)
                goto err;
@@ -1686,8 +1694,6 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
        f = amdgpu_job_submit(job);
 
        r = dma_fence_wait(f, false);
-       if (r)
-               goto err;
 
        dma_fence_put(f);
 
@@ -1696,6 +1702,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
        return 0;
 
 err:
+       /* Clean up the scheduler entity */
+       drm_sched_entity_destroy(&entity);
        return r;
 }
 
-- 
2.39.5 (Apple Git-154)

Reply via email to