The function mqd_symmetrically_map_cu_mask_v12_1() indexes a fixed-size
array cu_per_sh[2][2] using hardware-reported values from
gfx_info->max_shader_engines and gfx_info->max_sh_per_se without
validating they are within array bounds.

This can cause a stack buffer overflow if the hardware reports more than
2 shader engines or more than 2 shader arrays per engine. The missing
bounds check is a risk for future hardware and is a regression from the
v12 implementation, which checks these bounds via
mqd_symmetrically_map_cu_mask().

Size the array with KFD_MAX_NUM_SE and KFD_MAX_NUM_SH_PER_SE, and log an
error and return early if the hardware-reported values exceed them.

Signed-off-by: Sunday Clement <[email protected]>
---
 .../drm/amd/amdkfd/kfd_mqd_manager_v12_1.c    | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
index 9014912ed82c..fb9d4fb6705f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
@@ -52,7 +52,7 @@ static void mqd_symmetrically_map_cu_mask_v12_1(struct mqd_manager *mm,
 {
        struct amdgpu_cu_info *cu_info = &mm->dev->adev->gfx.cu_info;
        struct amdgpu_gfx_config *gfx_info = &mm->dev->adev->gfx.config;
-       uint32_t cu_per_sh[2][2] = {0};
+       uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
        uint32_t en_mask = 0x3;
        int i, se, sh, cu, cu_inc = 0;
        uint32_t cu_active_per_node;
@@ -63,6 +63,23 @@ static void mqd_symmetrically_map_cu_mask_v12_1(struct mqd_manager *mm,
        if (cu_mask_count > cu_active_per_node)
                cu_mask_count = cu_active_per_node;
 
+       /* Exceeding these bounds corrupts the stack and indicates a coding error.
+       * Returning with no CU's enabled will hang the queue, which should be
+       * attention grabbing.
+       */
+       if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
+               dev_err(mm->dev->adev->dev,
+                       "Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
+                       gfx_info->max_shader_engines);
+               return;
+       }
+       if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
+               dev_err(mm->dev->adev->dev,
+                       "Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+                       gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
+               return;
+       }
+
        /*
         * Count active CUs per SE/SH.
         */
-- 
2.43.0

Reply via email to