On Wed, Mar 4, 2026 at 1:33 PM David Francis <[email protected]> wrote:
>
> get_checkpoint_info() in kfd_mqd_manager_v9.c computes the 32-bit value
> ctl_stack_size by multiplying two 32-bit values. The product can overflow
> and wrap to a smaller value, which could result in copying outside the
> bounds of a buffer in checkpoint_mqd() in the same file.
>
> Add a check for the overflow, and fail with -EINVAL if it is detected.
>
> v2: use check_mul_overflow()
>
> Signed-off-by: David Francis <[email protected]>

Reviewed-by: Alex Deucher <[email protected]>

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 7 +++++--
>  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h           | 3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c        | 7 +++++--
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c        | 3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 8 +++++++-
>  6 files changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 804851632c4c..18bc5ba25f8f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -2720,7 +2720,7 @@ static int get_wave_state(struct device_queue_manager 
> *dqm,
>                         ctl_stack, ctl_stack_used_size, save_area_used_size);
>  }
>
> -static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
> +static int get_queue_checkpoint_info(struct device_queue_manager *dqm,
>                         const struct queue *q,
>                         u32 *mqd_size,
>                         u32 *ctl_stack_size)
> @@ -2728,6 +2728,7 @@ static void get_queue_checkpoint_info(struct 
> device_queue_manager *dqm,
>         struct mqd_manager *mqd_mgr;
>         enum KFD_MQD_TYPE mqd_type =
>                         get_mqd_type_from_queue_type(q->properties.type);
> +       int ret = 0;
>
>         dqm_lock(dqm);
>         mqd_mgr = dqm->mqd_mgrs[mqd_type];
> @@ -2735,9 +2736,11 @@ static void get_queue_checkpoint_info(struct 
> device_queue_manager *dqm,
>         *ctl_stack_size = 0;
>
>         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && 
> mqd_mgr->get_checkpoint_info)
> -               mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
> +               ret = mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, 
> ctl_stack_size);
>
>         dqm_unlock(dqm);
> +
> +       return ret;
>  }
>
>  static int checkpoint_mqd(struct device_queue_manager *dqm,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index ef07e44916f8..3272328da11f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -192,7 +192,7 @@ struct device_queue_manager_ops {
>
>         int (*reset_queues)(struct device_queue_manager *dqm,
>                                         uint16_t pasid);
> -       void    (*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
> +       int     (*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
>                                   const struct queue *q, u32 *mqd_size,
>                                   u32 *ctl_stack_size);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> index 2429d278ef0e..06ca6235ff1b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> @@ -102,7 +102,8 @@ struct mqd_manager {
>                                   u32 *ctl_stack_used_size,
>                                   u32 *save_area_used_size);
>
> -       void    (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, 
> uint32_t *ctl_stack_size);
> +       int     (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd,
> +                                      uint32_t *ctl_stack_size);
>
>         void    (*checkpoint_mqd)(struct mqd_manager *mm,
>                                   void *mqd,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> index 273d52c8d332..8630f679a5d4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -385,11 +385,14 @@ static int get_wave_state(struct mqd_manager *mm, void 
> *mqd,
>         return 0;
>  }
>
> -static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 
> *ctl_stack_size)
> +static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 
> *ctl_stack_size)
>  {
>         struct v9_mqd *m = get_mqd(mqd);
>
> -       *ctl_stack_size = m->cp_hqd_cntl_stack_size * 
> NUM_XCC(mm->dev->xcc_mask);
> +       if (check_mul_overflow(m->cp_hqd_cntl_stack_size, 
> NUM_XCC(mm->dev->xcc_mask), ctl_stack_size))
> +               return -EINVAL;
> +
> +       return 0;
>  }
>
>  static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, 
> void *ctl_stack_dst)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> index c192c66a5c7b..499d366db91c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> @@ -274,10 +274,11 @@ static int get_wave_state(struct mqd_manager *mm, void 
> *mqd,
>         return 0;
>  }
>
> -static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 
> *ctl_stack_size)
> +static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 
> *ctl_stack_size)
>  {
>         /* Control stack is stored in user mode */
>         *ctl_stack_size = 0;
> +       return 0;
>  }
>
>  static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, 
> void *ctl_stack_dst)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index 449be58e884c..cb2416687137 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -1069,6 +1069,7 @@ int pqm_get_queue_checkpoint_info(struct 
> process_queue_manager *pqm,
>                                   uint32_t *ctl_stack_size)
>  {
>         struct process_queue_node *pqn;
> +       int ret;
>
>         pqn = get_queue_by_qid(pqm, qid);
>         if (!pqn) {
> @@ -1081,9 +1082,14 @@ int pqm_get_queue_checkpoint_info(struct 
> process_queue_manager *pqm,
>                 return -EOPNOTSUPP;
>         }
>
> -       
> pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
> +       ret = 
> pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
>                                                        pqn->q, mqd_size,
>                                                        ctl_stack_size);
> +       if (ret) {
> +               pr_debug("amdkfd: Overflow while computing stack size for 
> queue %d\n", qid);
> +               return ret;
> +       }
> +
>         return 0;
>  }
>
> --
> 2.34.1
>

Reply via email to