On Wed, Mar 4, 2026 at 1:33 PM David Francis <[email protected]> wrote:
>
> get_checkpoint_info() in kfd_mqd_manager_v9.c finds 32-bit value
> ctl_stack_size by multiplying two 32-bit values. This can overflow to a
> lower value, which could result in copying outside the bounds of
> a buffer in checkpoint_mqd() in the same file.
>
> Put in a check for the overflow, and fail with -EINVAL if detected.
>
> v2: use check_mul_overflow()
>
> Signed-off-by: David Francis <[email protected]>
Reviewed-by: Alex Deucher <[email protected]> > --- > drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 +++++-- > drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 3 ++- > drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 7 +++++-- > drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 3 ++- > drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 8 +++++++- > 6 files changed, 22 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 804851632c4c..18bc5ba25f8f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -2720,7 +2720,7 @@ static int get_wave_state(struct device_queue_manager > *dqm, > ctl_stack, ctl_stack_used_size, save_area_used_size); > } > > -static void get_queue_checkpoint_info(struct device_queue_manager *dqm, > +static int get_queue_checkpoint_info(struct device_queue_manager *dqm, > const struct queue *q, > u32 *mqd_size, > u32 *ctl_stack_size) > @@ -2728,6 +2728,7 @@ static void get_queue_checkpoint_info(struct > device_queue_manager *dqm, > struct mqd_manager *mqd_mgr; > enum KFD_MQD_TYPE mqd_type = > get_mqd_type_from_queue_type(q->properties.type); > + int ret = 0; > > dqm_lock(dqm); > mqd_mgr = dqm->mqd_mgrs[mqd_type]; > @@ -2735,9 +2736,11 @@ static void get_queue_checkpoint_info(struct > device_queue_manager *dqm, > *ctl_stack_size = 0; > > if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && > mqd_mgr->get_checkpoint_info) > - mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); > + ret = mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, > ctl_stack_size); > > dqm_unlock(dqm); > + > + return ret; > } > > static int checkpoint_mqd(struct device_queue_manager *dqm, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > index ef07e44916f8..3272328da11f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > @@ -192,7 +192,7 @@ struct device_queue_manager_ops { > > int (*reset_queues)(struct device_queue_manager *dqm, > uint16_t pasid); > - void (*get_queue_checkpoint_info)(struct device_queue_manager *dqm, > + int (*get_queue_checkpoint_info)(struct device_queue_manager *dqm, > const struct queue *q, u32 *mqd_size, > u32 *ctl_stack_size); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h > index 2429d278ef0e..06ca6235ff1b 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h > @@ -102,7 +102,8 @@ struct mqd_manager { > u32 *ctl_stack_used_size, > u32 *save_area_used_size); > > - void (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, > uint32_t *ctl_stack_size); > + int (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, > + uint32_t *ctl_stack_size); > > void (*checkpoint_mqd)(struct mqd_manager *mm, > void *mqd, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > index 273d52c8d332..8630f679a5d4 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > @@ -385,11 +385,14 @@ static int get_wave_state(struct mqd_manager *mm, void > *mqd, > return 0; > } > > -static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 > *ctl_stack_size) > +static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 > *ctl_stack_size) > { > struct v9_mqd *m = get_mqd(mqd); > > - *ctl_stack_size = m->cp_hqd_cntl_stack_size * > NUM_XCC(mm->dev->xcc_mask); > + if (check_mul_overflow(m->cp_hqd_cntl_stack_size, > NUM_XCC(mm->dev->xcc_mask), ctl_stack_size)) > + return -EINVAL; > + > + return 0; 
> } > > static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, > void *ctl_stack_dst) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > index c192c66a5c7b..499d366db91c 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > @@ -274,10 +274,11 @@ static int get_wave_state(struct mqd_manager *mm, void > *mqd, > return 0; > } > > -static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 > *ctl_stack_size) > +static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 > *ctl_stack_size) > { > /* Control stack is stored in user mode */ > *ctl_stack_size = 0; > + return 0; > } > > static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, > void *ctl_stack_dst) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > index 449be58e884c..cb2416687137 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > @@ -1069,6 +1069,7 @@ int pqm_get_queue_checkpoint_info(struct > process_queue_manager *pqm, > uint32_t *ctl_stack_size) > { > struct process_queue_node *pqn; > + int ret; > > pqn = get_queue_by_qid(pqm, qid); > if (!pqn) { > @@ -1081,9 +1082,14 @@ int pqm_get_queue_checkpoint_info(struct > process_queue_manager *pqm, > return -EOPNOTSUPP; > } > > - > pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm, > + ret = > pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm, > pqn->q, mqd_size, > ctl_stack_size); > + if (ret) { > + pr_debug("amdkfd: Overflow while computing stack size for > queue %d\n", qid); > + return ret; > + } > + > return 0; > } > > -- > 2.34.1 >
