[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Alex Deucher <[email protected]>
> Sent: Saturday, January 24, 2026 5:05 AM
> To: Zhang, Jesse(Jie) <[email protected]>
> Cc: [email protected]; Deucher, Alexander <[email protected]>;
> Koenig, Christian <[email protected]>
> Subject: Re: [PATCH 6/7] drm/amdgpu: implement CU mask modification support
> for AMDGPU user queues
>
> On Fri, Jan 23, 2026 at 5:11 AM Jesse.Zhang <[email protected]> wrote:
> >
> > Add support for dynamic Compute Unit (CU) mask modification to AMDGPU
> > user queues via a new MODIFY_CU_MASK operation. This enables userspace
> > to update CU allocation for existing queues at runtime.
> >
> > v2: add a new op for AMDGPU_USERQ. E.g., AMDGPU_USERQ_OP_CU_MASK
> >
> > Suggested-by: Alex Deucher <[email protected]>
> > Signed-off-by: Jesse Zhang <[email protected]>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 111 ++++++++++++++++++++++
> >  include/uapi/drm/amdgpu_drm.h             |  13 +++
> >  2 files changed, 124 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > index 256ceca6d429..4d7841f47dd3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > @@ -901,6 +901,113 @@ bool amdgpu_userq_enabled(struct drm_device *dev)
> >         return false;
> >  }
> >
> > +static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue)
> > +{
> > +       struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
> > +       struct amdgpu_device *adev = uq_mgr->adev;
> > +       const struct amdgpu_userq_funcs *uq_funcs;
> > +       bool unmap_queue = false;
> > +       int r;
> > +
> > +       uq_funcs = adev->userq_funcs[queue->queue_type];
> > +       if (!uq_funcs || (queue->queue_type != AMDGPU_HW_IP_COMPUTE))
> > +               return -EOPNOTSUPP;
> > +
> > +       /*
> > +        * Unmap the queue if it's mapped or preempted to ensure a clean update.
> > +        * If the queue is already unmapped or hung, we skip this step.
> > +        */
> > +       if (queue->state == AMDGPU_USERQ_STATE_MAPPED ||
> > +           queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
> > +               r = amdgpu_userq_unmap_helper(queue);
> > +               if (r)
> > +                       return r;
> > +               unmap_queue = true;
> > +       }
> > +
> > +       r = uq_funcs->mqd_update(queue);
> > +
> > +       if (unmap_queue) {
> > +               r = amdgpu_userq_map_helper(queue);
> > +               if (r)
> > +                       drm_file_err(uq_mgr->file, "Failed to remap queue %llu after update\n",
> > +                                    queue->doorbell_index);
> > +       }
> > +
> > +       return r;
> > +}
> > +
> > +static int amdgpu_userq_set_cu_mask(struct drm_file *filp, union drm_amdgpu_userq *args)
> > +{
> > +       struct amdgpu_fpriv *fpriv = filp->driver_priv;
> > +       struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> > +       struct amdgpu_device *adev = uq_mgr->adev;
> > +       struct amdgpu_usermode_queue *queue;
> > +       struct amdgpu_mqd_prop *props;
> > +       const int max_num_cus = 1024;
> > +       size_t cu_mask_size;
> > +       uint32_t count;
> > +       uint32_t *ptr;
> > +       int r;
> > +
> > +       mutex_lock(&uq_mgr->userq_mutex);
> > +       queue = amdgpu_userq_find(uq_mgr, args->in.queue_id);
> > +       if (!queue) {
> > +               mutex_unlock(&uq_mgr->userq_mutex);
> > +               return -EINVAL;
> > +       }
> > +       props = queue->userq_prop;
> > +
> > +       if (args->in.cu_mask_count == 0 || args->in.cu_mask_count % 32) {
> > +               r = -EINVAL;
> > +               goto unlock;
> > +       }
> > +
> > +       count = args->in.cu_mask_count;
> > +       /* To prevent an unreasonably large CU mask size, set an arbitrary
> > +        * limit of max_num_cus bits. We can then just drop any CU mask bits
> > +        * past max_num_cus bits and just use the first max_num_cus bits.
> > +        */
> > +       if (count > max_num_cus) {
> > +               drm_file_err(uq_mgr->file, "CU mask cannot be greater than 1024 bits");
> > +               count = max_num_cus;
> > +               cu_mask_size = sizeof(uint32_t) * (max_num_cus / 32);
> > +       } else {
> > +               cu_mask_size = sizeof(uint32_t) * (args->in.cu_mask_count / 32);
> > +       }
> > +
> > +       ptr = memdup_user(u64_to_user_ptr(args->in.cu_mask_ptr), cu_mask_size);
> > +       if (IS_ERR(ptr)) {
> > +               r = PTR_ERR(ptr);
> > +               goto unlock;
> > +       }
> > +
> > +       /* ASICs that have WGPs must enforce pairwise enabled mask checks. */
> > +       if (ptr && adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) {
> > +               for (int i = 0; i < count; i +=2) {
> > +                       uint32_t cu_pair = (ptr[i / 32] >> (i % 32)) & 0x3;
> > +
> > +                       if (cu_pair && cu_pair != 0x3) {
> > +                               drm_file_err(uq_mgr->file, "CUs must be adjacent pairwise enabled.\n");
> > +                               kfree(ptr);
> > +                               r = -EINVAL;
> > +                               goto unlock;
> > +                       }
> > +               }
> > +       }
> > +
> > +       props->cu_mask = ptr;
> > +       props->cu_mask_count = count;
> > +       r = amdgpu_userq_update_queue(queue);
> > +
> > +       kfree(ptr);
> > +unlock:
> > +       mutex_unlock(&uq_mgr->userq_mutex);
> > +
> > +       return r;
> > +}
> > +
> >  int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> >                         struct drm_file *filp)
> >  {
> > @@ -920,6 +1027,10 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> >                         drm_file_err(filp, "Failed to create usermode queue\n");
> >                 break;
> >
> > +       case AMDGPU_USERQ_OP_MODIFY_CU_MASK:
> > +               amdgpu_userq_set_cu_mask(filp, args);
> > +               break;
> > +
> >         case AMDGPU_USERQ_OP_FREE:
> >                 r = amdgpu_userq_destroy(filp, args->in.queue_id);
> >                 if (r)
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index ab2bf47553e1..cfc3a9313229 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -330,6 +330,7 @@ union drm_amdgpu_ctx {
> >  /* user queue IOCTL operations */
> >  #define AMDGPU_USERQ_OP_CREATE 1
> >  #define AMDGPU_USERQ_OP_FREE   2
> > +#define AMDGPU_USERQ_OP_MODIFY_CU_MASK 3
> >
> >  /* queue priority levels */
> >  /* low < normal low < normal high < high */
> > @@ -410,6 +411,18 @@ struct drm_amdgpu_userq_in {
> >          * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
> >          */
> >         __u64 mqd_size;
> > +       /**
> > +        * @cu_mask_ptr: User-space pointer to CU (Compute Unit) mask array
> > +        * Points to an array of __u32 values that define which CUs are enabled
> > +        * for this queue (0 = disabled, 1 = enabled per bit)
> > +        */
> > +       __u64 cu_mask_ptr;
> > +       /**
> > +        * @cu_mask_count: Number of entries in the CU mask array
> > +        * Total count of __u32 elements in the cu_mask_ptr array (each element
> > +        * represents 32 CUs/WGPs)
> > +        */
> > +       __u32 cu_mask_count;
>
> I'd put these in drm_amdgpu_userq_mqd_compute_gfx11. Then you can use
> the mqd to set the CU mask on queue creation or modification. I don't
> see why we should limit it to just modify.

Thanks, Alex. This will make it more flexible. I will update the patch.
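Something along these lines is what I have in mind for the next version
(untested sketch only; the field names, the padding, and exactly where the
mask lands in the struct are placeholders, the existing members are left as
they are, and the count is in bits to match the kernel-side validation):

    struct drm_amdgpu_userq_mqd_compute_gfx11 {
            /* existing members (e.g. eop_va) stay unchanged */
            __u64   eop_va;
            /*
             * User pointer to an array of __u32 CU mask dwords, one bit
             * per CU; on parts with WGPs the CUs must be enabled in
             * adjacent pairs.
             */
            __u64   cu_mask_ptr;
            /* Number of mask bits, must be a non-zero multiple of 32. */
            __u32   cu_mask_count;
            __u32   pad;
    };

Since the mqd is already copied in on CREATE, the same fields can feed the CU
mask at queue creation, and the modify path just re-reads the mqd and calls
mqd_update(), so both paths share the same validation.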
Thanks
Jesse

> Alex
>
> >  };
> >
> >  /* The structure to carry output of userqueue ops */
> > --
> > 2.49.0
> >
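P.S. For reference, this is roughly how userspace drives the op as posted in
this version (illustrative only and untested; it assumes the existing
DRM_IOCTL_AMDGPU_USERQ wrapper and libdrm's drmIoctl(), that queue_id came
from a prior AMDGPU_USERQ_OP_CREATE, and that the count is given in bits,
which is what the kernel-side checks expect):

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>
    #include "amdgpu_drm.h"

    static int set_cu_mask(int fd, uint32_t queue_id)
    {
            /* Enable the first 48 CUs; bits are set in adjacent pairs so
             * the gfx10+ WGP check in amdgpu_userq_set_cu_mask() passes. */
            uint32_t cu_mask[2] = { 0xffffffff, 0x0000ffff };
            union drm_amdgpu_userq args;

            memset(&args, 0, sizeof(args));
            args.in.op = AMDGPU_USERQ_OP_MODIFY_CU_MASK;
            args.in.queue_id = queue_id;
            args.in.cu_mask_ptr = (uintptr_t)cu_mask;
            args.in.cu_mask_count = 64;     /* in bits, multiple of 32 */

            return drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ, &args);
    }

Once the mask moves into the gfx11 compute mqd as suggested, the same data
would be passed through the mqd pointer on CREATE (or on modification)
instead.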
