[AMD Official Use Only - AMD Internal Distribution Only]

> -----Original Message-----
> From: Alex Deucher <[email protected]>
> Sent: Saturday, January 24, 2026 5:05 AM
> To: Zhang, Jesse(Jie) <[email protected]>
> Cc: [email protected]; Deucher, Alexander
> <[email protected]>; Koenig, Christian <[email protected]>
> Subject: Re: [PATCH 6/7] drm/amdgpu: implement CU mask modification support
> for AMDGPU user queues
>
> On Fri, Jan 23, 2026 at 5:11 AM Jesse.Zhang <[email protected]> wrote:
> >
> > Add support for dynamic Compute Unit (CU) mask modification to AMDGPU
> > user queues via a new MODIFY_CU_MASK operation. This enables userspace
> > to update CU allocation for existing queues at runtime.
> >
> > v2: add a new op for AMDGPU_USERQ. E.g.,
> AMDGPU_USERQ_OP_CU_MASK
> >
> > Suggested-by: Alex Deucher <[email protected]>
> > Signed-off-by: Jesse Zhang <[email protected]>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 111
> ++++++++++++++++++++++
> >  include/uapi/drm/amdgpu_drm.h             |  13 +++
> >  2 files changed, 124 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > index 256ceca6d429..4d7841f47dd3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> > @@ -901,6 +901,113 @@ bool amdgpu_userq_enabled(struct drm_device *dev)
> >         return false;
> >  }
> >
> > +static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue
> > +*queue) {
> > +       struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
> > +       struct amdgpu_device *adev = uq_mgr->adev;
> > +       const struct amdgpu_userq_funcs *uq_funcs;
> > +       bool unmap_queue = false;
> > +       int r;
> > +
> > +       uq_funcs = adev->userq_funcs[queue->queue_type];
> > +       if (!uq_funcs || (queue->queue_type != AMDGPU_HW_IP_COMPUTE))
> > +               return -EOPNOTSUPP;
> > +
> > +       /*
> > +        * Unmap the queue if it's mapped or preempted to ensure a clean 
> > update.
> > +        * If the queue is already unmapped or hung, we skip this step.
> > +        */
> > +       if (queue->state == AMDGPU_USERQ_STATE_MAPPED ||
> > +           queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
> > +               r = amdgpu_userq_unmap_helper(queue);
> > +               if (r)
> > +                       return r;
> > +               unmap_queue = true;
> > +       }
> > +
> > +       r = uq_funcs->mqd_update(queue);
> > +
> > +       if (unmap_queue) {
> > +               r = amdgpu_userq_map_helper(queue);
> > +               if (r)
> > +                       drm_file_err(uq_mgr->file, "Failed to remap queue 
> > %llu after
> update\n",
> > +                               queue->doorbell_index);
> > +       }
> > +
> > +       return r;
> > +}
> > +
> > +static int amdgpu_userq_set_cu_mask(struct drm_file *filp,  union
> > +drm_amdgpu_userq *args) {
> > +       struct amdgpu_fpriv *fpriv = filp->driver_priv;
> > +       struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
> > +       struct amdgpu_device *adev = uq_mgr->adev;
> > +       struct amdgpu_usermode_queue *queue;
> > +       struct amdgpu_mqd_prop *props;
> > +       const int max_num_cus = 1024;
> > +       size_t cu_mask_size;
> > +       uint32_t count;
> > +       uint32_t *ptr;
> > +       int r;
> > +
> > +       mutex_lock(&uq_mgr->userq_mutex);
> > +       queue = amdgpu_userq_find(uq_mgr, args->in.queue_id);
> > +       if (!queue) {
> > +               mutex_unlock(&uq_mgr->userq_mutex);
> > +               return -EINVAL;
> > +       }
> > +       props = queue->userq_prop;
> > +
> > +       if (args->in.cu_mask_count == 0 || args->in.cu_mask_count % 32) {
> > +               r = -EINVAL;
> > +               goto unlock;
> > +       }
> > +
> > +       count = args->in.cu_mask_count;
> > +       /* To prevent an unreasonably large CU mask size, set an arbitrary
> > +       * limit of max_num_cus bits.  We can then just drop any CU mask bits
> > +       * past max_num_cus bits and just use the first max_num_cus bits.
> > +       */
> > +       if (count > max_num_cus) {
> > +               drm_file_err(uq_mgr->file, "CU mask cannot be greater than 
> > 1024
> bits");
> > +               count = max_num_cus;
> > +               cu_mask_size = sizeof(uint32_t) * (max_num_cus / 32);
> > +       } else {
> > +               cu_mask_size = sizeof(uint32_t) * (args->in.cu_mask_count / 
> > 32);
> > +       }
> > +
> > +       ptr = memdup_user(u64_to_user_ptr(args->in.cu_mask_ptr),
> > +                                   cu_mask_size);
> > +       if (IS_ERR(ptr)) {
> > +               r = PTR_ERR(ptr);
> > +               goto unlock;
> > +       }
> > +
> > +       /* ASICs that have WGPs must enforce pairwise enabled mask checks. 
> > */
> > +       if (ptr && adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) {
> > +               for (int i = 0; i < count; i +=2) {
> > +                       uint32_t cu_pair = (ptr[i / 32] >> (i % 32)) &
> > + 0x3;
> > +
> > +                       if (cu_pair && cu_pair != 0x3) {
> > +                               drm_file_err(uq_mgr->file, "CUs must be 
> > adjacent pairwise
> enabled.\n");
> > +                               kfree(ptr);
> > +                               r = -EINVAL;
> > +                               goto unlock;
> > +                       }
> > +               }
> > +       }
> > +
> > +       props->cu_mask = ptr;
> > +       props->cu_mask_count = count;
> > +       r = amdgpu_userq_update_queue(queue);
> > +
> > +       kfree(ptr);
> > +unlock:
> > +       mutex_unlock(&uq_mgr->userq_mutex);
> > +
> > +       return r;
> > +}
> > +
> >  int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> >                        struct drm_file *filp)  { @@ -920,6 +1027,10 @@
> > int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
> >                         drm_file_err(filp, "Failed to create usermode 
> > queue\n");
> >                 break;
> >
> > +       case AMDGPU_USERQ_OP_MODIFY_CU_MASK:
> > +               amdgpu_userq_set_cu_mask(filp, args);
> > +               break;
> > +
> >         case AMDGPU_USERQ_OP_FREE:
> >                 r = amdgpu_userq_destroy(filp, args->in.queue_id);
> >                 if (r)
> > diff --git a/include/uapi/drm/amdgpu_drm.h
> > b/include/uapi/drm/amdgpu_drm.h index ab2bf47553e1..cfc3a9313229
> > 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -330,6 +330,7 @@ union drm_amdgpu_ctx {
> >  /* user queue IOCTL operations */
> >  #define AMDGPU_USERQ_OP_CREATE 1
> >  #define AMDGPU_USERQ_OP_FREE   2
> > +#define AMDGPU_USERQ_OP_MODIFY_CU_MASK 3
> >
> >  /* queue priority levels */
> >  /* low < normal low < normal high < high */ @@ -410,6 +411,18 @@
> > struct drm_amdgpu_userq_in {
> >          * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
> >          */
> >         __u64 mqd_size;
> > +       /**
> > +        * @cu_mask_ptr: User-space pointer to CU (Compute Unit) mask array
> > +        * Points to an array of __u32 values that define which CUs are 
> > enabled
> > +        * for this queue (0 = disabled, 1 = enabled per bit)
> > +        */
> > +       __u64 cu_mask_ptr;
> > +       /**
> > +        * @cu_mask_count: Number of entries in the CU mask array
> > +        * Total count of __u32 elements in the cu_mask_ptr array (each 
> > element
> > +        * represents 32 CUs/WGPs)
> > +        */
> > +       __u32 cu_mask_count;
>
> I'd put these in drm_amdgpu_userq_mqd_compute_gfx11.  Then you can use the
> mqd to set the CU mask on queue creation or modification.  I don't see why we
> should limit it to just modify.
Thanks, Alex. Moving these fields into drm_amdgpu_userq_mqd_compute_gfx11 will make the interface more flexible, since the CU mask can then be set both at queue creation and on modification. I will update the patch accordingly.

Thanks,
Jesse
>
> Alex
>
> >  };
> >
> >  /* The structure to carry output of userqueue ops */
> > --
> > 2.49.0
> >

Reply via email to the sender.