Add a new DRM ioctl, AMDGPU_USERQ_SET_CU_MASK, that allows userspace to
set the CU (Compute Unit) mask for a user queue, enabling fine-grained
control over compute workload distribution.

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h |   3 +
 include/uapi/drm/amdgpu_drm.h             |  12 +++
 4 files changed, 123 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d67bbaa8ce02..9c425169a4f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -3075,6 +3075,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SET_CU_MASK, amdgpu_userq_set_cu_mask_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 256ceca6d429..4cbf75723c08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -901,6 +901,113 @@ bool amdgpu_userq_enabled(struct drm_device *dev)
        return false;
 }
 
+static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
+                                    struct amdgpu_mqd_update_info *minfo)
+{
+       struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+       struct amdgpu_device *adev = uq_mgr->adev;
+       const struct amdgpu_userq_funcs *uq_funcs;
+       bool unmap_queue = false;
+       int r;
+
+       uq_funcs = adev->userq_funcs[queue->queue_type];
+       if (!uq_funcs || !uq_funcs->mqd_update)
+               return -EOPNOTSUPP;
+
+       /*
+        * Unmap the queue if it's mapped or preempted to ensure a clean update.
+        * If the queue is already unmapped or hung, we skip this step.
+        */
+       if (queue->state == AMDGPU_USERQ_STATE_MAPPED ||
+           queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
+               r = amdgpu_userq_unmap_helper(queue);
+               if (r)
+                       return r;
+               unmap_queue = true;
+       }
+
+       r = uq_funcs->mqd_update(queue, minfo);
+
+       /* Remap even if the MQD update failed; report the first error. */
+       if (unmap_queue) {
+               int map_r = amdgpu_userq_map_helper(queue);
+               if (map_r)
+                       dev_err(adev->dev, "Failed to remap queue %llu after update\n",
+                               queue->doorbell_index);
+               if (!r)
+                       r = map_r;
+       }
+
+       return r;
+}
+
+int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *filp)
+{
+       struct amdgpu_device *adev = drm_to_adev(dev);
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
+       struct amdgpu_userq_set_cu_mask_args *args = data;
+       struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+       struct amdgpu_usermode_queue *queue;
+       struct amdgpu_mqd_update_info minfo = {0};
+       const int max_num_cus = 1024;
+       size_t cu_mask_size;
+       int r;
+
+       mutex_lock(&uq_mgr->userq_mutex);
+       queue = amdgpu_userq_find(uq_mgr, args->queue_id);
+       if (!queue) {
+               mutex_unlock(&uq_mgr->userq_mutex);
+               return -EINVAL;
+       }
+
+       /* The mask is passed as whole u32 words; reject anything else. */
+       if (args->num_cu_mask == 0 || args->num_cu_mask % 32) {
+               r = -EINVAL;
+               goto unlock;
+       }
+
+       minfo.cu_mask.count = args->num_cu_mask;
+       /* To prevent an unreasonably large CU mask size, set an arbitrary
+        * limit of max_num_cus bits.  We can then just drop any CU mask bits
+        * past max_num_cus bits and just use the first max_num_cus bits.
+        */
+       if (minfo.cu_mask.count > max_num_cus) {
+               drm_file_err(uq_mgr->file, "CU mask cannot be greater than 1024 bits\n");
+               minfo.cu_mask.count = max_num_cus;
+               cu_mask_size = sizeof(uint32_t) * (max_num_cus / 32);
+       } else {
+               cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
+       }
+
+       /* memdup_user() returns an ERR_PTR on failure, never NULL. */
+       minfo.cu_mask.ptr = memdup_user(u64_to_user_ptr(args->cu_mask_ptr),
+                                   cu_mask_size);
+       if (IS_ERR(minfo.cu_mask.ptr)) {
+               r = PTR_ERR(minfo.cu_mask.ptr);
+               goto unlock;
+       }
+
+       /* ASICs that have WGPs must enforce pairwise enabled mask checks. */
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) {
+               for (u32 i = 0; i < minfo.cu_mask.count; i += 2) {
+                       uint32_t cu_pair = (minfo.cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
+
+                       if (cu_pair && cu_pair != 0x3) {
+                               drm_file_err(uq_mgr->file, "CUs must be adjacent pairwise enabled.\n");
+                               kfree(minfo.cu_mask.ptr);
+                               r = -EINVAL;
+                               goto unlock;
+                       }
+               }
+       }
+
+       r = amdgpu_userq_update_queue(queue, &minfo);
+
+       kfree(minfo.cu_mask.ptr);
+unlock:
+       mutex_unlock(&uq_mgr->userq_mutex);
+
+       return r;
+}
+
 int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *filp)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 1ff0f611f882..43bf104d2fb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -115,6 +115,9 @@ struct amdgpu_db_info {
 
 int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 
+int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
+                                       struct drm_file *filp);
+
 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
                          struct amdgpu_device *adev);
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index ab2bf47553e1..41b6b3cea834 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -58,6 +58,7 @@ extern "C" {
 #define DRM_AMDGPU_USERQ_SIGNAL                0x17
 #define DRM_AMDGPU_USERQ_WAIT          0x18
 #define DRM_AMDGPU_GEM_LIST_HANDLES    0x19
+#define DRM_AMDGPU_USERQ_SET_CU_MASK   0x1a
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP      DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -79,6 +80,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL  DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
 #define DRM_IOCTL_AMDGPU_USERQ_WAIT    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
 #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
+#define DRM_IOCTL_AMDGPU_USERQ_SET_CU_MASK DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SET_CU_MASK, struct amdgpu_userq_set_cu_mask_args)
 
 /**
  * DOC: memory domains
@@ -428,6 +430,16 @@
        struct drm_amdgpu_userq_out out;
 };
 
+/* IOCTL arguments for setting a user queue's CU mask */
+struct amdgpu_userq_set_cu_mask_args {
+       /* Target user queue ID */
+       __u32 queue_id;
+       /* CU mask size in bits; must be a non-zero multiple of 32 */
+       __u32 num_cu_mask;
+       /* User-space pointer to an array of __u32 CU mask words */
+       __u64 cu_mask_ptr;
+};
+
 /* GFX V11 IP specific MQD parameters */
 struct drm_amdgpu_userq_mqd_gfx11 {
        /**
-- 
2.49.0

Reply via email to