Add a new DRM ioctl, AMDGPU_USERQ_SET_CU_MASK, that lets userspace set the CU (Compute Unit) mask for a user queue, giving fine-grained control over how compute work is distributed across CUs.

The ioctl looks up the target queue, unmaps it if it is currently mapped or preempted, applies the new mask through the IP-specific mqd_update callback, and then remaps it. The mask length must be a non-zero multiple of 32 bits and is capped at 1024 bits; on GC 10.0+ (WGP-based) ASICs, CUs must be enabled in adjacent pairs.
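
A minimal userspace sketch of the intended usage (illustrative only: the helper name set_queue_cu_mask is made up here, and it assumes the uapi additions below plus a queue_id returned by an earlier DRM_AMDGPU_USERQ create call):

  #include <stdint.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <drm/amdgpu_drm.h>   /* must carry the uapi additions from this patch */

  /* Ask the kernel to restrict an existing user queue to the CUs enabled
   * in a 64-bit mask. drm_fd is an open render node for the target GPU.
   */
  static int set_queue_cu_mask(int drm_fd, uint32_t queue_id)
  {
          /* CUs must be enabled in adjacent pairs on WGP-based (GC 10.0+) ASICs. */
          uint32_t cu_mask[2] = { 0x0000000f, 0x00000000 };
          struct amdgpu_userq_set_cu_mask_args args;

          memset(&args, 0, sizeof(args));
          args.queue_id = queue_id;
          args.num_cu_mask = 64;                  /* bit count, non-zero multiple of 32 */
          args.cu_mask_ptr = (uintptr_t)cu_mask;  /* copied by the kernel via memdup_user() */

          return ioctl(drm_fd, DRM_IOCTL_AMDGPU_USERQ_SET_CU_MASK, &args);
  }
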
Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h |   3 +
 include/uapi/drm/amdgpu_drm.h             |  12 +++
 4 files changed, 123 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d67bbaa8ce02..9c425169a4f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -3075,6 +3075,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SET_CU_MASK, amdgpu_userq_set_cu_mask_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 256ceca6d429..4cbf75723c08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -901,6 +901,113 @@ bool amdgpu_userq_enabled(struct drm_device *dev)
 	return false;
 }
 
+static int amdgpu_userq_update_queue(struct amdgpu_usermode_queue *queue,
+				     struct amdgpu_mqd_update_info *minfo)
+{
+	struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *uq_funcs;
+	bool unmap_queue = false;
+	int r;
+
+	uq_funcs = adev->userq_funcs[queue->queue_type];
+	if (!uq_funcs || !uq_funcs->mqd_update)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Unmap the queue if it's mapped or preempted to ensure a clean update.
+	 * If the queue is already unmapped or hung, we skip this step.
+	 */
+	if (queue->state == AMDGPU_USERQ_STATE_MAPPED ||
+	    queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
+		r = amdgpu_userq_unmap_helper(queue);
+		if (r)
+			return r;
+		unmap_queue = true;
+	}
+
+	r = uq_funcs->mqd_update(queue, minfo);
+
+	if (unmap_queue) {
+		int map_r = amdgpu_userq_map_helper(queue);
+		if (map_r)
+			dev_err(adev->dev, "Failed to remap queue %llu after update\n",
+				queue->doorbell_index);
+		if (!r)
+			r = map_r;
+	}
+
+	return r;
+}
+
+int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *filp)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev);
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_set_cu_mask_args *args = data;
+	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_mqd_update_info minfo = {0};
+	const int max_num_cus = 1024;
+	size_t cu_mask_size;
+	int r;
+
+	mutex_lock(&uq_mgr->userq_mutex);
+	queue = amdgpu_userq_find(uq_mgr, args->queue_id);
+	if (!queue) {
+		mutex_unlock(&uq_mgr->userq_mutex);
+		return -EINVAL;
+	}
+
+	if (args->num_cu_mask == 0 || args->num_cu_mask % 32) {
+		r = -EINVAL;
+		goto unlock;
+	}
+
+	minfo.cu_mask.count = args->num_cu_mask;
+	/* To prevent an unreasonably large CU mask size, set an arbitrary
+	 * limit of max_num_cus bits. We can then just drop any CU mask bits
+	 * past max_num_cus bits and just use the first max_num_cus bits.
+	 */
+	if (minfo.cu_mask.count > max_num_cus) {
+		drm_file_err(uq_mgr->file, "CU mask cannot be greater than 1024 bits");
+		minfo.cu_mask.count = max_num_cus;
+		cu_mask_size = sizeof(uint32_t) * (max_num_cus / 32);
+	} else {
+		cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
+	}
+
+	minfo.cu_mask.ptr = memdup_user(u64_to_user_ptr(args->cu_mask_ptr),
+					cu_mask_size);
+	if (IS_ERR(minfo.cu_mask.ptr)) {
+		r = PTR_ERR(minfo.cu_mask.ptr);
+		goto unlock;
+	}
+
+	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
+	if (minfo.cu_mask.ptr && adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) {
+		for (int i = 0; i < minfo.cu_mask.count; i += 2) {
+			uint32_t cu_pair = (minfo.cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
+
+			if (cu_pair && cu_pair != 0x3) {
+				drm_file_err(uq_mgr->file, "CUs must be adjacent pairwise enabled.\n");
+				kfree(minfo.cu_mask.ptr);
+				r = -EINVAL;
+				goto unlock;
+			}
+		}
+	}
+
+	r = amdgpu_userq_update_queue(queue, &minfo);
+
+	kfree(minfo.cu_mask.ptr);
+unlock:
+	mutex_unlock(&uq_mgr->userq_mutex);
+
+	return r;
+}
+
 int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *filp)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 1ff0f611f882..43bf104d2fb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -115,6 +115,9 @@ struct amdgpu_db_info {
 int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *filp);
 
+int amdgpu_userq_set_cu_mask_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *filp);
+
 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
 			  struct amdgpu_device *adev);
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index ab2bf47553e1..41b6b3cea834 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -58,6 +58,7 @@ extern "C" {
 #define DRM_AMDGPU_USERQ_SIGNAL		0x17
 #define DRM_AMDGPU_USERQ_WAIT		0x18
 #define DRM_AMDGPU_GEM_LIST_HANDLES	0x19
+#define DRM_AMDGPU_USERQ_SET_CU_MASK	0x1a
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -79,6 +80,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
 #define DRM_IOCTL_AMDGPU_USERQ_WAIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
 #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
+#define DRM_IOCTL_AMDGPU_USERQ_SET_CU_MASK	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SET_CU_MASK, struct amdgpu_userq_set_cu_mask_args)
 
 /**
  * DOC: memory domains
@@ -428,6 +430,16 @@ union drm_amdgpu_userq {
 	struct drm_amdgpu_userq_out out;
 };
 
+/* IOCTL arguments for setting user queue CU mask */
+struct amdgpu_userq_set_cu_mask_args {
+	/* Target user queue ID */
+	__u32 queue_id;
+	/* CU mask bit count (multiple of 32) */
+	__u32 num_cu_mask;
+	/* User-space pointer to CU mask data */
+	__u64 cu_mask_ptr;
+};
+
 /* GFX V11 IP specific MQD parameters */
 struct drm_amdgpu_userq_mqd_gfx11 {
 	/**
-- 
2.49.0
