From: Honglei Huang <[email protected]>

Add the ioctl handler for AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_BATCH that
processes userspace requests for batch userptr allocation.
The handler performs validation of input parameters including:
- Checking all ranges are page-aligned and non-zero
- Verifying total size matches sum of range sizes
- Ensuring no conflicts with existing SVM allocations
- Validating that USERPTR flag is set

This completes the batch userptr feature by connecting the UAPI to the
implementation.

Signed-off-by: Honglei Huang <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 159 +++++++++++++++++++++++
 1 file changed, 159 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a72cc980a..d8cfd8697 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1196,6 +1196,162 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	return err;
 }
 
+static int kfd_ioctl_alloc_memory_of_gpu_batch(struct file *filep,
+					       struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_batch_args *args = data;
+	struct kfd_ioctl_userptr_range *ranges = NULL;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_node *dev;
+	int idr_handle;
+	long err;
+	uint32_t flags = args->flags;
+	uint32_t i;
+	uint64_t total_size = 0;
+
+	if (args->total_size == 0) {
+		pr_err("Batch allocation: total size cannot be zero\n");
+		return -EINVAL;
+	}
+
+	if (args->num_ranges == 0) {
+		pr_err("Batch allocation: invalid number of ranges %u\n",
+		       args->num_ranges);
+		return -EINVAL;
+	}
+
+	if (!args->ranges_ptr) {
+		pr_err("Batch allocation: ranges pointer is NULL\n");
+		return -EINVAL;
+	}
+
+	if (!(flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
+		pr_err("Batch allocation requires USERPTR flag\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (p->context_id != KFD_CONTEXT_ID_PRIMARY) {
+		pr_debug("Batch USERPTR is not supported on non-primary kfd_process\n");
+		return -EOPNOTSUPP;
+	}
+
+	ranges = kvmalloc_array(args->num_ranges, sizeof(*ranges), GFP_KERNEL);
+	if (!ranges) {
+		err = -ENOMEM;
+		goto err_alloc_ranges;
+	}
+
+	if (copy_from_user(ranges, (void __user *)args->ranges_ptr,
+			   args->num_ranges * sizeof(*ranges))) {
+		pr_err("Failed to copy ranges from user space\n");
+		err = -EFAULT;
+		goto err_copy_ranges;
+	}
+
+	for (i = 0; i < args->num_ranges; i++) {
+		if (!ranges[i].start || !ranges[i].size ||
+		    (ranges[i].start & ~PAGE_MASK) ||
+		    (ranges[i].size & ~PAGE_MASK)) {
+			pr_err("Invalid range %u: start=0x%llx size=0x%llx\n",
+			       i, ranges[i].start, ranges[i].size);
+			err = -EINVAL;
+			goto err_copy_ranges;
+		}
+		total_size += ranges[i].size;
+	}
+
+	if (total_size != args->total_size) {
+		pr_err("Size mismatch: provided %llu != calculated %llu\n",
+		       args->total_size, total_size);
+		err = -EINVAL;
+		goto err_copy_ranges;
+	}
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+	/* Check for conflicts with SVM */
+	svm_range_list_lock_and_flush_work(&p->svms, current->mm);
+	mutex_lock(&p->svms.lock);
+	mmap_write_unlock(current->mm);
+
+	/* Check GPU VA for SVM conflicts */
+	if (args->va_addr &&
+	    interval_tree_iter_first(&p->svms.objects,
+				     args->va_addr >> PAGE_SHIFT,
+				     (args->va_addr + args->total_size - 1) >> PAGE_SHIFT)) {
+		pr_err("GPU VA 0x%llx already allocated by SVM\n", args->va_addr);
+		mutex_unlock(&p->svms.lock);
+		err = -EADDRINUSE;
+		goto err_copy_ranges;
+	}
+
+	/* Check each userptr range for SVM conflicts */
+	for (i = 0; i < args->num_ranges; i++) {
+		if (interval_tree_iter_first(&p->svms.objects,
+					     ranges[i].start >> PAGE_SHIFT,
+					     (ranges[i].start + ranges[i].size - 1) >> PAGE_SHIFT)) {
+			pr_err("Userptr range %u (0x%llx) already allocated by SVM\n",
+			       i, ranges[i].start);
+			mutex_unlock(&p->svms.lock);
+			err = -EADDRINUSE;
+			goto err_copy_ranges;
+		}
+	}
+
+	mutex_unlock(&p->svms.lock);
+#endif
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	if (!pdd) {
+		err = -EINVAL;
+		goto err_pdd;
+	}
+
+	dev = pdd->dev;
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu_batch(
+		dev->adev, args->va_addr, args->total_size, pdd->drm_priv,
+		(struct kgd_mem **)&mem, NULL, ranges, args->num_ranges,
+		flags, false);
+
+	if (err)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+	mutex_unlock(&p->mutex);
+	kvfree(ranges);
+
+	pr_debug("Batch userptr allocated: va=0x%llx size=0x%llx ranges=%u handle=0x%llx\n",
+		 args->va_addr, args->total_size, args->num_ranges, args->handle);
+
+	return 0;
+
+err_free:
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
+					       pdd->drm_priv, NULL);
+err_unlock:
+err_pdd:
+	mutex_unlock(&p->mutex);
+err_copy_ranges:
+	kvfree(ranges);
+err_alloc_ranges:
+	return err;
+}
+
 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 					struct kfd_process *p, void *data)
 {
@@ -3309,6 +3465,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_PROCESS,
 			kfd_ioctl_create_process, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_BATCH,
+			kfd_ioctl_alloc_memory_of_gpu_batch, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
-- 
2.34.1
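
[Editor's note: a minimal, hypothetical userspace sketch of driving the
new ioctl, not part of the patch. It assumes that
struct kfd_ioctl_alloc_memory_of_gpu_batch_args and
struct kfd_ioctl_userptr_range are exported through linux/kfd_ioctl.h
by the UAPI patch earlier in this series; field names are inferred from
the handler above. Real callers would likely set further access flags;
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE is shown as one plausible example.]

	/*
	 * Sketch: allocate NUM_RANGES page-aligned host buffers and map
	 * them as one batch userptr allocation at gpu_va.
	 */
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	#define NUM_RANGES	4
	#define RANGE_SIZE	(2UL * 1024 * 1024)	/* page-aligned, non-zero */

	static int alloc_batch_userptr(int kfd_fd, uint32_t gpu_id, uint64_t gpu_va)
	{
		struct kfd_ioctl_userptr_range ranges[NUM_RANGES];
		struct kfd_ioctl_alloc_memory_of_gpu_batch_args args;
		uint64_t total = 0;
		int i;

		for (i = 0; i < NUM_RANGES; i++) {
			/* Each range must be page-aligned and non-zero,
			 * or the handler returns -EINVAL. */
			void *buf = aligned_alloc(4096, RANGE_SIZE);

			if (!buf)
				return -1;
			ranges[i].start = (uint64_t)(uintptr_t)buf;
			ranges[i].size = RANGE_SIZE;
			total += ranges[i].size;
		}

		memset(&args, 0, sizeof(args));
		args.va_addr = gpu_va;
		args.gpu_id = gpu_id;
		args.total_size = total;	/* must equal the sum of range sizes */
		args.num_ranges = NUM_RANGES;
		args.ranges_ptr = (uint64_t)(uintptr_t)ranges;
		/* The handler rejects requests without the USERPTR flag. */
		args.flags = KFD_IOC_ALLOC_MEM_FLAGS_USERPTR |
			     KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE;

		if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_BATCH, &args))
			return -1;

		/* args.handle now carries MAKE_HANDLE(gpu_id, idr_handle). */
		return 0;
	}

Since the handler returns a single handle for the whole batch, one
later AMDKFD_IOC_FREE_MEMORY_OF_GPU call on that handle should tear
down all ranges at once.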
