On 14.08.25 12:35, Liang, Prike wrote: > [Public] > >> From: Alex Deucher <alexdeuc...@gmail.com> >> Sent: Thursday, August 14, 2025 6:38 AM >> To: Liang, Prike <prike.li...@amd.com> >> Cc: amd-gfx@lists.freedesktop.org; Deucher, Alexander >> <alexander.deuc...@amd.com>; Koenig, Christian <christian.koe...@amd.com> >> Subject: Re: [PATCH v8 14/14] drm/amdgpu: validate userq va for GEM unmap >> >> The start address may not align with the start address of the vital >> queue buffers. Just retain a list of vital virtual address ranges >> for each userq and then check if the address range the user wants to >> unmap falls into any of those ranges. If so, then preempt and unmap >> the queue and set the status to USER_UNMAPPED or something like that. > [Prike] Each userq has various virtual addresses, but this unmap IOCTL > request can only unmap > one kind of VA at a time
That is not even remotely correct. The CLEAR and REPLACE operations can unmap many VAs at a time. >, so do we need to validate the userq whether all the VAs have been unmapped > before unmapping the userq HW mapping? No, we need to unmap the userq HW mapping before we unmap the VA addresses. > > Aside from that, do we still need to identify the invalid userq VA unmap case > by checking userq fence to > see whether it is signaled when user is trying to unmap one of its VAs? > Without this check, how do we > identify the userq GEM unmap VA case? I don't fully understand the question, please clarify. Regards, Christian. > >> Then you don't have to worry about queue specific details as to what >> addresses are >> vital for that queue type. >> >> Alex >> >> On Fri, Aug 8, 2025 at 2:29 AM Prike Liang <prike.li...@amd.com> wrote: >>> >>> This change validates the userq to see whether can be unmapped prior >>> to the userq VA GEM unmap. The solution is based on the following >>> idea: >>> 1) Find out the GEM unmap VA belonds to which userq, >>> 2) Check the userq fence fence whether is signaled, >>> 3) If the userq attached fences signal failed, then >>> mark it as illegal VA opt and give a warning message >>> for this illegal userspace request. >>> >>> Suggested-by: Christian König <christian.koe...@amd.com> >>> Signed-off-by: Prike Liang <prike.li...@amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 107 +++++++++++++++++++++- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 2 + >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++ >>> 3 files changed, 118 insertions(+), 1 deletion(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c >>> index 771f57d09060..314d482849c8 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c >>> @@ -676,7 +676,6 @@ amdgpu_userq_create(struct drm_file *filp, union >> drm_amdgpu_userq *args) >>> } >>> } >>> >>> - >>> args->out.queue_id = qid; >>> >>> unlock: >>> @@ -1214,3 +1213,109 @@ int >> amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, >>> mutex_unlock(&adev->userq_mutex); >>> return ret; >>> } >>> + >>> +/** >>> + * amdgpu_userq_gem_va_unmap_queue_retrieve - find out userq by gem >>> +unmap va >>> + * @queue: destinated userq for finding out from unmap va >>> + * @va: the GEM unmap virtual address already aligned in mapping >>> +range >>> + * Find out the corresponding userq by comparing >>> + * the GEM unmap VA with userq VAs. >>> + */ >>> +static bool amdgpu_userq_gem_va_unmap_queue_retrieve(struct >> amdgpu_usermode_queue *queue, >>> + uint64_t va) { >>> + va = va << AMDGPU_GPU_PAGE_SHIFT | AMDGPU_GMC_HOLE_END; >>> + >>> + switch (queue->queue_type) { >>> + case AMDGPU_HW_IP_GFX: >>> + if (queue->queue_va == va || >>> + queue->wptr_va == va || >>> + queue->rptr_va == va || >>> + queue->shadow_va == va || >>> + queue->csa_va == va) >>> + return true; >>> + break; >>> + case AMDGPU_HW_IP_COMPUTE: >>> + if (queue->queue_va == va || >>> + queue->wptr_va == va || >>> + queue->rptr_va == va || >>> + queue->eop_va == va) >>> + return true; >>> + break; >>> + case AMDGPU_HW_IP_DMA: >>> + if (queue->queue_va == va || >>> + queue->wptr_va == va || >>> + queue->rptr_va == va || >>> + queue->csa_va == va) >>> + return true; >>> + break; >>> + default: >>> + break; >>> + } >>> + >>> + return false; >>> +} >>> + >>> + >>> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, >>> + uint64_t va) { >>> + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); >>> + struct amdgpu_usermode_queue *queue; >>> + struct amdgpu_userq_mgr *uqm, *tmp; >>> + int queue_id; >>> + int ret; >>> + >>> + if (!ip_mask) >>> + return 0; >>> + >>> + /** >>> + * validate the unmap va sequence: >>> + * 1) Find out the GEM unmap VA belonds to which userq, >>> + * 2) Check the userq fence whether is signaled, >>> + * 3) If the userq attached fences signal failed, then >>> + * mark as invalid va opt and give a warning message >>> + * for this illegal userspace request. >>> + */ >>> + >>> + if (mutex_trylock(&adev->userq_mutex)) { >>> + list_for_each_entry_safe(uqm, tmp, >>> + &adev->userq_mgr_list, list) { >>> + >>> + if (!mutex_trylock(&uqm->userq_mutex)) >>> + continue; >>> + >>> + idr_for_each_entry(&uqm->userq_idr, queue, >>> + queue_id) { >>> + >>> + if >>> (!amdgpu_userq_gem_va_unmap_queue_retrieve(queue, >> va)) { >>> + dev_dbg(uqm->adev->dev, "va: 0x%llx >>> not belond to >> queue id: %d\n", >>> + va, queue_id); >>> + continue; >>> + } >>> + >>> + if (queue->last_fence && >>> !dma_fence_is_signaled(queue- >>> last_fence)) { >>> + drm_file_err(uqm->file, "an illegal >>> VA unmap for the >> userq\n"); >>> + queue->state = >> AMDGPU_USERQ_STATE_INVALID_VA; >>> + ret = -ETIMEDOUT; >>> + goto err; >>> + } >>> + /* >>> + * At here still can't suspend the userq >>> since here just one >> kind of >>> + * VA unmapped, and some other VAs of userq >>> may still be >> mapped. After >>> + * this point this VA mapping will be >>> deteled and the VA will be >> unmapped >>> + * so will not resume the userq when its VA >>> unmapped. >>> + */ >>> + } >>> + mutex_unlock(&uqm->userq_mutex); >>> + } >>> + } else { >>> + /* Maybe we need a try lock again before return*/ >>> + return -EBUSY; >>> + } >>> + >>> + mutex_unlock(&adev->userq_mutex); >>> + return 0; >>> +err: >>> + mutex_unlock(&uqm->userq_mutex); >>> + mutex_unlock(&adev->userq_mutex); >>> + return ret; >>> +} >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h >>> index cf35b6140a3d..27ab8a6a7be6 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h >>> @@ -149,4 +149,6 @@ bool amdgpu_userq_buffer_vas_mapped(struct >>> amdgpu_vm *vm, int amdgpu_userq_buffer_va_put(struct amdgpu_vm *vm, >>> u64 addr); int amdgpu_userq_buffer_vas_put(struct amdgpu_vm *vm, >>> struct amdgpu_usermode_queue *queue); >>> +int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, >>> + uint64_t va); >>> #endif >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> index f042372d9f2e..533954c0d234 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> @@ -1929,6 +1929,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device >> *adev, >>> struct amdgpu_bo_va_mapping *mapping; >>> struct amdgpu_vm *vm = bo_va->base.vm; >>> bool valid = true; >>> + int r; >>> >>> saddr /= AMDGPU_GPU_PAGE_SIZE; >>> >>> @@ -1949,6 +1950,15 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device >> *adev, >>> return -ENOENT; >>> } >>> >>> + /* It's unlikely to happen that the mapping userq hasn't been idled >>> + * during user requests GEM unmap IOCTL except for forcing the unmap >>> + * from user space. >>> + */ >>> + >>> + r = amdgpu_userq_gem_va_unmap_validate(adev, saddr); >>> + if (unlikely(r && r != -EBUSY)) >>> + dev_warn(adev->dev, "Here should be an improper unmap >>> + request from user space\n"); >>> + >>> list_del(&mapping->list); >>> amdgpu_vm_it_remove(mapping, &vm->va); >>> mapping->bo_va = NULL; >>> -- >>> 2.34.1 >>>