On 10/10/2025 1:59 PM, Christian König wrote:
On 09.10.25 15:18, Sunil Khatri wrote:
Userptr BOs can be changed by the user at any time, so while
locking all the BOs before the GPU starts processing,
validate all the userptr BOs as well.
Signed-off-by: Sunil Khatri <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 87 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 5 ++
2 files changed, 92 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 8dc12064da49..c7737201ec23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -25,10 +25,12 @@
#include <drm/drm_auth.h>
#include <drm/drm_exec.h>
#include <linux/pm_runtime.h>
+#include <drm/ttm/ttm_tt.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_userq.h"
+#include "amdgpu_hmm.h"
#include "amdgpu_userq_fence.h"
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
@@ -761,9 +763,18 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
struct amdgpu_device *adev = uq_mgr->adev;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
struct drm_exec exec;
+ struct xarray xa;
+ struct amdgpu_userq_hmm_range *userq_range;
+ bool invalidated = false, new_addition = false;
+ unsigned long key = 0, tmp_key;
int ret;
+ struct ttm_operation_ctx ctx = { true, false };
Please sort that by reverse xmas tree order.
Noted
+
+ xa_init(&xa);
+retry_lock:
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
ret = amdgpu_vm_lock_pd(vm, &exec, 1);
@@ -794,6 +805,73 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
if (ret)
goto unlock_all;
+ if (invalidated) {
+ xa_for_each(&xa, tmp_key, userq_range) {
+ bo = userq_range->bo;
+ amdgpu_bo_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
userq_range->range);
+ }
+ invalidated = false;
+ }
+
+ key = 0;
+ /* Validate User Ptr BOs */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+ bo = bo_va->base.bo;
+
+ if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
+ continue;
+
+ userq_range = xa_load(&xa, key);
+ if (userq_range && userq_range->bo != bo) {
+ amdgpu_bo_unref(&userq_range->bo);
+ xa_erase(&xa, key);
+ amdgpu_hmm_range_free(userq_range->range);
+ kvfree(userq_range);
+ userq_range = NULL;
+ }
+
+ if (!userq_range) {
+ userq_range = kvmalloc(sizeof(*userq_range),
GFP_KERNEL);
+ if (!userq_range) {
+ ret = -ENOMEM;
+ goto unlock_all;
+ }
+
+ userq_range->range = amdgpu_hmm_range_alloc();
+ if (!userq_range->range) {
+ ret = -ENOMEM;
+ goto unlock_all;
+ }
It would be better if we embedded the hmm_range structure into the userq_range.
I did the same at first, but eventually realised that since
amdgpu_hmm_range_free() takes a pointer to the hmm_range, we could not
use that function to clear the memory — it also clears the pfns — so
embedding won't work. That is why I kept a pointer reference within the
object and allocated it using the standard
amdgpu_hmm_range_alloc()/free() functions.
A structure as small as userq_range is really inefficient to allocate.
And BTW don't call the structure amdgpu_userq_hmm_range, I would like to use
that in the CS IOCTL as well so that we have a common handling for both kernel
and user queues.
I could name it amdgpu_bo_hmm_range and place it in amdgpu_hmm.h, if
that is OK?
Regards,
Sunil Khatri
Apart from that it looks like it should work now, but keep in mind that this is
only the first step; we will probably need a second step on top of it.
Regards,
Christian.
+
+ userq_range->bo = amdgpu_bo_ref(bo);
+ xa_store(&xa, key, userq_range, GFP_KERNEL);
+ new_addition = true;
+ }
+ key++;
+ }
+
+ if (new_addition) {
+ drm_file_err(uq_mgr->file, "userptr bos:%lu\n", key);
+ drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, userq_range) {
+ if (!userq_range)
+ continue;
+ bo = userq_range->bo;
+ ret = amdgpu_ttm_tt_get_user_pages(bo,
userq_range->range);
+ if (ret)
+ goto unlock_all;
+ }
+
+ invalidated = true;
+ new_addition = false;
+ goto retry_lock;
+ }
+
ret = amdgpu_vm_update_pdes(adev, vm, false);
if (ret)
goto unlock_all;
@@ -813,6 +891,15 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
unlock_all:
drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, userq_range) {
+ if (!userq_range)
+ continue;
+ bo = userq_range->bo;
+ amdgpu_bo_unref(&bo);
+ amdgpu_hmm_range_free(userq_range->range);
+ kvfree(userq_range);
+ }
+ xa_destroy(&xa);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index ded33fe76e1c..795b38d0353d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -48,6 +48,11 @@ struct amdgpu_userq_obj {
struct amdgpu_bo *obj;
};
+struct amdgpu_userq_hmm_range {
+ struct hmm_range *range;
+ struct amdgpu_bo *bo;
+};
+
struct amdgpu_usermode_queue {
int queue_type;
enum amdgpu_userq_state state;