On 2025-09-17 13:22, Sunil Khatri wrote:
we dont need to allocate local array of pages to hold
the pages returned by the hmm, instead we could use
the hmm_range structure itself to get to hmm_pfn
and get the required pages directly.

This saved alloc/free a lot of memory without
any impact on performance.

Signed-off-by: Sunil Khatri <sunil.kha...@amd.com>
Suggested-by: Christian König <christian.koe...@amd.com>

Acked-by: Felix Kuehling <felix.kuehl...@amd.com>


---
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 10 +++++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |  1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 30 ++++---------------
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  5 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c       | 10 +------
  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h       |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  8 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  5 ++--
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  2 +-
  9 files changed, 25 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index c3b34a410375..7c54fe6b0f5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1089,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t 
user_addr,
                return 0;
        }
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
+       ret = amdgpu_ttm_tt_get_user_pages(bo, &range);
        if (ret) {
                if (ret == -EAGAIN)
                        pr_debug("Failed to get user pages, try again\n");
@@ -1103,6 +1103,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t 
user_addr,
                pr_err("%s: Failed to reserve BO\n", __func__);
                goto release_out;
        }
+
+       amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
        amdgpu_bo_placement_from_domain(bo, mem->domain);
        ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
        if (ret)
@@ -2565,8 +2568,7 @@ static int update_invalid_user_pages(struct 
amdkfd_process_info *process_info,
                }
/* Get updated user pages */
-               ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
-                                                  &mem->range);
+               ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
                if (ret) {
                        pr_debug("Failed %d to get user pages\n", ret);
@@ -2595,6 +2597,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
                        ret = 0;
                }
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+
                mutex_lock(&process_info->notifier_lock);
/* Mark the BO as valid unless it was invalidated
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 555cd6d877c3..a716c9886c74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -38,7 +38,6 @@ struct amdgpu_bo_list_entry {
        struct amdgpu_bo                *bo;
        struct amdgpu_bo_va             *bo_va;
        uint32_t                        priority;
-       struct page                     **user_pages;
        struct hmm_range                *range;
        bool                            user_invalidated;
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index defb511acc5a..744e6ff69814 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -29,6 +29,7 @@
  #include <linux/pagemap.h>
  #include <linux/sync_file.h>
  #include <linux/dma-buf.h>
+#include <linux/hmm.h>
#include <drm/amdgpu_drm.h>
  #include <drm/drm_syncobj.h>
@@ -885,24 +886,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
*p,
                struct amdgpu_bo *bo = e->bo;
                int i;
- e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
-                                        sizeof(struct page *),
-                                        GFP_KERNEL);
-               if (!e->user_pages) {
-                       drm_err(adev_to_drm(p->adev), "kvmalloc_array 
failure\n");
-                       r = -ENOMEM;
-                       goto out_free_user_pages;
-               }
-
-               r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
-               if (r) {
-                       kvfree(e->user_pages);
-                       e->user_pages = NULL;
+               r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
+               if (r)
                        goto out_free_user_pages;
-               }
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
-                       if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+                       if (bo->tbo.ttm->pages[i] != 
hmm_pfn_to_page(e->range->hmm_pfns[i])) {
                                userpage_invalidated = true;
                                break;
                        }
@@ -946,7 +935,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                }
if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
-                   e->user_invalidated && e->user_pages) {
+                   e->user_invalidated) {
                        amdgpu_bo_placement_from_domain(e->bo,
                                                        AMDGPU_GEM_DOMAIN_CPU);
                        r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
@@ -955,11 +944,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                                goto out_free_user_pages;
amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
-                                                    e->user_pages);
+                                                    e->range);
                }
-
-               kvfree(e->user_pages);
-               e->user_pages = NULL;
        }
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -1001,11 +987,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
*p,
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = e->bo;
- if (!e->user_pages)
-                       continue;
                amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
-               kvfree(e->user_pages);
-               e->user_pages = NULL;
                e->range = NULL;
        }
        mutex_unlock(&p->bo_list->bo_list_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a8fa09184459..8524aa55e057 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -571,8 +571,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void 
*data,
                goto release_object;
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
-               r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
-                                                &range);
+               r = amdgpu_ttm_tt_get_user_pages(bo, &range);
                if (r)
                        goto release_object;
@@ -580,6 +579,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
                if (r)
                        goto user_pages_done;
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
                amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
                r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
                amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index e36fede7f74c..4441572d6ad1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -167,7 +167,7 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
                               uint64_t start, uint64_t npages, bool readonly,
-                              void *owner, struct page **pages,
+                              void *owner,
                               struct hmm_range **phmm_range)
  {
        struct hmm_range *hmm_range;
@@ -222,14 +222,6 @@ int amdgpu_hmm_range_get_pages(struct 
mmu_interval_notifier *notifier,
        hmm_range->start = start;
        hmm_range->hmm_pfns = pfns;
- /*
-        * Due to default_flags, all pages are HMM_PFN_VALID or
-        * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
-        * the notifier_lock, and mmu_interval_read_retry() must be done first.
-        */
-       for (i = 0; pages && i < npages; i++)
-               pages[i] = hmm_pfn_to_page(pfns[i]);
-
        *phmm_range = hmm_range;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
index e2edcd010ccc..953e1d06de20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -33,7 +33,7 @@
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
                               uint64_t start, uint64_t npages, bool readonly,
-                              void *owner, struct page **pages,
+                              void *owner,
                               struct hmm_range **phmm_range);
  bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4e2486dbc0a6..280400b13c54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -707,7 +707,7 @@ struct amdgpu_ttm_tt {
   * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
   * once afterwards to stop HMM tracking
   */
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
                                 struct hmm_range **range)
  {
        struct ttm_tt *ttm = bo->tbo.ttm;
@@ -744,7 +744,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages,
readonly = amdgpu_ttm_tt_is_readonly(ttm);
        r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
-                                      readonly, NULL, pages, range);
+                                      readonly, NULL, range);
  out_unlock:
        mmap_read_unlock(mm);
        if (r)
@@ -796,12 +796,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
   * that backs user memory and will ultimately be mapped into the device
   * address space.
   */
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range)
  {
        unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
-               ttm->pages[i] = pages ? pages[i] : NULL;
+               ttm->pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
  }
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index bb17987f0447..6ac94469ed40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -190,7 +190,7 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
  uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
                                 struct hmm_range **range);
  void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
                                      struct hmm_range *range);
@@ -198,7 +198,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
                                       struct hmm_range *range);
  #else
  static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
-                                              struct page **pages,
                                               struct hmm_range **range)
  {
        return -EPERM;
@@ -214,7 +213,7 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct 
ttm_tt *ttm,
  }
  #endif
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range);
  int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
                              uint64_t *user_addr);
  int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 68ba239b2e5d..273f42e3afdd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1738,7 +1738,7 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
WRITE_ONCE(p->svms.faulting_task, current);
                        r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, 
npages,
-                                                      readonly, owner, NULL,
+                                                      readonly, owner,
                                                       &hmm_range);
                        WRITE_ONCE(p->svms.faulting_task, NULL);
                        if (r)

Reply via email to