This is a nice simplification. See a few comments inline.

On 2019-05-30 10:41 a.m., Yang, Philip wrote:
> HMM provides new APIs and helps in kernel 5.2-rc1 to simplify driver
> path. The old hmm APIs are deprecated and will be removed in future.
>
> Below are changes in driver:
>
> 1. Change hmm_vma_fault to hmm_range_register and hmm_range_fault which
> supports range with multiple vmas, remove the multiple vmas handle path
> and data structure.
> 2. Change hmm_vma_range_done to hmm_range_unregister.
> 3. Use default flags to avoid pre-fill pfn arrays.
> 4. Use new hmm_device_ helpers.
>
> Change-Id: I1a00f88459f3b96c9536933e9a17eb1ebaa78113
> Signed-off-by: Philip Yang <philip.y...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  |   1 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 140 +++++++++---------------
>   2 files changed, 53 insertions(+), 88 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 41ccee49a224..e0bb47ce570b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -519,7 +519,6 @@ void amdgpu_hmm_init_range(struct hmm_range *range)
>               range->flags = hmm_range_flags;
>               range->values = hmm_range_values;
>               range->pfn_shift = PAGE_SHIFT;
> -             range->pfns = NULL;
>               INIT_LIST_HEAD(&range->list);
>       }
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 7138dc1dd1f4..25a9fcb409c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,8 +711,7 @@ struct amdgpu_ttm_tt {
>       struct task_struct      *usertask;
>       uint32_t                userflags;
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> -     struct hmm_range        *ranges;
> -     int                     nr_ranges;
> +     struct hmm_range        *range;
>   #endif
>   };
>   
> @@ -725,57 +724,33 @@ struct amdgpu_ttm_tt {
>    */
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
>   
> -/* Support Userptr pages cross max 16 vmas */
> -#define MAX_NR_VMAS  (16)
> -
>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>       struct mm_struct *mm = gtt->usertask->mm;
>       unsigned long start = gtt->userptr;
> -     unsigned long end = start + ttm->num_pages * PAGE_SIZE;
> -     struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
> -     struct hmm_range *ranges;
> -     unsigned long nr_pages, i;
> -     uint64_t *pfns, f;
> +     struct vm_area_struct *vma;
> +     struct hmm_range *range;
> +     unsigned long i;
> +     uint64_t *pfns;
>       int r = 0;
>   
>       if (!mm) /* Happens during process shutdown */
>               return -ESRCH;
>   
> -     down_read(&mm->mmap_sem);
> -
> -     /* user pages may cross multiple VMAs */
> -     gtt->nr_ranges = 0;
> -     do {
> -             unsigned long vm_start;
> -
> -             if (gtt->nr_ranges >= MAX_NR_VMAS) {
> -                     DRM_ERROR("Too many VMAs in userptr range\n");
> -                     r = -EFAULT;
> -                     goto out;
> -             }
> -
> -             vm_start = vma ? vma->vm_end : start;
> -             vma = find_vma(mm, vm_start);
> -             if (unlikely(!vma || vm_start < vma->vm_start)) {
> -                     r = -EFAULT;
> -                     goto out;
> -             }
> -             vmas[gtt->nr_ranges++] = vma;
> -     } while (end > vma->vm_end);
> -
> -     DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
> -             start, gtt->nr_ranges, ttm->num_pages);
> -
> +     vma = find_vma(mm, start);
> +     if (unlikely(!vma || start < vma->vm_start)) {
> +             r = -EFAULT;
> +             goto out;
> +     }
>       if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> -             vmas[0]->vm_file)) {
> +             vma->vm_file)) {
>               r = -EPERM;
>               goto out;
>       }
>   
> -     ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
> -     if (unlikely(!ranges)) {
> +     range = kvmalloc(sizeof(*range), GFP_KERNEL | __GFP_ZERO);

A single range is pretty small. You can probably allocate that with 
kmalloc now (or kcalloc/kzalloc since you specified GFP_ZERO).


> +     if (unlikely(!range)) {
>               r = -ENOMEM;
>               goto out;
>       }
> @@ -786,61 +761,52 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, 
> struct page **pages)
>               goto out_free_ranges;
>       }
>   
> -     for (i = 0; i < gtt->nr_ranges; i++)
> -             amdgpu_hmm_init_range(&ranges[i]);
> +     amdgpu_hmm_init_range(range);
> +     range->default_flags = range->flags[HMM_PFN_VALID];
> +     range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
> +                             0 : range->flags[HMM_PFN_WRITE];
> +     range->pfn_flags_mask = 0;
> +     range->pfns = pfns;
> +     hmm_range_register(range, mm, start,
> +                        start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
>   
> -     f = ranges[0].flags[HMM_PFN_VALID];
> -     f |= amdgpu_ttm_tt_is_readonly(ttm) ?
> -                             0 : ranges[0].flags[HMM_PFN_WRITE];
> -     memset64(pfns, f, ttm->num_pages);
> -
> -     for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
> -             ranges[i].vma = vmas[i];
> -             ranges[i].start = max(start, vmas[i]->vm_start);
> -             ranges[i].end = min(end, vmas[i]->vm_end);
> -             ranges[i].pfns = pfns + nr_pages;
> -             nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
> +     /*
> +      * Just wait for range to be valid, safe to ignore return value as we
> +      * will use the return value of hmm_range_fault() below under the
> +      * mmap_sem to ascertain the validity of the range.
> +     */

Documentation/vm/hmm.rst suggests a retry if hmm_range_fault (or 
snapshot) returns -EAGAIN. I think this would be the right place to put 
the again: label.


> +     hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
>   
> -             r = hmm_vma_fault(&ranges[i], true);
> -             if (unlikely(r))
> -                     break;
> -     }
> -     if (unlikely(r)) {
> -             while (i--)
> -                     hmm_vma_range_done(&ranges[i]);
> +     down_read(&mm->mmap_sem);
>   
> -             goto out_free_pfns;
> -     }
> +     r = hmm_range_fault(range, true);
>   
>       up_read(&mm->mmap_sem);
>   
> +     if (unlikely(r < 0))
> +             goto out_free_pfns;

If r == -EAGAIN, you could goto again for a retry here as suggested by 
the HMM documentation.


> +
>       for (i = 0; i < ttm->num_pages; i++) {
> -             pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
> -             if (!pages[i]) {
> +             pages[i] = hmm_device_entry_to_page(range, pfns[i]);
> +             if (unlikely(!pages[i])) {
>                       pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
>                              i, pfns[i]);
> -                     goto out_invalid_pfn;
> +                     r = -ENOMEM;
> +
> +                     goto out_free_pfns;
>               }
>       }
> -     gtt->ranges = ranges;
> +     gtt->range = range;
>   
>       return 0;
>   
>   out_free_pfns:
> +     hmm_range_unregister(range);
>       kvfree(pfns);
>   out_free_ranges:
> -     kvfree(ranges);
> +     kvfree(range);

If you use kmalloc/kcalloc/kzalloc above, change this to kfree.


>   out:
> -     up_read(&mm->mmap_sem);
> -
>       return r;
> -
> -out_invalid_pfn:
> -     for (i = 0; i < gtt->nr_ranges; i++)
> -             hmm_vma_range_done(&ranges[i]);
> -     kvfree(pfns);
> -     kvfree(ranges);
> -     return -ENOMEM;
>   }
>   
>   /**
> @@ -853,23 +819,23 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt 
> *ttm)
>   {
>       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>       bool r = false;
> -     int i;
>   
>       if (!gtt || !gtt->userptr)
>               return false;
>   
> -     DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
> -             gtt->userptr, gtt->nr_ranges, ttm->num_pages);
> +     DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
> +             gtt->userptr, ttm->num_pages);
>   
> -     WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
> +     WARN_ONCE(!gtt->range || !gtt->range->pfns,
>               "No user pages to check\n");
>   
> -     if (gtt->ranges) {
> -             for (i = 0; i < gtt->nr_ranges; i++)
> -                     r |= hmm_vma_range_done(&gtt->ranges[i]);
> -             kvfree(gtt->ranges[0].pfns);
> -             kvfree(gtt->ranges);
> -             gtt->ranges = NULL;
> +     if (gtt->range) {
> +             r = hmm_range_valid(gtt->range);
> +             hmm_range_unregister(gtt->range);
> +
> +             kvfree(gtt->range->pfns);
> +             kvfree(gtt->range);
> +             gtt->range = NULL;
>       }
>   
>       return r;
> @@ -953,9 +919,9 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt 
> *ttm)
>       sg_free_table(ttm->sg);
>   
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> -     if (gtt->ranges &&
> -         ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
> -                                          gtt->ranges[0].pfns[0]))
> +     if (gtt->range &&
> +         ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
> +                                                   gtt->range->pfns[0]))

I think just checking gtt->range here is enough. If gtt->range is not 
NULL here, we're leaking memory.

Regards,
   Felix


>               WARN_ONCE(1, "Missing get_user_page_done\n");
>   #endif
>   }
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to