On 2021-08-02 at 10:56 a.m., Philip Yang wrote:
> The HMM migrate helper migrate_vma_pages does not replace file backed
> pages with device pages, because those pages are used by the file
> cache. We must not migrate a file backed range to VRAM, because a CPU
> access to the range would then not trigger a page fault to migrate the
> updated data from VRAM back to system memory.
>
> For a file backed range, don't prefetch-migrate the range to VRAM;
> always map system memory pages to the GPU, and also use system memory
> pages to recover from GPU retry faults.
>
> Add a helper to check whether a range is file backed or anonymous.
>
> Signed-off-by: Philip Yang <philip.y...@amd.com>
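
For reference, the behaviour the commit message relies on comes from the
core HMM migration sequence. Below is a minimal, illustrative sketch of
that sequence; the helper name is made up, it assumes a kernel recent
enough to have MIGRATE_VMA_SELECT_SYSTEM, and VRAM page allocation, the
data copy and most error handling are left out:

#include <linux/migrate.h>
#include <linux/mm.h>
#include <linux/slab.h>

/*
 * Illustrative sketch only.  migrate_vma_pages() refuses to replace a
 * page that is still in the page cache with a device private page and
 * clears MIGRATE_PFN_MIGRATE for it, which is why a file backed range
 * cannot be moved to VRAM this way.
 */
static int sketch_migrate_range_to_vram(struct vm_area_struct *vma,
					unsigned long start,
					unsigned long end,
					void *pgmap_owner)
{
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	struct migrate_vma migrate = {
		.vma		= vma,
		.start		= start,
		.end		= end,
		.pgmap_owner	= pgmap_owner,
		.flags		= MIGRATE_VMA_SELECT_SYSTEM,
	};
	unsigned long i;
	int r = -ENOMEM;

	migrate.src = kvcalloc(npages, sizeof(*migrate.src), GFP_KERNEL);
	migrate.dst = kvcalloc(npages, sizeof(*migrate.dst), GFP_KERNEL);
	if (!migrate.src || !migrate.dst)
		goto out_free;

	r = migrate_vma_setup(&migrate);
	if (r)
		goto out_free;

	for (i = 0; i < npages; i++) {
		if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE))
			continue;
		/* allocate a VRAM page, copy the data, fill migrate.dst[i] */
	}

	migrate_vma_pages(&migrate);
	/*
	 * Entries whose MIGRATE_PFN_MIGRATE flag is now cleared were not
	 * migrated; file backed pages end up here and stay in system
	 * memory.
	 */
	migrate_vma_finalize(&migrate);
	r = 0;

out_free:
	kvfree(migrate.src);
	kvfree(migrate.dst);
	return r;
}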

This patch should not be submitted to amd-staging-drm-next. As I
understand it, the real fix is in Alex's partial-migration patch series.
This patch is a hack that could be useful for older release branches
that won't get Alex's patch series because that series is too invasive
for them. So let's review this on the internal mailing list.

Thanks,
  Felix


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 40 ++++++++++++++++++++++++++++
>  1 file changed, 40 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index f811a3a24cd2..69237d2ab2ad 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2400,6 +2400,36 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
>               WRITE_ONCE(pdd->faults, pdd->faults + 1);
>  }
>  
> +/**
> + * svm_range_is_file_backed - check whether prange is a file backed mapping
> + * @mm: the mm structure
> + * @prange: svm range structure
> + *
> + * Context: caller must hold mmap_read_lock
> + *
> + * Return:
> + * false if the entire range is an anonymous mapping
> + * true if any part of the range is file backed or has no valid mapping
> + */
> +static bool
> +svm_range_is_file_backed(struct mm_struct *mm, struct svm_range *prange)
> +{
> +     struct vm_area_struct *vma;
> +     unsigned long start, end;
> +
> +     start = prange->start << PAGE_SHIFT;
> +     end = (prange->last + 1) << PAGE_SHIFT;
> +
> +     do {
> +             vma = find_vma(mm, start);
> +             if (!vma || vma->vm_start > start || !vma_is_anonymous(vma))
> +                     return true;
> +             start = min(end, vma->vm_end);
> +     } while (start < end);
> +
> +     return false;
> +}
> +
>  int
>  svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>                       uint64_t addr)
> @@ -2496,6 +2526,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>                svms, prange->start, prange->last, best_loc,
>                prange->actual_loc);
>  
> +     /* for file backed range, use system memory pages for GPU mapping */
> +     if (svm_range_is_file_backed(mm, prange))
> +             goto out_validate_and_map;
> +
>       if (prange->actual_loc != best_loc) {
>               if (best_loc) {
>                       r = svm_migrate_to_vram(prange, best_loc, mm);
> @@ -2520,6 +2554,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>               }
>       }
>  
> +out_validate_and_map:
>       r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
>       if (r)
>               pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
> @@ -2850,6 +2885,11 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
>       int r = 0;
>  
>       *migrated = false;
> +
> +     /* Don't migrate file backed range to VRAM */
> +     if (svm_range_is_file_backed(mm, prange))
> +             return 0;
> +
>       best_loc = svm_range_best_prefetch_location(prange);
>  
>       if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
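
On the user side, whether a range is file backed or anonymous is simply
a property of how it was mmapped. A minimal userspace illustration (the
file name is hypothetical and this is not an actual KFD test case):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t size = 2UL * 1024 * 1024;

	/*
	 * Anonymous private mapping: vma_is_anonymous() is true in the
	 * kernel, so an SVM range covering it may still be migrated to
	 * VRAM.
	 */
	void *anon = mmap(NULL, size, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/*
	 * File backed mapping: the pages belong to the page cache, so
	 * with this patch the range is always mapped to the GPU from
	 * system memory and never migrated to VRAM.
	 */
	int fd = open("/tmp/data.bin", O_RDWR);	/* hypothetical file */
	void *filebacked = (fd >= 0) ?
		mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) :
		MAP_FAILED;

	if (anon == MAP_FAILED || filebacked == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* ... register both ranges as SVM ranges and use them on the GPU ... */

	munmap(anon, size);
	munmap(filebacked, size);
	close(fd);
	return 0;
}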
