When a migration unmaps a range from the GPUs, align the unmap start and
last addresses to the range granularity boundary. Skip the unmap from the
GPUs if the range is already unmapped, based on the bitmap_mapped flag.

This optimizes the TLB flush and also solves the rocgdb CWSR migration
related issue.

Signed-off-by: Philip Yang <philip.y...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 35 ++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 4e1af4b181ea..daa996d7039d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2066,6 +2066,26 @@ static void svm_range_restore_work(struct work_struct *work)
        mmput(mm);
 }
 
+static unsigned long
+svm_range_align_start(struct svm_range *prange, unsigned long start)
+{
+       unsigned long start_align;
+
+       start_align = ALIGN_DOWN(start, 1UL << prange->granularity);
+       start_align = max_t(unsigned long, start_align, prange->start);
+       return start_align;
+}
+
+static unsigned long
+svm_range_align_last(struct svm_range *prange, unsigned long last)
+{
+       unsigned long last_align;
+
+       last_align = ALIGN(last + 1, 1UL << prange->granularity) - 1;
+       last_align = min_t(unsigned long, last_align, prange->last);
+       return last_align;
+}
+
 /**
  * svm_range_evict - evict svm range
  * @prange: svm range structure
@@ -2126,6 +2146,12 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
                unsigned long s, l;
                uint32_t trigger;
 
+               if (!svm_range_partial_mapped(prange, start, last)) {
+                       pr_debug("svms 0x%p [0x%lx 0x%lx] unmapped already\n",
+                               prange->svms, start, last);
+                       return 0;
+               }
+
                if (event == MMU_NOTIFY_MIGRATE)
                        trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
                else
@@ -2133,16 +2159,17 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 
                pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
                         prange->svms, start, last);
+
                list_for_each_entry(pchild, &prange->child_list, child_list) {
                        mutex_lock_nested(&pchild->lock, 1);
-                       s = max(start, pchild->start);
-                       l = min(last, pchild->last);
+                       s = svm_range_align_start(pchild, start);
+                       l = svm_range_align_last(pchild, last);
                        if (l >= s)
                                 svm_range_unmap_from_gpus(pchild, s, l, trigger);
                        mutex_unlock(&pchild->lock);
                }
-               s = max(start, prange->start);
-               l = min(last, prange->last);
+               s = svm_range_align_start(prange, start);
+               l = svm_range_align_last(prange, last);
                if (l >= s)
                        svm_range_unmap_from_gpus(prange, s, l, trigger);
        }
-- 
2.35.1

Reply via email to