Re: [PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults

2023-10-05 Thread Chen, Xiaogang



On 10/5/2023 8:25 AM, Philip Yang wrote:


Sorry for the late reply, just notice 2 other issues:

1. function svm_range_split_by_granularity can be removed now.


yes, the code has been sent to gerrit and merged. Will do it next time.


2. svm_range_restore_pages should map partial range to GPUs after 
partial migration.


I think partial mapping is next step after partial migration is done. I 
have been thinking partial mapping. Will submit a new patch to address it.


Regards

Xiaogang


Regards,

Philip

On 2023-10-03 19:31, Xiaogang.Chen wrote:

From: Xiaogang Chen

This patch implements partial migration in gpu page fault according to migration
granularity(default 2MB) and not split svm range in cpu page fault handling.
A svm range may include pages from both system ram and vram of one gpu now.
These changes are expected to improve migration performance and reduce mmu
callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  83 +---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
  4 files changed, 162 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..6a059e4aff86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
goto out_free;
}
if (cpages != npages)
-   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+   pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
  
  	r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset);

migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
  {
unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
  
-	if (prange->actual_loc == best_loc) {

-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+   prange->svms, start_mgr, last_mgr);
return 0;
}
  
+	if (start_mgr < prange->start || last_mgr > prange->last) {

+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+start_mgr, last_mgr, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
  
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,

-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+   prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+   best_loc);
  
-	start = prange->start << PAGE_SHIFT;

-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
  
  	r = svm_range_vram_node_new(node, prange, true);

if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
  
  	if (cpages) {

prange->actual_loc = best_loc;
-   svm_range_dma_unmap(prange);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no page migrated and all pages from prange are at
+* sys ram drop svm_bo got from svm_range_vram_node_new
+*/
svm_range_vram_node_free(prange);
}
  
@@ -663,19 +676,19 @@ 

Re: [PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults

2023-10-05 Thread Philip Yang

  
Sorry for the late reply, just notice 2 other issues:

1. function svm_range_split_by_granularity can be removed now.
2. svm_range_restore_pages should map partial range to GPUs after
  partial migration.
Regards,
Philip

On 2023-10-03 19:31, Xiaogang.Chen
  wrote:


  From: Xiaogang Chen 

This patch implements partial migration in gpu page fault according to migration
granularity(default 2MB) and not split svm range in cpu page fault handling.
A svm range may include pages from both system ram and vram of one gpu now.
These changes are expected to improve migration performance and reduce mmu
callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  83 +---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
 4 files changed, 162 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..6a059e4aff86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 		goto out_free;
 	}
 	if (cpages != npages)
-		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+		pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 			 cpages, npages);
 	else
-		pr_debug("0x%lx pages migrated\n", cpages);
+		pr_debug("0x%lx pages collected\n", cpages);
 
 	r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset);
 	migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+			unsigned long start_mgr, unsigned long last_mgr,
 			struct mm_struct *mm, uint32_t trigger)
 {
 	unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	unsigned long cpages = 0;
 	long r = 0;
 
-	if (prange->actual_loc == best_loc) {
-		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-			 prange->svms, prange->start, prange->last, best_loc);
+	if (!best_loc) {
+		pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+			prange->svms, start_mgr, last_mgr);
 		return 0;
 	}
 
+	if (start_mgr < prange->start || last_mgr > prange->last) {
+		pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+		return -EFAULT;
+	}
+
 	node = svm_range_get_node_by_id(prange, best_loc);
 	if (!node) {
 		pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
 		return -ENODEV;
 	}
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-		 prange->start, prange->last, best_loc);
+	pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+		prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+		best_loc);
 
-	start = prange->start << PAGE_SHIFT;
-	end = (prange->last + 1) << PAGE_SHIFT;
+	start = start_mgr << PAGE_SHIFT;
+	end = (last_mgr + 1) << PAGE_SHIFT;
 
 	r = svm_range_vram_node_new(node, prange, true);
 	if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 
 	if (cpages) {
 		prange->actual_loc = best_loc;
-		svm_range_dma_unmap(prange);
-	} else {
+		prange->vram_pages = prange->vram_pages + cpages;
+	} else if (!prange->actual_loc) {
+		/* if no page migrated and all pages from prange are at
+		 * sys ram drop svm_bo got from svm_range_vram_node_new
+		 */
 		svm_range_vram_node_free(prange);
 	}
 
@@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- *   0 - success with all pages migrated
  *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages got migrated
  */
 static long
 svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
-		   struct vm_area_struct *vma, uint64_t start, uint64_t end,
-		   uint32_t trigger, struct page *fault_page)
+			struct vm_area_struct *vma, uint64_t start, uint64_t end,
+			uint32_t trigger, struct page *fault_page)
 {
 	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
 	uint64_t npages = (end - 

Re: [PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults

2023-10-04 Thread Chen, Xiaogang



On 10/4/2023 1:47 PM, Felix Kuehling wrote:


On 2023-10-03 19:31, Xiaogang.Chen wrote:

From: Xiaogang Chen 

This patch implements partial migration in gpu page fault according 
to migration
granularity(default 2MB) and not split svm range in cpu page fault 
handling.
A svm range may include pages from both system ram and vram of one 
gpu now.
These changes are expected to improve migration performance and 
reduce mmu

callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen


Minor (mostly cosmetic) nit-picks inline. With those fixed, the patch is

Reviewed-by: Felix Kuehling 

Thanks for the review. These indentations were due to my editor on Linux, 
which does not show some special characters correctly. I changed these 
with vi.


I need to use a different editor now.

Regards

Xiaogang




---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  83 +---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
  4 files changed, 162 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index 6c25dab051d5..6a059e4aff86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

  goto out_free;
  }
  if (cpages != npages)
-    pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+    pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
   cpages, npages);
  else
-    pr_debug("0x%lx pages migrated\n", cpages);
+    pr_debug("0x%lx pages collected\n", cpages);
    r = svm_migrate_copy_to_vram(node, prange, , , 
scratch, ttm_res_offset);

  migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, 
struct svm_range *prange,

   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+    unsigned long start_mgr, unsigned long last_mgr,
  struct mm_struct *mm, uint32_t trigger)
  {
  unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  unsigned long cpages = 0;
  long r = 0;
  -    if (prange->actual_loc == best_loc) {
-    pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
- prange->svms, prange->start, prange->last, best_loc);
+    if (!best_loc) {
+    pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+    prange->svms, start_mgr, last_mgr);
  return 0;
  }
  +    if (start_mgr < prange->start || last_mgr > prange->last) {
+    pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+    return -EFAULT;
+    }
+
  node = svm_range_get_node_by_id(prange, best_loc);
  if (!node) {
  pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
  return -ENODEV;
  }
  -    pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
- prange->start, prange->last, best_loc);
+    pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+    prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+    best_loc);
  -    start = prange->start << PAGE_SHIFT;
-    end = (prange->last + 1) << PAGE_SHIFT;
+    start = start_mgr << PAGE_SHIFT;
+    end = (last_mgr + 1) << PAGE_SHIFT;
    r = svm_range_vram_node_new(node, prange, true);
  if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

    if (cpages) {
  prange->actual_loc = best_loc;
-    svm_range_dma_unmap(prange);
-    } else {
+    prange->vram_pages = prange->vram_pages + cpages;
+    } else if (!prange->actual_loc) {
+    /* if no page migrated and all pages from prange are at
+ * sys ram drop svm_bo got from svm_range_vram_node_new
+ */
  svm_range_vram_node_free(prange);
  }
  @@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device 
*adev, struct svm_range *prange,
   * Context: Process context, caller hold mmap read lock, 
prange->migrate_mutex

   *
   * Return:
- *   0 - success with all pages migrated
   *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages got migrated
   */
  

Re: [PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults

2023-10-04 Thread Felix Kuehling



On 2023-10-03 19:31, Xiaogang.Chen wrote:

From: Xiaogang Chen 

This patch implements partial migration in gpu page fault according to migration
granularity(default 2MB) and not split svm range in cpu page fault handling.
A svm range may include pages from both system ram and vram of one gpu now.
These changes are expected to improve migration performance and reduce mmu
callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen


Minor (mostly cosmetic) nit-picks inline. With those fixed, the patch is

Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  83 +---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
  4 files changed, 162 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..6a059e4aff86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
goto out_free;
}
if (cpages != npages)
-   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+   pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
  
  	r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset);

migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   * svm_migrate_ram_to_vram - migrate svm range from system to device
   * @prange: range structure
   * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
   * @mm: the process mm structure
   * @trigger: reason of migration
   *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
   */
  static int
  svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
  {
unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
  
-	if (prange->actual_loc == best_loc) {

-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+   prange->svms, start_mgr, last_mgr);
return 0;
}
  
+	if (start_mgr < prange->start || last_mgr > prange->last) {

+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+start_mgr, last_mgr, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
  
-	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,

-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+   prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+   best_loc);
  
-	start = prange->start << PAGE_SHIFT;

-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
  
  	r = svm_range_vram_node_new(node, prange, true);

if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
  
  	if (cpages) {

prange->actual_loc = best_loc;
-   svm_range_dma_unmap(prange);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no page migrated and all pages from prange are at
+* sys ram drop svm_bo got from svm_range_vram_node_new
+*/
svm_range_vram_node_free(prange);
}
  
@@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,

   * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
   *
   * Return:
- *   0 - success with all pages migrated
   *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages got migrated
   */
  static long
  

[PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults

2023-10-03 Thread Xiaogang . Chen
From: Xiaogang Chen 

This patch implements partial migration in gpu page fault according to migration
granularity(default 2MB) and not split svm range in cpu page fault handling.
A svm range may include pages from both system ram and vram of one gpu now.
These changes are expected to improve migration performance and reduce mmu
callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  83 +---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
 4 files changed, 162 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..6a059e4aff86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
goto out_free;
}
if (cpages != npages)
-   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+   pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
 
r = svm_migrate_copy_to_vram(node, prange, , , scratch, 
ttm_res_offset);
migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
 {
unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
 
-   if (prange->actual_loc == best_loc) {
-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+   prange->svms, start_mgr, last_mgr);
return 0;
}
 
+   if (start_mgr < prange->start || last_mgr > prange->last) {
+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+start_mgr, last_mgr, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+   prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+   best_loc);
 
-   start = prange->start << PAGE_SHIFT;
-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
 
r = svm_range_vram_node_new(node, prange, true);
if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
if (cpages) {
prange->actual_loc = best_loc;
-   svm_range_dma_unmap(prange);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no page migrated and all pages from prange are at
+* sys ram drop svm_bo got from svm_range_vram_node_new
+*/
svm_range_vram_node_free(prange);
}
 
@@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, 
struct svm_range *prange,
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- *   0 - success with all pages migrated
  *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - number of pages got migrated
  */
 static long
 svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
-  struct vm_area_struct *vma, uint64_t start, uint64_t end,