Re: [PATCH v4] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM

2023-12-15 Thread Philip Yang

  


On 2023-12-13 19:24, Xiaogang.Chen
  wrote:


  From: Xiaogang Chen 

v2:
-not need calculate vram page number for new registered svm range, only
do it for split vram pages.

v3:
-use dma address to calculate vram page number of split svm range;
use migrate_vma from hmm to calculate page number that migrate to vram.

v4:
-combine calculating of vram page number of split svm range and page dma
address copy in same loop if original svm range includes vram pages.


Please move the version changes after the patch description.

  
SVM uses hmm page walk to valid buffer before map to gpu vm. After have partial
migration/mapping do validation on same vm range as migration/map do instead of
whole svm range that can be very large. This change is expected to improve svm
code performance.

Signed-off-by: Xiaogang Chen

Thanks for the effort to improve the partial migration
  performance, with some nitpicks below fixed, this patch is
Reviewed-by: Philip Yang 


  
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 35 ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 79 +++-
 2 files changed, 48 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b854cbf06dce..3fb8e59acfbf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr)
 	put_page(page);
 }
 
-static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
-{
-	unsigned long cpages = 0;
-	unsigned long i;
-
-	for (i = 0; i < migrate->npages; i++) {
-		if (migrate->src[i] & MIGRATE_PFN_VALID &&
-		migrate->src[i] & MIGRATE_PFN_MIGRATE)
-			cpages++;
-	}
-	return cpages;
-}

This was called incorrectly before migrate_vma_finalize, agree to
remove it.

  
-
 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
 {
 	unsigned long upages = 0;
@@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 	struct dma_fence *mfence = NULL;
 	struct migrate_vma migrate = { 0 };
 	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	dma_addr_t *scratch;
 	void *buf;
 	int r = -ENOMEM;
@@ -450,12 +438,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 	r = svm_migrate_copy_to_vram(node, prange, , , scratch, ttm_res_offset);
 	migrate_vma_pages();
 
-	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-		svm_migrate_successful_pages(), cpages, migrate.npages);
-
 	svm_migrate_copy_done(adev, mfence);
 	migrate_vma_finalize();
 
+	mpages = cpages - svm_migrate_unsuccessful_pages();
+	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+			 mpages, cpages, migrate.npages);
+
 	kfd_smi_event_migration_end(node, p->lead_thread->pid,
 start >> PAGE_SHIFT, end >> PAGE_SHIFT,
 0, node->id, trigger);
@@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
 out_free:
 	kvfree(buf);
 out:
-	if (!r && cpages) {
+	if (!r && mpages) {
 		pdd = svm_range_get_pdd_by_node(prange, node);
 		if (pdd)
-			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+			WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
 
-		return cpages;
+		return mpages;
 	}
 	return r;
 }
@@ -498,7 +487,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	struct vm_area_struct *vma;
 	uint64_t ttm_res_offset;
 	struct kfd_node *node;
-	unsigned long cpages = 0;
+	unsigned long mpages = 0;
 	long r = 0;
 
 	if (start_mgr < prange->start || last_mgr > prange->last) {
@@ -540,15 +529,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 			pr_debug("failed %ld to migrate\n", r);
 			break;
 		} else {
-			cpages += r;
+			mpages += r;
 		}
 		ttm_res_offset += next - addr;
 		addr = next;
 	}
 
-	if (cpages) {
+	if (mpages) {
 		prange->actual_loc = best_loc;
-		prange->vram_pages = prange->vram_pages + cpages;
+		prange->vram_pages = prange->vram_pages + mpages;

prange->vram_pages += mpages;

  
 	} else if (!prange->actual_loc) {
 		/* if no page migrated and all pages from prange are at
 		 * sys ram drop svm_bo got from svm_range_vram_node_new
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2834fb351818..61e363e388f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		  unsigned long offset, unsigned long npages,
-		  unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages)
+		  unsigned long *hmm_pfns, uint32_t gpuidx)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
 	

[PATCH v4] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM

2023-12-13 Thread Xiaogang . Chen
From: Xiaogang Chen 

v2:
-not need calculate vram page number for new registered svm range, only
do it for split vram pages.

v3:
-use dma address to calculate vram page number of split svm range;
use migrate_vma from hmm to calculate page number that migrate to vram.

v4:
-combine calculating of vram page number of split svm range and page dma
address copy in same loop if original svm range includes vram pages.

SVM uses hmm page walk to valid buffer before map to gpu vm. After have partial
migration/mapping do validation on same vm range as migration/map do instead of
whole svm range that can be very large. This change is expected to improve svm
code performance.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 35 ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 79 +++-
 2 files changed, 48 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b854cbf06dce..3fb8e59acfbf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr)
put_page(page);
 }
 
-static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
-{
-   unsigned long cpages = 0;
-   unsigned long i;
-
-   for (i = 0; i < migrate->npages; i++) {
-   if (migrate->src[i] & MIGRATE_PFN_VALID &&
-   migrate->src[i] & MIGRATE_PFN_MIGRATE)
-   cpages++;
-   }
-   return cpages;
-}
-
 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma 
*migrate)
 {
unsigned long upages = 0;
@@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
struct dma_fence *mfence = NULL;
struct migrate_vma migrate = { 0 };
unsigned long cpages = 0;
+   unsigned long mpages = 0;
dma_addr_t *scratch;
void *buf;
int r = -ENOMEM;
@@ -450,12 +438,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
r = svm_migrate_copy_to_vram(node, prange, , , scratch, 
ttm_res_offset);
migrate_vma_pages();
 
-   pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-   svm_migrate_successful_pages(), cpages, migrate.npages);
-
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize();
 
+   mpages = cpages - svm_migrate_unsuccessful_pages();
+   pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+mpages, cpages, migrate.npages);
+
kfd_smi_event_migration_end(node, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, node->id, trigger);
@@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
 out_free:
kvfree(buf);
 out:
-   if (!r && cpages) {
+   if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
-   WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+   WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
 
-   return cpages;
+   return mpages;
}
return r;
 }
@@ -498,7 +487,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
struct vm_area_struct *vma;
uint64_t ttm_res_offset;
struct kfd_node *node;
-   unsigned long cpages = 0;
+   unsigned long mpages = 0;
long r = 0;
 
if (start_mgr < prange->start || last_mgr > prange->last) {
@@ -540,15 +529,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
pr_debug("failed %ld to migrate\n", r);
break;
} else {
-   cpages += r;
+   mpages += r;
}
ttm_res_offset += next - addr;
addr = next;
}
 
-   if (cpages) {
+   if (mpages) {
prange->actual_loc = best_loc;
-   prange->vram_pages = prange->vram_pages + cpages;
+   prange->vram_pages = prange->vram_pages + mpages;
} else if (!prange->actual_loc) {
/* if no page migrated and all pages from prange are at
 * sys ram drop svm_bo got from svm_range_vram_node_new
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2834fb351818..61e363e388f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, 
dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
  unsigned long offset, unsigned long npages,