Re: [PATCH v2] drm/amdkfd: Cal vram offset in TTM resource for each svm_migrate_copy_to_vram

2023-03-01 Thread Felix Kuehling



On 2023-03-01 14:24, Chen, Xiaogang wrote:


On 3/1/2023 12:54 PM, Felix Kuehling wrote:

Am 2023-03-01 um 11:34 schrieb Xiaogang.Chen:

From: Xiaogang Chen 

svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The 
prange may
cross multiple vmas. We need to remember the current dst vram offset in 
the TTM resource for

each migration.

Signed-off-by: Xiaogang Chen 


The patch looks good to me. Is this a typo in the title: "Cal vram 
offset ..."? Should this be "Calculate vram offset ..."?


For a shorter title I'd suggest: "Fix BO offset for multi-VMA page 
migration".



ok.
Is a similar fix needed for migration in the other direction, VRAM to 
system memory?


For vram to sys ram migration, vram is the src and hmm collects the vram 
pages by migrate_vma_setup. kfd gets each vram page addr by 
svm_migrate_addr from the page, so the addr corresponds to each page. I 
think it is ok.


Makes sense. The patch is

Reviewed-by: Felix Kuehling 




Regards

Xiaogang



Regards,
  Felix



---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++---
  1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index 1c625433ff30..373cd7b0e1ca 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -294,7 +294,7 @@ static unsigned long 
svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)

  static int
  svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,

   struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch)
+ dma_addr_t *scratch, uint64_t ttm_res_offset)
  {
  uint64_t npages = migrate->npages;
  struct device *dev = adev->dev;
@@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device 
*adev, struct svm_range *prange,

  uint64_t i, j;
  int r;
  -    pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, 
prange->start,

- prange->last);
+    pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, 
prange->start,

+ prange->last, ttm_res_offset);
    src = scratch;
  dst = (uint64_t *)(scratch + npages);
@@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device 
*adev, struct svm_range *prange,

  goto out;
  }
  -    amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+    amdgpu_res_first(prange->ttm_res, ttm_res_offset,
   npages << PAGE_SHIFT, &cursor);
  for (i = j = 0; i < npages; i++) {
  struct page *spage;
@@ -403,7 +403,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device 
*adev, struct svm_range *prange,

  static long
  svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,

  struct vm_area_struct *vma, uint64_t start,
-    uint64_t end, uint32_t trigger)
+    uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
  {
  struct kfd_process *p = container_of(prange->svms, struct 
kfd_process, svms);

  uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -456,7 +456,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device 
*adev, struct svm_range *prange,

  else
  pr_debug("0x%lx pages migrated\n", cpages);
  -    r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, 
scratch);
+    r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, 
scratch, ttm_res_offset);

  migrate_vma_pages(&migrate);
    pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -504,6 +504,7 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  unsigned long addr, start, end;
  struct vm_area_struct *vma;
  struct amdgpu_device *adev;
+    uint64_t ttm_res_offset;
  unsigned long cpages = 0;
  long r = 0;
  @@ -524,6 +525,7 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

    start = prange->start << PAGE_SHIFT;
  end = (prange->last + 1) << PAGE_SHIFT;
+    ttm_res_offset = prange->offset << PAGE_SHIFT;
    for (addr = start; addr < end;) {
  unsigned long next;
@@ -533,13 +535,14 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  break;
    next = min(vma->vm_end, end);
-    r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger);
+    r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger, ttm_res_offset);

  if (r < 0) {
  pr_debug("failed %ld to migrate\n", r);
  break;
  } else {
  cpages += r;
  }
+    ttm_res_offset += next - addr;
  addr = next;
  }


Re: [PATCH v2] drm/amdkfd: Cal vram offset in TTM resource for each svm_migrate_copy_to_vram

2023-03-01 Thread Chen, Xiaogang



On 3/1/2023 12:54 PM, Felix Kuehling wrote:

Am 2023-03-01 um 11:34 schrieb Xiaogang.Chen:

From: Xiaogang Chen 

svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The 
prange may
cross multiple vmas. We need to remember the current dst vram offset in 
the TTM resource for

each migration.

Signed-off-by: Xiaogang Chen 


The patch looks good to me. Is this a typo in the title: "Cal vram 
offset ..."? Should this be "Calculate vram offset ..."?


For a shorter title I'd suggest: "Fix BO offset for multi-VMA page 
migration".



ok.
Is a similar fix needed for migration in the other direction, VRAM to 
system memory?


For vram to sys ram migration, vram is the src and hmm collects the vram pages 
by migrate_vma_setup. kfd gets each vram page addr by svm_migrate_addr 
from the page, so the addr corresponds to each page. I think it is ok.


Regards

Xiaogang



Regards,
  Felix



---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++---
  1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c

index 1c625433ff30..373cd7b0e1ca 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -294,7 +294,7 @@ static unsigned long 
svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)

  static int
  svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,

   struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch)
+ dma_addr_t *scratch, uint64_t ttm_res_offset)
  {
  uint64_t npages = migrate->npages;
  struct device *dev = adev->dev;
@@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device 
*adev, struct svm_range *prange,

  uint64_t i, j;
  int r;
  -    pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, 
prange->start,

- prange->last);
+    pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, 
prange->start,

+ prange->last, ttm_res_offset);
    src = scratch;
  dst = (uint64_t *)(scratch + npages);
@@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device 
*adev, struct svm_range *prange,

  goto out;
  }
  -    amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+    amdgpu_res_first(prange->ttm_res, ttm_res_offset,
   npages << PAGE_SHIFT, &cursor);
  for (i = j = 0; i < npages; i++) {
  struct page *spage;
@@ -403,7 +403,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device 
*adev, struct svm_range *prange,

  static long
  svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,

  struct vm_area_struct *vma, uint64_t start,
-    uint64_t end, uint32_t trigger)
+    uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
  {
  struct kfd_process *p = container_of(prange->svms, struct 
kfd_process, svms);

  uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -456,7 +456,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device 
*adev, struct svm_range *prange,

  else
  pr_debug("0x%lx pages migrated\n", cpages);
  -    r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, 
scratch);
+    r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, 
scratch, ttm_res_offset);

  migrate_vma_pages(&migrate);
    pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -504,6 +504,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,

  unsigned long addr, start, end;
  struct vm_area_struct *vma;
  struct amdgpu_device *adev;
+    uint64_t ttm_res_offset;
  unsigned long cpages = 0;
  long r = 0;
  @@ -524,6 +525,7 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

    start = prange->start << PAGE_SHIFT;
  end = (prange->last + 1) << PAGE_SHIFT;
+    ttm_res_offset = prange->offset << PAGE_SHIFT;
    for (addr = start; addr < end;) {
  unsigned long next;
@@ -533,13 +535,14 @@ svm_migrate_ram_to_vram(struct svm_range 
*prange, uint32_t best_loc,

  break;
    next = min(vma->vm_end, end);
-    r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger);
+    r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger, ttm_res_offset);

  if (r < 0) {
  pr_debug("failed %ld to migrate\n", r);
  break;
  } else {
  cpages += r;
  }
+    ttm_res_offset += next - addr;
  addr = next;
  }


Re: [PATCH v2] drm/amdkfd: Cal vram offset in TTM resource for each svm_migrate_copy_to_vram

2023-03-01 Thread Felix Kuehling

Am 2023-03-01 um 11:34 schrieb Xiaogang.Chen:

From: Xiaogang Chen 

svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The prange may
cross multiple vmas. We need to remember the current dst vram offset in the TTM 
resource for
each migration.

Signed-off-by: Xiaogang Chen 


The patch looks good to me. Is this a typo in the title: "Cal vram 
offset ..."? Should this be "Calculate vram offset ..."?


For a shorter title I'd suggest: "Fix BO offset for multi-VMA page 
migration".


Is a similar fix needed for migration in the other direction, VRAM to 
system memory?


Regards,
  Felix



---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++---
  1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 1c625433ff30..373cd7b0e1ca 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -294,7 +294,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct 
migrate_vma *migrate)
  static int
  svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 struct migrate_vma *migrate, struct dma_fence **mfence,
-dma_addr_t *scratch)
+dma_addr_t *scratch, uint64_t ttm_res_offset)
  {
uint64_t npages = migrate->npages;
struct device *dev = adev->dev;
@@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
uint64_t i, j;
int r;
  
-	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,

-prange->last);
+   pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last, ttm_res_offset);
  
  	src = scratch;

dst = (uint64_t *)(scratch + npages);
@@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
goto out;
}
  
-	amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,

+   amdgpu_res_first(prange->ttm_res, ttm_res_offset,
 npages << PAGE_SHIFT, &cursor);
for (i = j = 0; i < npages; i++) {
struct page *spage;
@@ -403,7 +403,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
  static long
  svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
-   uint64_t end, uint32_t trigger)
+   uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
  {
struct kfd_process *p = container_of(prange->svms, struct kfd_process, 
svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -456,7 +456,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
else
pr_debug("0x%lx pages migrated\n", cpages);
  
-	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);

+   r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, 
ttm_res_offset);
	migrate_vma_pages(&migrate);
  
  	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",

@@ -504,6 +504,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
+   uint64_t ttm_res_offset;
unsigned long cpages = 0;
long r = 0;
  
@@ -524,6 +525,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
  
  	start = prange->start << PAGE_SHIFT;

end = (prange->last + 1) << PAGE_SHIFT;
+   ttm_res_offset = prange->offset << PAGE_SHIFT;
  
  	for (addr = start; addr < end;) {

unsigned long next;
@@ -533,13 +535,14 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
break;
  
  		next = min(vma->vm_end, end);

-   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger);
+   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger, ttm_res_offset);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
} else {
cpages += r;
}
+   ttm_res_offset += next - addr;
addr = next;
}