[PATCH] drm/kfd: Correct pinned buffer handling at kfd restore and validate process

2024-05-13 Thread Xiaogang . Chen
From: Xiaogang Chen 

This reverts commit 8a774fe912ff09e39c2d3a3589c729330113f388 ("drm/amdgpu: avoid
restore process run into dead loop") since whether a buffer has been pinned is not
related to whether it needs mapping. Also skip buffer validation in the kfd driver
if the buffer has already been pinned.

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 3314821e4cf3..80018738bd1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -415,6 +415,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, 
uint32_t domain,
 "Called with userptr BO"))
return -EINVAL;
 
+   /* bo has been pinned, no need to validate it */
+   if (bo->tbo.pin_count)
+   return 0;
+
amdgpu_bo_placement_from_domain(bo, domain);
 
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -2736,7 +2740,7 @@ static int confirm_valid_user_pages_locked(struct 
amdkfd_process_info *process_i
 
/* keep mem without hmm range at userptr_inval_list */
if (!mem->range)
-continue;
+   continue;
 
/* Only check mem with hmm range associated */
valid = amdgpu_ttm_tt_get_user_pages_done(
@@ -2981,9 +2985,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence __rcu *
if (!attachment->is_mapped)
continue;
 
-   if (attachment->bo_va->base.bo->tbo.pin_count)
-   continue;
-
kfd_mem_dmaunmap_attachment(mem, attachment);
ret = update_gpuvm_pte(mem, attachment, &sync_obj);
if (ret) {
-- 
2.25.1
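
A condensed sketch of the validate path with the new early return (the helper name
is illustrative, not the upstream function; it only shows the shape of the check):
a pinned BO's placement is already fixed, so re-validating it cannot move it and is
pure overhead.

static int bo_validate_sketch(struct amdgpu_bo *bo, uint32_t domain)
{
	struct ttm_operation_ctx ctx = { false, false };

	/* pinned: placement is fixed, nothing for ttm_bo_validate() to do */
	if (bo->tbo.pin_count)
		return 0;

	amdgpu_bo_placement_from_domain(bo, domain);
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}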



[PATCH] amd/kfd: cancel work items at ih_wq in kfd_interrupt_exit

2024-04-10 Thread Xiaogang . Chen
From: Xiaogang Chen 

When the kfd/amdgpu driver is tearing down it cannot handle callbacks from
ih_wq. If there are still work items left, cancel them instead of flushing,
which would wait until they have been served.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 9b6b6e882593..1847b9290a84 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -98,11 +98,12 @@ void kfd_interrupt_exit(struct kfd_node *node)
spin_unlock_irqrestore(>interrupt_lock, flags);
 
/*
-* flush_work ensures that there are no outstanding
-* work-queue items that will access interrupt_ring. New work items
+* Cancel work items still at ih_wq to ensure there are no outstanding
+* work-queue items that will access interrupt_ring. At this stage the
+* kfd/amdgpu driver is tearing down and cannot handle wq callbacks. New work items
 * can't be created because we stopped interrupt handling above.
 */
-   flush_workqueue(node->ih_wq);
+   cancel_work(&node->interrupt_work);
 
destroy_workqueue(node->ih_wq);
 
-- 
2.25.1
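
A condensed sketch of the teardown ordering this change relies on (hypothetical
helper name, assuming the usual kfd_node fields): interrupts are stopped first,
then any work item still queued on ih_wq is dropped rather than executed, and only
then is the workqueue destroyed.

static void interrupt_teardown_sketch(struct kfd_node *node)
{
	unsigned long flags;

	/* stop producing new work items from the interrupt path */
	spin_lock_irqsave(&node->interrupt_lock, flags);
	node->interrupts_active = false;
	spin_unlock_irqrestore(&node->interrupt_lock, flags);

	/*
	 * cancel_work() removes a still-pending item without running it.
	 * flush_workqueue() would instead run it to completion, which is not
	 * safe while the kfd/amdgpu driver is mid-teardown.
	 */
	cancel_work(&node->interrupt_work);

	destroy_workqueue(node->ih_wq);
}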



[PATCH v4] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM

2023-12-13 Thread Xiaogang . Chen
From: Xiaogang Chen 

v2:
- no need to calculate the vram page number for a newly registered svm range;
only do it for split vram pages.

v3:
- use the dma address to calculate the vram page number of a split svm range;
use migrate_vma from hmm to calculate the number of pages migrated to vram.

v4:
- combine calculating the vram page number of a split svm range with the page
dma address copy in the same loop when the original svm range includes vram pages.

SVM uses an hmm page walk to validate a buffer before mapping it to the gpu vm.
Now that partial migration/mapping is in place, do the validation on the same vm
range as the migration/map instead of on the whole svm range, which can be very
large. This change is expected to improve svm code performance.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 35 ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 79 +++-
 2 files changed, 48 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b854cbf06dce..3fb8e59acfbf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr)
put_page(page);
 }
 
-static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
-{
-   unsigned long cpages = 0;
-   unsigned long i;
-
-   for (i = 0; i < migrate->npages; i++) {
-   if (migrate->src[i] & MIGRATE_PFN_VALID &&
-   migrate->src[i] & MIGRATE_PFN_MIGRATE)
-   cpages++;
-   }
-   return cpages;
-}
-
 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma 
*migrate)
 {
unsigned long upages = 0;
@@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
struct dma_fence *mfence = NULL;
struct migrate_vma migrate = { 0 };
unsigned long cpages = 0;
+   unsigned long mpages = 0;
dma_addr_t *scratch;
void *buf;
int r = -ENOMEM;
@@ -450,12 +438,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
migrate_vma_pages(&migrate);
 
-   pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-   svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
-
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
 
+   mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
+   pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+mpages, cpages, migrate.npages);
+
kfd_smi_event_migration_end(node, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, node->id, trigger);
@@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
 out_free:
kvfree(buf);
 out:
-   if (!r && cpages) {
+   if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
-   WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+   WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
 
-   return cpages;
+   return mpages;
}
return r;
 }
@@ -498,7 +487,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
struct vm_area_struct *vma;
uint64_t ttm_res_offset;
struct kfd_node *node;
-   unsigned long cpages = 0;
+   unsigned long mpages = 0;
long r = 0;
 
if (start_mgr < prange->start || last_mgr > prange->last) {
@@ -540,15 +529,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
pr_debug("failed %ld to migrate\n", r);
break;
} else {
-   cpages += r;
+   mpages += r;
}
ttm_res_offset += next - addr;
addr = next;
}
 
-   if (cpages) {
+   if (mpages) {
prange->actual_loc = best_loc;
-   prange->vram_pages = prange->vram_pages + cpages;
+   prange->vram_pages = prange->vram_pages + mpages;
} else if (!prange->actual_loc) {
/* if no page migrated and all pages from prange are at
 * sys ram drop svm_bo got from svm_range_vram_node_new
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2834fb351818..61e363e388f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, 
dma_addr_t dma_addr)
 static int
 svm
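
The kfd_svm.c hunk above is cut off in this archive. A small user-space model of
the v3/v4 idea of recounting a split range's vram pages from the saved dma address
array instead of re-walking hmm; the flag name and the demo values are illustrative
only, not taken from the patch.

#include <stdio.h>

#define VRAM_PAGE_FLAG 0x1UL	/* stand-in for the domain bit kept in dma_addr */

static unsigned long count_vram_pages(const unsigned long *dma_addr,
				      unsigned long npages)
{
	unsigned long i, vram_pages = 0;

	for (i = 0; i < npages; i++)
		if (dma_addr[i] & VRAM_PAGE_FLAG)
			vram_pages++;
	return vram_pages;
}

int main(void)
{
	/* split a 6-page range into [0..2] and [3..5] and recount each half */
	unsigned long dma[6] = { 0x1000 | VRAM_PAGE_FLAG, 0x2000,
				 0x3000 | VRAM_PAGE_FLAG, 0x4000,
				 0x5000 | VRAM_PAGE_FLAG, 0x6000 | VRAM_PAGE_FLAG };

	printf("head vram pages: %lu\n", count_vram_pages(dma, 3));
	printf("tail vram pages: %lu\n", count_vram_pages(dma + 3, 3));
	return 0;
}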

[PATCH v3] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM

2023-12-06 Thread Xiaogang . Chen
From: Xiaogang Chen 

v2:
- no need to calculate the vram page number for a newly registered svm range;
only do it for split vram pages.

v3:
- use the dma address to calculate the vram page number of a split svm range;
use migrate_vma from hmm to calculate the number of pages migrated to vram.

SVM uses an hmm page walk to validate a buffer before mapping it to the gpu vm.
Now that partial migration/mapping is in place, do the validation on the same vm
range as the migration/map instead of on the whole svm range, which can be very
large. This change is expected to improve svm code performance.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 35 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 72 
 2 files changed, 48 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b854cbf06dce..34376184c37c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -260,19 +260,6 @@ static void svm_migrate_put_sys_page(unsigned long addr)
put_page(page);
 }
 
-static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
-{
-   unsigned long cpages = 0;
-   unsigned long i;
-
-   for (i = 0; i < migrate->npages; i++) {
-   if (migrate->src[i] & MIGRATE_PFN_VALID &&
-   migrate->src[i] & MIGRATE_PFN_MIGRATE)
-   cpages++;
-   }
-   return cpages;
-}
-
 static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma 
*migrate)
 {
unsigned long upages = 0;
@@ -402,6 +389,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
struct dma_fence *mfence = NULL;
struct migrate_vma migrate = { 0 };
unsigned long cpages = 0;
+   unsigned long mpages = 0;
dma_addr_t *scratch;
void *buf;
int r = -ENOMEM;
@@ -450,12 +438,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
migrate_vma_pages(&migrate);
 
-   pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-   svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
-
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
 
+   mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
+   pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+   mpages, cpages, migrate.npages);
+
kfd_smi_event_migration_end(node, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, node->id, trigger);
@@ -465,12 +454,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
 out_free:
kvfree(buf);
 out:
-   if (!r && cpages) {
+   if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
-   WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+   WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
 
-   return cpages;
+   return mpages;
}
return r;
 }
@@ -498,7 +487,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
struct vm_area_struct *vma;
uint64_t ttm_res_offset;
struct kfd_node *node;
-   unsigned long cpages = 0;
+   unsigned long mpages = 0;
long r = 0;
 
if (start_mgr < prange->start || last_mgr > prange->last) {
@@ -540,15 +529,15 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
pr_debug("failed %ld to migrate\n", r);
break;
} else {
-   cpages += r;
+   mpages += r;
}
ttm_res_offset += next - addr;
addr = next;
}
 
-   if (cpages) {
+   if (mpages) {
prange->actual_loc = best_loc;
-   prange->vram_pages = prange->vram_pages + cpages;
+   prange->vram_pages = prange->vram_pages + mpages;
} else if (!prange->actual_loc) {
/* if no page migrated and all pages from prange are at
 * sys ram drop svm_bo got from svm_range_vram_node_new
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2834fb351818..8d012ca82cd6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, 
dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
  unsigned long offset, unsigned long npages,
-   

[PATCH v2] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM

2023-12-04 Thread Xiaogang . Chen
From: Xiaogang Chen 

v2:
- no need to calculate the vram page number for a newly registered svm range;
only do it for split vram pages.

SVM uses an hmm page walk to validate a buffer before mapping it to the gpu vm.
Now that partial migration/mapping is in place, do the validation on the same vm
range as the migration/map instead of on the whole svm range, which can be very
large. This change is expected to improve svm code performance.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 149 ---
 1 file changed, 109 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2834fb351818..2f14cd1a3416 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, 
dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
  unsigned long offset, unsigned long npages,
- unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t 
*vram_pages)
+ unsigned long *hmm_pfns, uint32_t gpuidx)
 {
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
dma_addr_t *addr = prange->dma_addr[gpuidx];
struct device *dev = adev->dev;
struct page *page;
-   uint64_t vram_pages_dev;
int i, r;
 
if (!addr) {
@@ -174,7 +173,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
prange->dma_addr[gpuidx] = addr;
}
 
-   vram_pages_dev = 0;
addr += offset;
for (i = 0; i < npages; i++) {
if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
@@ -184,7 +182,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
if (is_zone_device_page(page)) {
struct amdgpu_device *bo_adev = 
prange->svm_bo->node->adev;
 
-   vram_pages_dev++;
addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
   bo_adev->vm_manager.vram_base_offset -
   bo_adev->kfd.pgmap.range.start;
@@ -201,14 +198,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 addr[i] >> PAGE_SHIFT, page_to_pfn(page));
}
-   *vram_pages = vram_pages_dev;
+
return 0;
 }
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
  unsigned long offset, unsigned long npages,
- unsigned long *hmm_pfns, uint64_t *vram_pages)
+ unsigned long *hmm_pfns)
 {
struct kfd_process *p;
uint32_t gpuidx;
@@ -227,7 +224,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long 
*bitmap,
}
 
r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, 
npages,
- hmm_pfns, gpuidx, vram_pages);
+ hmm_pfns, gpuidx);
if (r)
break;
}
@@ -982,11 +979,6 @@ svm_range_split_nodes(struct svm_range *new, struct 
svm_range *old,
new->svm_bo = svm_range_bo_ref(old->svm_bo);
new->ttm_res = old->ttm_res;
 
-   /* set new's vram_pages as old range's now, the acurate vram_pages
-* will be updated during mapping
-*/
-   new->vram_pages = min(old->vram_pages, new->npages);
-
spin_lock(>svm_bo->list_lock);
list_add(>svm_bo_list, >svm_bo->range_list);
spin_unlock(>svm_bo->list_lock);
@@ -1107,9 +1099,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, 
uint64_t last,
 
 static int
 svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
-struct list_head *insert_list, struct list_head 
*remap_list)
+struct list_head *insert_list, struct list_head 
*remap_list,
+struct svm_range *tail)
 {
-   struct svm_range *tail;
int r = svm_range_split(prange, prange->start, new_last, );
 
if (!r) {
@@ -1122,9 +1114,9 @@ svm_range_split_tail(struct svm_range *prange, uint64_t 
new_last,
 
 static int
 svm_range_split_head(struct svm_range *prange, uint64_t new_start,
-struct list_head *insert_list, struct list_head 
*remap_list)
+struct list_head *insert_list, struct list_head 
*remap_list,
+struct svm_range *head)
 {
-   struct svm_range *head;
int r = svm_range_split(prange, new_start, prange->last, );
 
if (!r) {
@@ -1573,7 +1565,6 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
struct svm_validate_context *ctx;
u

[PATCH] drm/amdkfd: Use partial hmm page walk during buffer validation in SVM

2023-12-04 Thread Xiaogang . Chen
From: Xiaogang Chen 

SVM uses an hmm page walk to validate a buffer before mapping it to the gpu vm.
Now that partial migration/mapping is in place, do the validation on the same vm
range as the migration/map instead of on the whole svm range, which can be very
large. This change is expected to improve svm code performance.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 123 +++
 1 file changed, 89 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2834fb351818..f670d5f6bcdf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,13 +158,12 @@ svm_is_valid_dma_mapping_addr(struct device *dev, 
dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
  unsigned long offset, unsigned long npages,
- unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t 
*vram_pages)
+ unsigned long *hmm_pfns, uint32_t gpuidx)
 {
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
dma_addr_t *addr = prange->dma_addr[gpuidx];
struct device *dev = adev->dev;
struct page *page;
-   uint64_t vram_pages_dev;
int i, r;
 
if (!addr) {
@@ -174,7 +173,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
prange->dma_addr[gpuidx] = addr;
}
 
-   vram_pages_dev = 0;
addr += offset;
for (i = 0; i < npages; i++) {
if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
@@ -184,7 +182,6 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
if (is_zone_device_page(page)) {
struct amdgpu_device *bo_adev = 
prange->svm_bo->node->adev;
 
-   vram_pages_dev++;
addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
   bo_adev->vm_manager.vram_base_offset -
   bo_adev->kfd.pgmap.range.start;
@@ -201,14 +198,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct 
svm_range *prange,
pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 addr[i] >> PAGE_SHIFT, page_to_pfn(page));
}
-   *vram_pages = vram_pages_dev;
+
return 0;
 }
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
  unsigned long offset, unsigned long npages,
- unsigned long *hmm_pfns, uint64_t *vram_pages)
+ unsigned long *hmm_pfns)
 {
struct kfd_process *p;
uint32_t gpuidx;
@@ -227,7 +224,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long 
*bitmap,
}
 
r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, 
npages,
- hmm_pfns, gpuidx, vram_pages);
+ hmm_pfns, gpuidx);
if (r)
break;
}
@@ -982,11 +979,6 @@ svm_range_split_nodes(struct svm_range *new, struct 
svm_range *old,
new->svm_bo = svm_range_bo_ref(old->svm_bo);
new->ttm_res = old->ttm_res;
 
-   /* set new's vram_pages as old range's now, the acurate vram_pages
-* will be updated during mapping
-*/
-   new->vram_pages = min(old->vram_pages, new->npages);
-
spin_lock(>svm_bo->list_lock);
list_add(>svm_bo_list, >svm_bo->range_list);
spin_unlock(>svm_bo->list_lock);
@@ -1573,7 +1565,6 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
struct svm_validate_context *ctx;
unsigned long start, end, addr;
struct kfd_process *p;
-   uint64_t vram_pages;
void *owner;
int32_t idx;
int r = 0;
@@ -1642,15 +1633,13 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
}
}
 
-   vram_pages = 0;
-   start = prange->start << PAGE_SHIFT;
-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = map_start << PAGE_SHIFT;
+   end = (map_last + 1) << PAGE_SHIFT;
for (addr = start; !r && addr < end; ) {
struct hmm_range *hmm_range;
unsigned long map_start_vma;
unsigned long map_last_vma;
struct vm_area_struct *vma;
-   uint64_t vram_pages_vma;
unsigned long next = 0;
unsigned long offset;
unsigned long npages;
@@ -1677,13 +1666,11 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
}
 
if (!r) {
-   offset = (addr - start) >> PAGE_SHIFT;
+   of

[PATCH] drm/amdkfd: Use partial migrations/mapping for GPU/CPU page faults in SVM

2023-11-14 Thread Xiaogang . Chen
From: Xiaogang Chen 

This patch implements partial migration/mapping for gpu/cpu page faults in SVM
according to the migration granularity (default 2MB). An svm range may now include
pages from both system ram and the vram of one gpu. These changes are expected to
improve migration performance and reduce mmu callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 152 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 176 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   9 +-
 4 files changed, 183 insertions(+), 160 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..67df1b46f292 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
goto out_free;
}
if (cpages != npages)
-   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+   pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
 
r = svm_migrate_copy_to_vram(node, prange, , , scratch, 
ttm_res_offset);
migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
 {
unsigned long addr, start, end;
@@ -498,30 +501,37 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
 
-   if (prange->actual_loc == best_loc) {
-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+   prange->svms, start_mgr, last_mgr);
return 0;
}
 
+   if (start_mgr < prange->start || last_mgr > prange->last) {
+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+start_mgr, last_mgr, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+   prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+   best_loc);
 
-   start = prange->start << PAGE_SHIFT;
-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
 
r = svm_range_vram_node_new(node, prange, true);
if (r) {
dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
return r;
}
-   ttm_res_offset = prange->offset << PAGE_SHIFT;
+   ttm_res_offset = (start_mgr - prange->start + prange->offset) << 
PAGE_SHIFT;
 
for (addr = start; addr < end;) {
unsigned long next;
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
if (cpages) {
prange->actual_loc = best_loc;
-   svm_range_dma_unmap(prange);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no page migrated and all pages from prange are at
+* sys ram drop svm_bo got from svm_range_vram_node_new
+*/
svm_range_vram_node_free(prange);
}
 
@@ -663,9 +676,8 @@ svm_migrate_copy
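
The rest of this entry is cut off in this archive. A small user-space model of the
migration-granularity arithmetic the series uses: a fault is expanded to its 2MB
granule and clamped to the svm range bounds, and only that window is migrated and
then mapped. Names and values are illustrative only, not taken from the patch.

#include <stdio.h>

#define GRANULARITY_PAGES 512UL		/* 2MB worth of 4KB pages */

int main(void)
{
	unsigned long range_start = 0x1050, range_last = 0x1f00; /* page numbers */
	unsigned long fault_addr  = 0x1234;			  /* faulting page */

	unsigned long start = fault_addr & ~(GRANULARITY_PAGES - 1); /* round down */
	unsigned long last  = start + GRANULARITY_PAGES - 1;

	if (start < range_start)	/* clamp the granule to the svm range */
		start = range_start;
	if (last > range_last)
		last = range_last;

	printf("fault 0x%lx -> migrate/map pages [0x%lx 0x%lx]\n",
	       fault_addr, start, last);
	return 0;
}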

[PATCH v3] drm/amdkfd: Use partial mapping in GPU page faults

2023-10-20 Thread Xiaogang . Chen
From: Xiaogang Chen 

After a partial migration to recover a GPU page fault, this patch maps into the
GPU vm only the page range that was migrated, instead of mapping all pages of the
svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 29 
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..3a71d04779b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, 
int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+ unsigned long map_start, unsigned long 
map_last,
  struct svm_range *prange, int32_t gpuidx,
  bool intr, bool wait, bool flush_tlb)
 {
@@ -1699,6 +1700,8 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
end = (prange->last + 1) << PAGE_SHIFT;
for (addr = start; !r && addr < end; ) {
struct hmm_range *hmm_range;
+   unsigned long map_start_vma;
+   unsigned long map_last_vma;
struct vm_area_struct *vma;
uint64_t vram_pages_vma;
unsigned long next = 0;
@@ -1747,9 +1750,16 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
r = -EAGAIN;
}
 
-   if (!r)
-   r = svm_range_map_to_gpus(prange, offset, npages, 
readonly,
- ctx->bitmap, wait, flush_tlb);
+   if (!r) {
+   map_start_vma = max(map_start, prange->start + offset);
+   map_last_vma = min(map_last, prange->start + offset + 
npages - 1);
+   if (map_start_vma <= map_last_vma) {
+   offset = map_start_vma - prange->start;
+   npages = map_last_vma - map_start_vma + 1;
+   r = svm_range_map_to_gpus(prange, offset, 
npages, readonly,
+ ctx->bitmap, wait, 
flush_tlb);
+   }
+   }
 
if (!r && next == end)
prange->mapped_to_gpu = true;
@@ -1855,8 +1865,8 @@ static void svm_range_restore_work(struct work_struct 
*work)
 */
mutex_lock(>migrate_mutex);
 
-   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-  false, true, false);
+   r = svm_range_validate_and_map(mm, prange->start, prange->last, 
prange,
+  MAX_GPU_INSTANCE, false, true, 
false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
 prange->start);
@@ -3069,6 +3079,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
   write_fault, timestamp);
 
+   start = prange->start;
+   last = prange->last;
if (prange->actual_loc != 0 || best_loc != 0) {
migration = true;
/* Align migration range start and size to granularity size */
@@ -3102,10 +3114,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
}
 
-   r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+   r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+  false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-r, svms, prange->start, prange->last);
+r, svms, start, last);
 
kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 migration);
@@ -3650,7 +3663,7 @@ svm_range_set_attr(struct kfd_process *p, struct 
mm_struct *mm,
 
flush_tlb = !migrated && update_mapping && 
prange->mapped_to_gpu;
 
-   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+   r = svm_range_validate_and_map(mm, prange->start, prange->last, 
prange, MAX_GPU_INSTANCE,
   true, true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);
-- 
2.25.1
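
A small user-space model of the clamping done in the hunk above: the requested map
window [map_start, map_last] is intersected with each chunk of pages returned by
the hmm walk, and only the overlap is mapped. Names and the demo values are
illustrative only, not taken from the patch.

#include <stdio.h>

static void clamp_to_chunk(unsigned long map_start, unsigned long map_last,
			   unsigned long chunk_start, unsigned long chunk_npages)
{
	unsigned long chunk_last = chunk_start + chunk_npages - 1;
	unsigned long first = map_start > chunk_start ? map_start : chunk_start;
	unsigned long last  = map_last < chunk_last ? map_last : chunk_last;

	if (first > last) {
		printf("chunk [0x%lx 0x%lx]: nothing to map\n", chunk_start, chunk_last);
		return;
	}
	printf("chunk [0x%lx 0x%lx]: map offset-in-chunk 0x%lx, npages 0x%lx\n",
	       chunk_start, chunk_last, first - chunk_start, last - first + 1);
}

int main(void)
{
	/* a 512-page map window intersected with three 512-page chunks */
	clamp_to_chunk(0x1100, 0x12ff, 0x1000, 0x200);	/* partial overlap */
	clamp_to_chunk(0x1100, 0x12ff, 0x1200, 0x200);	/* partial overlap */
	clamp_to_chunk(0x1100, 0x12ff, 0x1400, 0x200);	/* no overlap      */
	return 0;
}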



[PATCH v2] drm/amdkfd: Use partial mapping in GPU page fault recovery

2023-10-20 Thread Xiaogang . Chen
From: Xiaogang Chen 

After a partial migration to recover a GPU page fault, this patch maps into the
GPU vm only the page range that was migrated, instead of mapping all pages of the
svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..58f0506d5221 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, 
int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+ unsigned long map_start, unsigned long 
map_last,
  struct svm_range *prange, int32_t gpuidx,
  bool intr, bool wait, bool flush_tlb)
 {
@@ -1747,9 +1748,16 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
r = -EAGAIN;
}
 
-   if (!r)
-   r = svm_range_map_to_gpus(prange, offset, npages, 
readonly,
- ctx->bitmap, wait, flush_tlb);
+   if (!r) {
+   map_start = max(map_start, prange->start + offset);
+   map_last = min(map_last, prange->start + offset + 
npages - 1);
+   if (map_start <= map_last) {
+   offset = map_start - prange->start;
+   npages = map_last - map_start + 1;
+   r = svm_range_map_to_gpus(prange, offset, 
npages, readonly,
+ ctx->bitmap, wait, 
flush_tlb);
+   }
+   }
 
if (!r && next == end)
prange->mapped_to_gpu = true;
@@ -1855,8 +1863,8 @@ static void svm_range_restore_work(struct work_struct 
*work)
 */
mutex_lock(>migrate_mutex);
 
-   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-  false, true, false);
+   r = svm_range_validate_and_map(mm, prange->start, prange->last, 
prange,
+  MAX_GPU_INSTANCE, false, true, 
false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
 prange->start);
@@ -3069,6 +3077,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
   write_fault, timestamp);
 
+   start = prange->start;
+   last = prange->last;
if (prange->actual_loc != 0 || best_loc != 0) {
migration = true;
/* Align migration range start and size to granularity size */
@@ -3102,10 +3112,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
}
 
-   r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+   r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+  false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-r, svms, prange->start, prange->last);
+r, svms, start, last);
 
kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 migration);
@@ -3650,7 +3661,7 @@ svm_range_set_attr(struct kfd_process *p, struct 
mm_struct *mm,
 
flush_tlb = !migrated && update_mapping && 
prange->mapped_to_gpu;
 
-   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+   r = svm_range_validate_and_map(mm, prange->start, prange->last, 
prange, MAX_GPU_INSTANCE,
   true, true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);
-- 
2.25.1



[PATCH] drm/amdkfd: Use partial mapping in GPU page fault recovery

2023-10-19 Thread Xiaogang . Chen
From: Xiaogang Chen 

After a partial migration to recover a GPU page fault, this patch maps into the
GPU vm only the page range that was migrated, instead of mapping all pages of the
svm range in which the page fault happened.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 33 +---
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..81dbcc8a4ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1619,6 +1619,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, 
int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
+ unsigned long map_start, unsigned long 
map_last,
  struct svm_range *prange, int32_t gpuidx,
  bool intr, bool wait, bool flush_tlb)
 {
@@ -1630,6 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
int32_t idx;
int r = 0;
 
+   if (map_start < prange->start || map_last > prange->last) {
+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+map_start, map_last, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -1747,9 +1754,16 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
r = -EAGAIN;
}
 
-   if (!r)
-   r = svm_range_map_to_gpus(prange, offset, npages, 
readonly,
- ctx->bitmap, wait, flush_tlb);
+   if (!r) {
+   map_start = max(map_start, prange->start + offset);
+   map_last = min(map_last, prange->start + offset + 
npages);
+   if (map_start <= map_last) {
+   offset = map_start - prange->start;
+   npages = map_last - map_start + 1;
+   r = svm_range_map_to_gpus(prange, offset, 
npages, readonly,
+ ctx->bitmap, wait, 
flush_tlb);
+   }
+   }
 
if (!r && next == end)
prange->mapped_to_gpu = true;
@@ -1855,8 +1869,8 @@ static void svm_range_restore_work(struct work_struct 
*work)
 */
mutex_lock(>migrate_mutex);
 
-   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-  false, true, false);
+   r = svm_range_validate_and_map(mm, prange->start, prange->last, 
prange,
+  MAX_GPU_INSTANCE, false, true, 
false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
 prange->start);
@@ -3069,6 +3083,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
   write_fault, timestamp);
 
+   start = prange->start;
+   last = prange->last;
if (prange->actual_loc != 0 || best_loc != 0) {
migration = true;
/* Align migration range start and size to granularity size */
@@ -3102,10 +3118,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
}
 
-   r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+   r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+  false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
-r, svms, prange->start, prange->last);
+r, svms, start, last);
 
kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
 migration);
@@ -3650,7 +3667,7 @@ svm_range_set_attr(struct kfd_process *p, struct 
mm_struct *mm,
 
flush_tlb = !migrated && update_mapping && 
prange->mapped_to_gpu;
 
-   r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+   r = svm_range_validate_and_map(mm, prange->start, prange->last, 
prange, MAX_GPU_INSTANCE,
   true, true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);
-- 
2.25.1



[PATCH v2] drm/amdgpu: Correctly use bo_va->ref_count in compute VMs

2023-10-12 Thread Xiaogang . Chen
From: Xiaogang Chen 

This is needed to correctly handle BOs imported into a compute VM from gfx.
Both kfd and gfx should use the same bo_va and set bo_va->ref_count correctly
when mapping the BOs into the same VM; otherwise we may trigger a kernel general
protection fault when iterating mappings over bo_va's valids or invalids list.

Signed-off-by: Felix Kuehling 
Signed-off-by: Xiaogang Chen 
Acked-by: Christian König 
Reviewed-by: Ramesh Errabolu 
Tested-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a15e59abe70a..c1ec93cc50ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -832,6 +832,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
+   struct amdgpu_bo_va *bo_va;
bool same_hive = false;
int i, ret;
 
@@ -919,7 +920,13 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
pr_debug("Unable to reserve BO during memory attach");
goto unwind;
}
-   attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+   bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+   if (!bo_va)
+   bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+   else
+   ++bo_va->ref_count;
+   attachment[i]->bo_va = bo_va;
+
amdgpu_bo_unreserve(bo[i]);
if (unlikely(!attachment[i]->bo_va)) {
ret = -ENOMEM;
@@ -943,7 +950,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
continue;
if (attachment[i]->bo_va) {
amdgpu_bo_reserve(bo[i], true);
-   amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+   if (--attachment[i]->bo_va->ref_count == 0)
+   amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
list_del([i]->list);
}
-- 
2.25.1



[PATCH] Find bo_va before creating it when mapping bo into compute VM

2023-10-11 Thread Xiaogang . Chen
From: Xiaogang Chen 

This is needed to correctly handle BOs imported into a compute VM from gfx.
Both kfd and gfx should use the same bo_va when mapping the BOs into the same VM;
otherwise we may trigger a kernel general protection fault when iterating mappings
from the bo_va.

Signed-off-by: Felix Kuehling 
Acked-by: Christian König 
Reviewed-by: Ramesh Errabolu 
Reviewed-By: Xiaogang Chen 
Tested-By: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a15e59abe70a..c1ec93cc50ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -832,6 +832,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
+   struct amdgpu_bo_va *bo_va;
bool same_hive = false;
int i, ret;
 
@@ -919,7 +920,13 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
pr_debug("Unable to reserve BO during memory attach");
goto unwind;
}
-   attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+   bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+   if (!bo_va)
+   bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+   else
+   ++bo_va->ref_count;
+   attachment[i]->bo_va = bo_va;
+
amdgpu_bo_unreserve(bo[i]);
if (unlikely(!attachment[i]->bo_va)) {
ret = -ENOMEM;
@@ -943,7 +950,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
continue;
if (attachment[i]->bo_va) {
amdgpu_bo_reserve(bo[i], true);
-   amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+   if (--attachment[i]->bo_va->ref_count == 0)
+   amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
list_del([i]->list);
}
-- 
2.25.1



[PATCH v4] drm/amdkfd: Use partial migrations in GPU page faults

2023-10-03 Thread Xiaogang . Chen
From: Xiaogang Chen 

This patch implements partial migration in gpu page fault handling according to
the migration granularity (default 2MB) and does not split the svm range in cpu
page fault handling. An svm range may now include pages from both system ram and
the vram of one gpu. These changes are expected to improve migration performance
and reduce mmu callback and TLB flush workloads.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  83 +---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
 4 files changed, 162 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..6a059e4aff86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
goto out_free;
}
if (cpages != npages)
-   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+   pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
 
r = svm_migrate_copy_to_vram(node, prange, , , scratch, 
ttm_res_offset);
migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
 {
unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
 
-   if (prange->actual_loc == best_loc) {
-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+   prange->svms, start_mgr, last_mgr);
return 0;
}
 
+   if (start_mgr < prange->start || last_mgr > prange->last) {
+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+start_mgr, last_mgr, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+   prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+   best_loc);
 
-   start = prange->start << PAGE_SHIFT;
-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
 
r = svm_range_vram_node_new(node, prange, true);
if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
if (cpages) {
prange->actual_loc = best_loc;
-   svm_range_dma_unmap(prange);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no page migrated and all pages from prange are at
+* sys ram drop svm_bo got from svm_range_vram_node_new
+*/
svm_range_vram_node_free(prange);
}
 
@@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, 
struct svm_range *prange,
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- *   0 - success with all pages migrated
  *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero - num

[PATCH] drm/amdkfd: Fix a race condition of vram buffer unref in svm code

2023-09-26 Thread Xiaogang . Chen
From: Xiaogang Chen 

prange->svm_bo unref can happen in both the mmu callback and a callback after
migration to system ram. Both are async calls in different tasks. Serialize the
svm_bo unref operation to avoid a random use-after-free.

Signed-off-by: Xiaogang.Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 70aa882636ab..8e246e848018 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -637,6 +637,15 @@ void svm_range_vram_node_free(struct svm_range *prange)
 {
svm_range_bo_unref(prange->svm_bo);
prange->ttm_res = NULL;
+   /* serialize prange->svm_bo unref */
+   mutex_lock(&prange->lock);
+   /* prange->svm_bo has not been unref */
+   if (prange->ttm_res) {
+   prange->ttm_res = NULL;
+   mutex_unlock(&prange->lock);
+   svm_range_bo_unref(prange->svm_bo);
+   } else
+   mutex_unlock(&prange->lock);
 }
 
 struct kfd_node *
-- 
2.25.1
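
A sketch of why the serialization above is needed (hypothetical helper name): both
the mmu-notifier path and the migrate-to-ram callback may free the vram node
concurrently, and prange->lock turns the "still holds a reference?" check and the
clearing of ttm_res into one atomic step, so exactly one caller drops the svm_bo
reference.

static void vram_node_free_sketch(struct svm_range *prange)
{
	mutex_lock(&prange->lock);
	if (prange->ttm_res) {
		/* we won the race: drop the reference exactly once */
		prange->ttm_res = NULL;
		mutex_unlock(&prange->lock);
		svm_range_bo_unref(prange->svm_bo);
	} else {
		/* the other path already released it */
		mutex_unlock(&prange->lock);
	}
}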



[PATCH v3] drm/amdkfd: Use partial migrations in GPU page faults

2023-09-20 Thread Xiaogang . Chen
From: Xiaogang Chen 

This patch implements partial migration in gpu page fault handling according to
the migration granularity (default 2MB) and does not split the svm range in cpu
page fault handling. An svm range may now include pages from both system ram and
the vram of one gpu. These changes are expected to improve migration performance
and reduce mmu callback and TLB flush workloads.

Signed-off-by: xiaogang chen
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 156 +--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 104 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   6 +-
 4 files changed, 178 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6c25dab051d5..e886f9ce40ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -442,10 +442,10 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
goto out_free;
}
if (cpages != npages)
-   pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+   pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
 
r = svm_migrate_copy_to_vram(node, prange, , , scratch, 
ttm_res_offset);
migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
 {
unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
 
-   if (prange->actual_loc == best_loc) {
-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+   prange->svms, start_mgr, last_mgr);
return 0;
}
 
+   if (start_mgr < prange->start || last_mgr > prange->last) {
+   pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+start_mgr, last_mgr, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+   prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+   best_loc);
 
-   start = prange->start << PAGE_SHIFT;
-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
 
r = svm_range_vram_node_new(node, prange, true);
if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
if (cpages) {
prange->actual_loc = best_loc;
-   svm_range_dma_unmap(prange);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no page migrated and all pages from prange are at
+* sys ram drop svm_bo got from svm_range_vram_node_new
+*/
svm_range_vram_node_free(prange);
}
 
@@ -663,19 +676,19 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, 
struct svm_range *prange,
  * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
  *
  * Return:
- *   0 - success with all pages migrated
  *   negative values - indicate error
- *   positive values - partial migration, number of pages not migrated
+ *   positive values or zero -

[PATCH v2] drm/amdkfd: fix some race conditions in vram buffer alloc/free of svm code

2023-09-20 Thread Xiaogang . Chen
From: Xiaogang Chen 

This patch fixes:
1: The ref count of prange's svm_bo gets decreased by an async call from hmm. When
waiting for the svm_bo of prange to be released we should also wait for
prange->svm_bo to become NULL, otherwise prange->svm_bo may be set to NULL after a
new vram buffer has been allocated.

2: While waiting for the svm_bo of prange to be released in a while loop, the
current task should be rescheduled to give other tasks an opportunity to run,
especially the workqueue task that handles the svm_bo ref release; otherwise we may
run into a soft lockup.

Signed-off-by: Xiaogang.Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index bed0f8bf83c7..164cd77af62d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -502,11 +502,11 @@ svm_range_validate_svm_bo(struct kfd_node *node, struct 
svm_range *prange)
 
/* We need a new svm_bo. Spin-loop to wait for concurrent
 * svm_range_bo_release to finish removing this range from
-* its range list. After this, it is safe to reuse the
-* svm_bo pointer and svm_bo_list head.
+* its range list and set prange->svm_bo to null. After this,
+* it is safe to reuse the svm_bo pointer and svm_bo_list head.
 */
-   while (!list_empty_careful(&prange->svm_bo_list))
-   ;
+   while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo)
+   cond_resched();
 
return false;
 }
-- 
2.25.1



[PATCH] drm/amdkfd: fix some race conditions in vram buffer alloc/free of svm code

2023-09-20 Thread Xiaogang . Chen
From: Xiaogang Chen 

This patch fixes:
1: The ref count of prange's svm_bo gets decreased by an async call from hmm. When
waiting for the svm_bo of prange to be released we should also wait for
prange->svm_bo to become NULL, otherwise prange->svm_bo may be set to NULL after a
new vram buffer has been allocated.

2: While waiting for the svm_bo of prange to be released in a while loop, the
current task should call schedule() to give other tasks an opportunity to run,
especially the workqueue task that handles the svm_bo ref release; otherwise we may
run into a soft lockup.

Signed-off-by: Xiaogang.Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index bed0f8bf83c7..1074a4aedf57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -502,11 +502,11 @@ svm_range_validate_svm_bo(struct kfd_node *node, struct 
svm_range *prange)
 
/* We need a new svm_bo. Spin-loop to wait for concurrent
 * svm_range_bo_release to finish removing this range from
-* its range list. After this, it is safe to reuse the
-* svm_bo pointer and svm_bo_list head.
+* its range list and set prange->svm_bo to null. After this,
+* it is safe to reuse the svm_bo pointer and svm_bo_list head.
 */
-   while (!list_empty_careful(&prange->svm_bo_list))
-   ;
+   while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo)
+   schedule();
 
return false;
 }
-- 
2.25.1



[PATCH] drm/amdkfd: Separate dma unmap and free of dma address array operations

2023-09-15 Thread Xiaogang . Chen
From: Xiaogang Chen 

We do not need to free the dma address array of an svm_range each time we dma
unmap pages in the svm_range, as we can reuse the same array. Only free it when
the svm_range itself is freed. Separate these two operations and use them
accordingly.

Signed-off-by: Xiaogang.Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 23 ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  4 ++--
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 192b0d106413..6c25dab051d5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -460,7 +460,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, node->id, trigger);
 
-   svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+   svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
 
 out_free:
kvfree(buf);
@@ -544,7 +544,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
if (cpages) {
prange->actual_loc = best_loc;
-   svm_range_free_dma_mappings(prange, true);
+   svm_range_dma_unmap(prange);
} else {
svm_range_vram_node_free(prange);
}
@@ -745,7 +745,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct 
svm_range *prange,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
node->id, 0, trigger);
 
-   svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+   svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
 
 out_free:
kvfree(buf);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 5d7ba7dbf6ce..bed0f8bf83c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -229,7 +229,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long 
*bitmap,
return r;
 }
 
-void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
 unsigned long offset, unsigned long npages)
 {
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -247,7 +247,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t 
*dma_addr,
}
 }
 
-void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
+void svm_range_dma_unmap(struct svm_range *prange)
 {
struct kfd_process_device *pdd;
dma_addr_t *dma_addr;
@@ -268,10 +268,8 @@ void svm_range_free_dma_mappings(struct svm_range *prange, 
bool unmap_dma)
continue;
}
dev = >dev->adev->pdev->dev;
-   if (unmap_dma)
-   svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
-   kvfree(dma_addr);
-   prange->dma_addr[gpuidx] = NULL;
+
+   svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages);
}
 }
 
@@ -279,18 +277,29 @@ static void svm_range_free(struct svm_range *prange, bool 
do_unmap)
 {
uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
struct kfd_process *p = container_of(prange->svms, struct kfd_process, 
svms);
+   uint32_t gpuidx;
 
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
svm_range_vram_node_free(prange);
-   svm_range_free_dma_mappings(prange, do_unmap);
+   if (do_unmap)
+   svm_range_dma_unmap(prange);
 
if (do_unmap && !p->xnack_enabled) {
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
}
+
+   /* free dma_addr array for each gpu */
+   for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+   if (prange->dma_addr[gpuidx]) {
+   kvfree(prange->dma_addr[gpuidx]);
+   prange->dma_addr[gpuidx] = NULL;
+   }
+   }
+
mutex_destroy(>lock);
mutex_destroy(>migrate_mutex);
kfree(prange);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 9e668eeefb32..78bfb83cd0c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -181,9 +181,9 @@ void svm_range_add_list_work(struct svm_range_list *svms,
 struct svm_range *prange, struct mm

[PATCH v2] drm/amdkfd: Use partial migrations in GPU page faults

2023-09-11 Thread Xiaogang . Chen
From: Xiaogang Chen 

This patch implements partial migration in gpu page fault handling according to
the migration granularity (default 2MB) and does not split the svm range in cpu
page fault handling. An svm range may now include pages from both system ram and
the vram of one gpu. These changes are expected to improve migration performance
and reduce mmu callback and TLB flush workloads.

Signed-off-by: xiaogang chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 151 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  88 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +-
 4 files changed, 171 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 7d82c7da223a..653a2edbaba4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -445,7 +445,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
 cpages, npages);
else
-   pr_debug("0x%lx pages migrated\n", cpages);
+   pr_debug("0x%lx pages collected\n", cpages);
 
r = svm_migrate_copy_to_vram(node, prange, , , scratch, 
ttm_res_offset);
migrate_vma_pages();
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
 {
unsigned long addr, start, end;
@@ -498,23 +501,30 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
unsigned long cpages = 0;
long r = 0;
 
-   if (prange->actual_loc == best_loc) {
-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+prange->svms, start_mgr, last_mgr);
return 0;
}
 
+   if (start_mgr < prange->start || last_mgr > prange->last) {
+   pr_debug("migration range [0x%lx 0x%lx] out prange [0x%lx 
0x%lx]\n",
+start_mgr, last_mgr, prange->start, 
prange->last);
+   return -EFAULT;
+   }
+
node = svm_range_get_node_by_id(prange, best_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
return -ENODEV;
}
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
-prange->start, prange->last, best_loc);
+   pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+prange->svms, start_mgr, last_mgr, prange->start, 
prange->last,
+best_loc);
 
-   start = prange->start << PAGE_SHIFT;
-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
 
r = svm_range_vram_node_new(node, prange, true);
if (r) {
@@ -544,8 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
if (cpages) {
prange->actual_loc = best_loc;
-   svm_range_free_dma_mappings(prange, true);
-   } else {
+   prange->vram_pages = prange->vram_pages + cpages;
+   } else if (!prange->actual_loc) {
+   /* if no page migrated and all pages from prange are at
+* sys ram drop svm_bo got from svm_range_vram_node_new
+*/
svm_range_vram_node_free(prange);
}
 
@@ -670,7 +683,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
 static long
 svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
   struct vm_area_struct *vma, uint64_t start, uint64_t end,
-  uint32_t trigger, struct page *fault_page)
+  uint32_t trigger, struct page *fault_page, unsigned long 
*mpages)
 {
struct kfd_process *p = container_of(prange->svms, struct 

[PATCH] drm/amdkfd: Use partial migrations in GPU page faults

2023-08-24 Thread Xiaogang . Chen
From: Xiaogang Chen 

This patch implements partial migration in gpu page fault handling according to
the migration granularity (default 2MB) and does not split the svm range in cpu
page fault handling. Now an svm range may have pages from both system ram and
vram of one gpu. These changes are expected to improve migration performance and
reduce mmu callback and TLB flush workloads.

Signed-off-by: xiaogang chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 153 +++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c |  87 -
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +-
 4 files changed, 162 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 7d82c7da223a..5a3aa80a1834 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -479,6 +479,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
  * @mm: the process mm structure
  * @trigger: reason of migration
  *
@@ -489,6 +491,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
  */
 static int
 svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
 {
unsigned long addr, start, end;
@@ -498,9 +501,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
unsigned long cpages = 0;
long r = 0;
 
-   if (prange->actual_loc == best_loc) {
-   pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
-prange->svms, prange->start, prange->last, best_loc);
+   if (!best_loc) {
+   pr_debug("request svms 0x%p [0x%lx 0x%lx] migrate to sys ram\n",
+prange->svms, start_mgr, last_mgr);
return 0;
}
 
@@ -513,8 +516,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
 prange->start, prange->last, best_loc);
 
-   start = prange->start << PAGE_SHIFT;
-   end = (prange->last + 1) << PAGE_SHIFT;
+   start = start_mgr << PAGE_SHIFT;
+   end = (last_mgr + 1) << PAGE_SHIFT;
 
r = svm_range_vram_node_new(node, prange, true);
if (r) {
@@ -544,10 +547,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
 
if (cpages) {
prange->actual_loc = best_loc;
-   svm_range_free_dma_mappings(prange, true);
-   } else {
+   /* only free dma mapping in the migrated range */
+   svm_range_free_dma_mappings(prange, true,  start_mgr - 
prange->start,
+last_mgr - start_mgr + 1);
+   } else if (!prange->actual_loc)
+   /* if all pages from prange are at sys ram */
svm_range_vram_node_free(prange);
-   }
 
return r < 0 ? r : 0;
 }
@@ -762,6 +767,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct 
svm_range *prange,
  * svm_migrate_vram_to_ram - migrate svm range from device to system
  * @prange: range structure
  * @mm: process mm, use current->mm if NULL
+ * @start_mgr: start page need be migrated to sys ram
+ * @last_mgr: last page need be migrated to sys ram
  * @trigger: reason of migration
  * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page 
fault callback
  *
@@ -771,7 +778,8 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct 
svm_range *prange,
  * 0 - OK, otherwise error code
  */
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
-   uint32_t trigger, struct page *fault_page)
+   unsigned long start_mgr, unsigned long 
last_mgr,
+   uint32_t trigger, struct page 
*fault_page)
 {
struct kfd_node *node;
struct vm_area_struct *vma;
@@ -781,23 +789,30 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, 
struct mm_struct *mm,
unsigned long upages = 0;
long r = 0;
 
+   /* this prange has no vram pages to migrate to sys ram */
if (!prange->actual_loc) {
pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
 prange->start, prange->last);
return 0;
}
 
+   if (start_mgr < prange->start || last_mgr > prange->last) {
+   pr_debug(&

[PATCH] drm/amdgpu: have bos for PDs/PTS cpu accessible when kfd uses cpu to update vm

2023-06-29 Thread Xiaogang . Chen
From: Xiaogang Chen 

When kfd uses the cpu to update a vm, iterate over all current PD/PT bos, add
the AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED flag and kmap them into kernel virtual
address space before kfd updates the vm that was created by gfx.

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 11 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 28 +++
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 291977b93b1d..dedf1bf44dc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2278,17 +2278,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm)
if (r)
goto unreserve_bo;
 
+   r = amdgpu_vm_pt_cpu_access_root(adev, vm);
+   if (r)
+   goto unreserve_bo;
+
vm->update_funcs = _vm_cpu_funcs;
} else {
vm->update_funcs = _vm_sdma_funcs;
}
-   /*
-* Make sure root PD gets mapped. As vm_update_mode could be changed
-* when turning a GFX VM into a compute VM.
-*/
-   r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo));
-   if (r)
-   goto unreserve_bo;
 
dma_fence_put(vm->last_update);
vm->last_update = dma_fence_get_stub();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9c85d494f2a2..9b3e75de7c5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -491,6 +491,8 @@ void amdgpu_vm_pt_free_work(struct work_struct *work);
 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
 #endif
 
+int amdgpu_vm_pt_cpu_access_root(struct amdgpu_device *adev, struct amdgpu_vm 
*vm);
+
 /**
  * amdgpu_vm_tlb_seq - return tlb flush sequence number
  * @vm: the amdgpu_vm structure to query
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index dea1a64be44d..a08742191b7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -1044,3 +1044,31 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params 
*params,
 
return 0;
 }
+
+/**
+ * amdgpu_vm_pt_cpu_access_root - have bo of root PD cpu accessible
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * make root page directory and everything below it cpu accessible.
+ */
+int amdgpu_vm_pt_cpu_access_root(struct amdgpu_device *adev, struct amdgpu_vm 
*vm)
+{
+   struct amdgpu_vm_pt_cursor cursor;
+   struct amdgpu_vm_bo_base *entry;
+   int r;
+   struct amdgpu_bo_vm *bo;
+
+   for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+
+   if (entry->bo) {
+   bo = to_amdgpu_bo_vm(entry->bo);
+   entry->bo->flags |= 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+   r = amdgpu_vm_cpu_funcs.map_table(bo);
+   if (r)
+   return r;
+   }
+   }
+
+   return 0;
+}
-- 
2.25.1



[PATCH] drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute

2023-06-19 Thread Xiaogang . Chen
From: Xiaogang Chen 

Since we allow kfd and graphics to operate on the same GPU VM for interoperation
between them, the GPU VM may have been used by graphics vm operations before kfd
turns a GPU VM into a compute VM. Remove the vm clean check from
amdgpu_vm_make_compute.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index eff73c428b12..291977b93b1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2245,16 +2245,16 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm)
if (r)
return r;
 
-   /* Sanity checks */
-   if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
-   r = -EINVAL;
-   goto unreserve_bo;
-   }
-
/* Check if PD needs to be reinitialized and do it before
 * changing any other state, in case it fails.
 */
if (pte_support_ats != vm->pte_support_ats) {
+   /* Sanity checks */
+   if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
+   r = -EINVAL;
+   goto unreserve_bo;
+   }
+
vm->pte_support_ats = pte_support_ats;
r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
   false);
-- 
2.25.1



[PATCH] drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute

2023-06-19 Thread Xiaogang . Chen
From: Xiaogang Chen 

Since we allow kfd and graphics to operate on the same GPU VM for interoperation
between them, the GPU VM may have been used by graphics vm operations before kfd
turns a GFX VM into a compute VM. Remove the vm clean check from
amdgpu_vm_make_compute.

Signed-off-by: Xiaogang Chen
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index eff73c428b12..33f05297ab7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2246,7 +2246,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, 
struct amdgpu_vm *vm)
return r;
 
/* Sanity checks */
-   if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
+   if (pte_support_ats && !amdgpu_vm_pt_is_root_clean(adev, vm)) {
r = -EINVAL;
goto unreserve_bo;
}
-- 
2.25.1



[PATCH] drm/amdkfd: Fix an issue at userptr buffer validation process.

2023-04-20 Thread Xiaogang . Chen
From: Xiaogang Chen 

amdgpu_ttm_tt_get_user_pages can fail (-EFAULT). If it fails, mem has no hmm
range or user_pages associated with it. Keep it on
process_info->userptr_inval_list and keep mem->invalid set until a following
scheduled attempt can validate it.
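
A minimal sketch of the intended restore flow (illustrative only; the list and
field names follow the driver, try_get_user_pages() is a hypothetical stand-in
for the real amdgpu_ttm_tt_get_user_pages call):

/* A BO whose user pages could not be fetched keeps mem->invalid set and
 * stays on userptr_inval_list, so the next scheduled restore attempt will
 * retry it; only a BO with an hmm range attached is marked valid.
 */
list_for_each_entry_safe(mem, tmp_mem,
                         &process_info->userptr_inval_list,
                         validate_list.head) {
        if (try_get_user_pages(mem))            /* hypothetical helper */
                continue;                       /* keep mem->invalid set */

        if (mem->range)
                mem->invalid = 0;
}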

Signed-off-by: Xiaogang Chen 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 ++-
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7b1f5933ebaa..fad5183baf80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2444,7 +2444,9 @@ static int update_invalid_user_pages(struct 
amdkfd_process_info *process_info,
ret = -EAGAIN;
goto unlock_out;
}
-   mem->invalid = 0;
+/* set mem valid if mem has hmm range associated */
+   if (mem->range)
+   mem->invalid = 0;
}
 
 unlock_out:
@@ -2576,16 +2578,28 @@ static int confirm_valid_user_pages_locked(struct 
amdkfd_process_info *process_i
list_for_each_entry_safe(mem, tmp_mem,
 _info->userptr_inval_list,
 validate_list.head) {
-   bool valid = amdgpu_ttm_tt_get_user_pages_done(
-   mem->bo->tbo.ttm, mem->range);
+   /* Only check mem with hmm range associated */
+   bool valid;
 
-   mem->range = NULL;
-   if (!valid) {
-   WARN(!mem->invalid, "Invalid BO not marked invalid");
+   if (mem->range) {
+   valid = amdgpu_ttm_tt_get_user_pages_done(
+   mem->bo->tbo.ttm, mem->range);
+
+   mem->range = NULL;
+   if (!valid) {
+   WARN(!mem->invalid, "Invalid BO not marked 
invalid");
+   ret = -EAGAIN;
+   continue;
+   }
+   } else
+   /* keep mem without hmm range at userptr_inval_list */
+   continue;
+
+   if (mem->invalid) {
+   WARN(1, "Valid BO is marked invalid");
ret = -EAGAIN;
continue;
}
-   WARN(mem->invalid, "Valid BO is marked invalid");
 
list_move_tail(>validate_list.head,
   _info->userptr_valid_list);
-- 
2.25.1



[PATCH] drm/amdkfd: Fix some issues at userptr buffer validation process.

2023-04-12 Thread Xiaogang . Chen
From: Xiaogang Chen 

The userptr buffer restore process has the following issues:

1: amdgpu_ttm_tt_get_user_pages can fail (-EFAULT). If it fails we should not
mark mem valid (mem->invalid = 0); in this case mem has no hmm range or
user_pages associated with it.

2: The mmu notifier can run concurrently and update
mem->range->notifier->invalidate_seq, but not mem->range->notifier_seq. That
leaves mem->range->notifier_seq stale when mem is on
process_info->userptr_inval_list and amdgpu_amdkfd_restore_userptr_worker got
interrupted. On the next rescheduled attempt we would compare the stale
mem->range->notifier_seq against mem->range->notifier->invalidate_seq.
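
For background, the standard mmu_interval_notifier sequence-check pattern looks
roughly like this (simplified sketch of the helpers in
include/linux/mmu_notifier.h; the driver lock that must be held around the
retry check and the actual page collection are elided), which is why a stale
notifier_seq makes the retry check unreliable:

#include <linux/mmu_notifier.h>

static void map_range_sketch(struct mmu_interval_notifier *notifier)
{
        unsigned long seq;

        do {
                seq = mmu_interval_read_begin(notifier);
                /* ... collect/fault the pages for the range here ... */
        } while (mmu_interval_read_retry(notifier, seq));

        /* seq is current: safe to program device page tables */
}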

Signed-off-by: Xiaogang Chen 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 45 +++
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7b1f5933ebaa..6881f1b0844c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2444,7 +2444,9 @@ static int update_invalid_user_pages(struct 
amdkfd_process_info *process_info,
ret = -EAGAIN;
goto unlock_out;
}
-   mem->invalid = 0;
+/* set mem valid if mem has hmm range associated */
+   if (mem->range)
+   mem->invalid = 0;
}
 
 unlock_out:
@@ -2576,16 +2578,28 @@ static int confirm_valid_user_pages_locked(struct 
amdkfd_process_info *process_i
list_for_each_entry_safe(mem, tmp_mem,
 _info->userptr_inval_list,
 validate_list.head) {
-   bool valid = amdgpu_ttm_tt_get_user_pages_done(
-   mem->bo->tbo.ttm, mem->range);
+   /* Only check mem with hmm range associated */
+   bool valid;
 
-   mem->range = NULL;
-   if (!valid) {
-   WARN(!mem->invalid, "Invalid BO not marked invalid");
+   if (mem->range) {
+   valid = amdgpu_ttm_tt_get_user_pages_done(
+   mem->bo->tbo.ttm, mem->range);
+
+   mem->range = NULL;
+   if (!valid) {
+   WARN(!mem->invalid, "Invalid BO not marked 
invalid");
+   ret = -EAGAIN;
+   continue;
+   }
+   } else
+   /* keep mem without hmm range at userptr_inval_list */
+   continue;
+
+   if (mem->invalid) {
+   WARN(1, "Valid BO is marked invalid");
ret = -EAGAIN;
continue;
}
-   WARN(mem->invalid, "Valid BO is marked invalid");
 
list_move_tail(>validate_list.head,
   _info->userptr_valid_list);
@@ -2644,8 +2658,23 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct 
work_struct *work)
 * reference counting inside KFD will handle this case.
 */
mutex_lock(_info->notifier_lock);
-   if (process_info->evicted_bos != evicted_bos)
+   if (process_info->evicted_bos != evicted_bos) {
+   /* mmu notifier interrupted amdgpu_amdkfd_restore_userptr_worker
+* before reschedule next attempt update stale 
mem->range->notifier_seq
+* inside userptr_inval_list
+*/
+   struct kgd_mem *mem, *tmp_mem;
+
+   list_for_each_entry_safe(mem, tmp_mem,
+   _info->userptr_inval_list,
+   validate_list.head) {
+
+   if (mem->range)
+   mem->range->notifier_seq = 
mem->range->notifier->invalidate_seq;
+   }
+
goto unlock_notifier_out;
+   }
 
if (confirm_valid_user_pages_locked(process_info)) {
WARN(1, "User pages unexpectedly invalid");
-- 
2.25.1



[PATCH] drm/amdkfd: Change WARN to pr_debug when same userptr BOs got invalidated by mmu.

2023-04-10 Thread Xiaogang . Chen
From: Xiaogang Chen 

While KFD restores evicted userptr BOs, the mmu invalidate callback may
invalidate the same userptr BOs that have just been restored. When the KFD
restore process detects this, it reschedules another validation attempt; it is
not an error. Change WARN to pr_debug, do not move the BOs to
userptr_valid_list, and let the next scheduled delayed work validate them again.

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7b1f5933ebaa..d0c224703278 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2581,11 +2581,18 @@ static int confirm_valid_user_pages_locked(struct 
amdkfd_process_info *process_i
 
mem->range = NULL;
if (!valid) {
-   WARN(!mem->invalid, "Invalid BO not marked invalid");
+   if (!mem->invalid)
+   pr_debug("Invalid BO not marked invalid\n");
+
+   ret = -EAGAIN;
+   continue;
+   }
+
+   if (mem->invalid) {
+   pr_debug("Valid BO is marked invalid\n");
ret = -EAGAIN;
continue;
}
-   WARN(mem->invalid, "Valid BO is marked invalid");
 
list_move_tail(>validate_list.head,
   _info->userptr_valid_list);
@@ -2648,7 +2655,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct 
work_struct *work)
goto unlock_notifier_out;
 
if (confirm_valid_user_pages_locked(process_info)) {
-   WARN(1, "User pages unexpectedly invalid");
+   pr_debug("User pages unexpectedly invalid, reschedule another 
attempt\n");
goto unlock_notifier_out;
}
 
-- 
2.25.1



[PATCH] drm/amdkfd: fix warnings in kfd_migrate.c

2023-03-10 Thread Xiaogang . Chen
From: Xiaogang Chen 

drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_migrate.c: In function 
‘svm_migrate_copy_to_vram’:
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_migrate.c:393:1: warning: label ‘out’ 
defined but not used [-Wunused-label]
  393 | out:
  | ^~~
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_migrate.c:525:29: note: format string 
is defined here
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_migrate.c:40:22: warning: format ‘%d’ 
expects argument of type ‘int’, but argument 4 has type ‘long int’ [-Wformat=]
   40 | #define dev_fmt(fmt) "kfd_migrate: " fmt
  525 |   dev_dbg(adev->dev, "fail %d to alloc vram\n", r);

Fixes: b0b7d79469d9 ("drm/amdkfd: Get prange->offset after 
svm_range_vram_node_new")
Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 734b4eeb0f3e..6a7dd6574646 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -390,7 +390,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
migrate->dst[i + 3] = 0;
}
 #endif
-out:
+
return r;
 }
 
@@ -522,7 +522,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
r = svm_range_vram_node_new(adev, prange, true);
if (r) {
-   dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
+   dev_dbg(adev->dev, "fail %ld to alloc vram\n", r);
return r;
}
ttm_res_offset = prange->offset << PAGE_SHIFT;
-- 
2.25.1



[PATCH] drm/amdkfd: Get prange->offset after svm_range_vram_node_new

2023-03-07 Thread Xiaogang . Chen
From: Xiaogang Chen 

During migration to vram, prange->offset becomes valid only after the vram
buffer is located, either by reusing the old one or allocating a new one. Move
svm_range_vram_node_new before the per-vma migration loop to get a valid
prange->offset.

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index fd54a00e7229..15791490c23e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -310,12 +310,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
src = scratch;
dst = (uint64_t *)(scratch + npages);
 
-   r = svm_range_vram_node_new(adev, prange, true);
-   if (r) {
-   dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
-   goto out;
-   }
-
amdgpu_res_first(prange->ttm_res, ttm_res_offset,
 npages << PAGE_SHIFT, );
for (i = j = 0; i < npages; i++) {
@@ -525,6 +519,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
+
+   r = svm_range_vram_node_new(adev, prange, true);
+   if (r) {
+   dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
+   return r;
+   }
ttm_res_offset = prange->offset << PAGE_SHIFT;
 
for (addr = start; addr < end;) {
-- 
2.25.1



[PATCH v2] drm/amdkfd: Cal vram offset in TTM resource for each svm_migrate_copy_to_vram

2023-03-01 Thread Xiaogang . Chen
From: Xiaogang Chen 

svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The prange may
cross multiple vmas, so we need to remember the current dst vram offset in the
TTM resource for each per-vma migration.
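
The bookkeeping in the diff below boils down to the following sketch (vma_end()
and copy_chunk() are hypothetical stand-ins for the real vma lookup and
svm_migrate_vma_to_vram call):

static void migrate_range_sketch(unsigned long start, unsigned long end,
                                 uint64_t base_offset)
{
        uint64_t ttm_res_offset = base_offset;  /* prange->offset << PAGE_SHIFT */
        unsigned long addr = start;

        while (addr < end) {
                unsigned long next = min(vma_end(addr), end);

                copy_chunk(addr, next, ttm_res_offset);
                /* advance the destination offset so the next vma chunk lands
                 * right after this one in the TTM resource
                 */
                ttm_res_offset += next - addr;
                addr = next;
        }
}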

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 1c625433ff30..373cd7b0e1ca 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -294,7 +294,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct 
migrate_vma *migrate)
 static int
 svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 struct migrate_vma *migrate, struct dma_fence **mfence,
-dma_addr_t *scratch)
+dma_addr_t *scratch, uint64_t ttm_res_offset)
 {
uint64_t npages = migrate->npages;
struct device *dev = adev->dev;
@@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
uint64_t i, j;
int r;
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-prange->last);
+   pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last, ttm_res_offset);
 
src = scratch;
dst = (uint64_t *)(scratch + npages);
@@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
goto out;
}
 
-   amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+   amdgpu_res_first(prange->ttm_res, ttm_res_offset,
 npages << PAGE_SHIFT, );
for (i = j = 0; i < npages; i++) {
struct page *spage;
@@ -403,7 +403,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
 static long
 svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
-   uint64_t end, uint32_t trigger)
+   uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
 {
struct kfd_process *p = container_of(prange->svms, struct kfd_process, 
svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -456,7 +456,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
else
pr_debug("0x%lx pages migrated\n", cpages);
 
-   r = svm_migrate_copy_to_vram(adev, prange, , , scratch);
+   r = svm_migrate_copy_to_vram(adev, prange, , , scratch, 
ttm_res_offset);
migrate_vma_pages();
 
pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -504,6 +504,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
+   uint64_t ttm_res_offset;
unsigned long cpages = 0;
long r = 0;
 
@@ -524,6 +525,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
+   ttm_res_offset = prange->offset << PAGE_SHIFT;
 
for (addr = start; addr < end;) {
unsigned long next;
@@ -533,13 +535,14 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
break;
 
next = min(vma->vm_end, end);
-   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger);
+   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger, ttm_res_offset);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
} else {
cpages += r;
}
+   ttm_res_offset += next - addr;
addr = next;
}
 
-- 
2.25.1



[PATCH] drm/amdkfd: Cal vram offset in page for each svm_migrate_copy_to_vram

2023-02-27 Thread Xiaogang . Chen
From: Xiaogang Chen 

svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The prange may
cross multiple vmas, so we need to remember the current dst vram offset in pages
for each per-vma migration.

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 1c625433ff30..60664e0cbc1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -294,7 +294,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct 
migrate_vma *migrate)
 static int
 svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 struct migrate_vma *migrate, struct dma_fence **mfence,
-dma_addr_t *scratch)
+dma_addr_t *scratch, uint64_t *cur_dst)
 {
uint64_t npages = migrate->npages;
struct device *dev = adev->dev;
@@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
uint64_t i, j;
int r;
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-prange->last);
+   pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last, *cur_dst);
 
src = scratch;
dst = (uint64_t *)(scratch + npages);
@@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
goto out;
}
 
-   amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+   amdgpu_res_first(prange->ttm_res, *cur_dst << PAGE_SHIFT,
 npages << PAGE_SHIFT, );
for (i = j = 0; i < npages; i++) {
struct page *spage;
@@ -381,6 +381,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
migrate->dst[i] = 0;
}
}
+   *cur_dst = *cur_dst + i;
 
 #ifdef DEBUG_FORCE_MIXED_DOMAINS
for (i = 0, j = 0; i < npages; i += 4, j++) {
@@ -403,7 +404,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
 static long
 svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
-   uint64_t end, uint32_t trigger)
+   uint64_t end, uint32_t trigger, uint64_t *cur_dst)
 {
struct kfd_process *p = container_of(prange->svms, struct kfd_process, 
svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -456,7 +457,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
else
pr_debug("0x%lx pages migrated\n", cpages);
 
-   r = svm_migrate_copy_to_vram(adev, prange, , , scratch);
+   r = svm_migrate_copy_to_vram(adev, prange, , , scratch, 
cur_dst);
migrate_vma_pages();
 
pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -504,6 +505,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
+   uint64_t cur_dst;
unsigned long cpages = 0;
long r = 0;
 
@@ -524,6 +526,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
+   cur_dst = prange->offset;
 
for (addr = start; addr < end;) {
unsigned long next;
@@ -533,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
break;
 
next = min(vma->vm_end, end);
-   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger);
+   r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger, _dst);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
-- 
2.25.1



[PATCH v3] drm/amdkfd: Prevent user space using both svm and kfd api to register same user buffer

2023-02-08 Thread Xiaogang . Chen
From: Xiaogang Chen 

When xnack is on, user space can use svm page restore to set up a vm range
without registering it first, then use the regular api to register it. Currently
the kfd api and svm are not interoperable. We already check for that, but for a
user buffer the mapping address is not the same as the buffer's cpu virtual
address. Add a check on the cpu virtual address to avoid propagating the error
to hmm.
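
A minimal sketch of the overlap check being added (simplified; the names mirror
the diff below, the helper itself is illustrative and assumes p->svms.lock is
already held):

#include <linux/interval_tree.h>

/* Return true if the userptr CPU VA window [cpu_va, cpu_va + size) overlaps
 * any SVM range already registered in the svms.objects interval tree, which
 * is keyed by page index.
 */
static bool userptr_overlaps_svm(struct rb_root_cached *objects,
                                 u64 cpu_va, u64 size)
{
        return interval_tree_iter_first(objects,
                                        cpu_va >> PAGE_SHIFT,
                                        (cpu_va + size - 1) >> PAGE_SHIFT) != NULL;
}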

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f79b8e964140..072fa4fbd27f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1065,6 +1065,20 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
mutex_unlock(>svms.lock);
return -EADDRINUSE;
}
+
+   /* When register user buffer check if it has been registered by svm by
+* buffer cpu virtual address.
+*/
+   if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
+   interval_tree_iter_first(>svms.objects,
+args->mmap_offset >> PAGE_SHIFT,
+(args->mmap_offset  + args->size - 1) >> 
PAGE_SHIFT)) {
+   pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
+   args->mmap_offset);
+   mutex_unlock(>svms.lock);
+   return -EADDRINUSE;
+   }
+
mutex_unlock(>svms.lock);
 #endif
mutex_lock(>mutex);
-- 
2.25.1



[PATCH v2] drm/amdkfd: Prevent user space using both svm and kfd api to register same user buffer

2023-02-08 Thread Xiaogang . Chen
From: Xiaogang Chen 

When xnack is on, user space can use svm page restore to set up a vm range
without registering it first, then use the regular api to register it. Currently
the kfd api and svm are not interoperable. We already check for that, but for a
user buffer the mapping address is not the same as the buffer's cpu virtual
address. Add a check on the cpu virtual address to avoid propagating the error
to hmm.
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f79b8e964140..6d9cf860d2da 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1065,6 +1065,21 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
mutex_unlock(>svms.lock);
return -EADDRINUSE;
}
+
+   /* When register user buffer check if it has been registered by svm by
+* buffer cpu virtual address.
+*/
+   if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
+   interval_tree_iter_first(>svms.objects,
+   args->mmap_offset >> PAGE_SHIFT,
+   (args->mmap_offset  + args->size - 1) >> PAGE_SHIFT)) {
+
+   pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
+   args->mmap_offset);
+   mutex_unlock(>svms.lock);
+   return -EADDRINUSE;
+   }
+
mutex_unlock(>svms.lock);
 #endif
mutex_lock(>mutex);
-- 
2.25.1



[PATCH] drm/amdkfd: Prevent user space using both svm and kfd api to register same user buffer

2023-02-07 Thread Xiaogang . Chen
From: Xiaogang Chen 

When xnack is on, user space can use svm page restore to set up a vm range
without registering it first, then use the regular api to register it. Currently
the kfd api and svm are not interoperable. We already check for that, but for a
user buffer the mapping address is not the same as the buffer's cpu virtual
address. Add a check on the cpu virtual address to avoid propagating the error
to hmm.

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f79b8e964140..cb7acb0b9b52 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1065,6 +1065,23 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file 
*filep,
mutex_unlock(>svms.lock);
return -EADDRINUSE;
}
+
+   /* When register user buffer check if it has been registered by svm by
+* buffer cpu virtual address.
+*/
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+
+   if (interval_tree_iter_first(>svms.objects,
+   untagged_addr(args->mmap_offset) >> PAGE_SHIFT,
+   (untagged_addr(args->mmap_offset)  + args->size 
- 1) >> PAGE_SHIFT)) {
+
+   pr_err("User Buffer Address: 0x%llx already allocated 
by SVM\n",
+   untagged_addr(args->mmap_offset));
+   mutex_unlock(>svms.lock);
+   return -EADDRINUSE;
+   }
+
+   }
mutex_unlock(>svms.lock);
 #endif
mutex_lock(>mutex);
-- 
2.25.1



[PATCH] drm/amdkfd: Remove skipping of userptr buffer mapping when mmu notifier marks it as invalid

2022-10-28 Thread Xiaogang . Chen
From: Xiaogang Chen 

The mmu notifier does not always hold mm->sem during the callback. That causes a
race condition between kfd userptr buffer mapping and the mmu notifier, which
can lead to gpu shaders or SDMA accessing a userptr buffer before it has been
mapped into the gpu VM. Always map the userptr buffer to avoid that, even though
it may map some userptr buffers twice.

Suggested-by:  Felix Kuehling 
Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index da9d475d7ef2..ba72a910d0d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1907,16 +1907,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 */
mutex_lock(>process_info->lock);
 
-   /* Lock mmap-sem. If we find an invalid userptr BO, we can be
-* sure that the MMU notifier is no longer running
-* concurrently and the queues are actually stopped
-*/
-   if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-   mmap_write_lock(current->mm);
-   is_invalid_userptr = atomic_read(>invalid);
-   mmap_write_unlock(current->mm);
-   }
-
mutex_lock(>lock);
 
domain = mem->domain;
-- 
2.25.1



[PATCH v2] drm/amdgpu: config HDP_MISC_CNTL.READ_BUFFER_WATERMARK to fix a hang for applications running across multiple GPU configurations.

2022-02-22 Thread Xiaogang . Chen
From: Xiaogang Chen 

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c  | 3 +++
 drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index d7811e0327cb..02400d97a95c 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -146,6 +146,9 @@ static void hdp_v4_0_init_registers(struct amdgpu_device 
*adev)
 
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
 
+   if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0))
+   WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2);
+
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 
8));
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 
40));
 }
diff --git a/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h
index 25e28691d62d..65c91b0102e4 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h
@@ -104,6 +104,7 @@
 #define HDP_MISC_CNTL__OUTSTANDING_WRITE_COUNT_1024__SHIFT 0x5
 #define HDP_MISC_CNTL__MULTIPLE_READS__SHIFT   0x6
 #define HDP_MISC_CNTL__SIMULTANEOUS_READS_WRITES__SHIFT0xb
+#define HDP_MISC_CNTL__READ_BUFFER_WATERMARK__SHIFT 0xe
 #define HDP_MISC_CNTL__FED_ENABLE__SHIFT   0x15
 #define HDP_MISC_CNTL__SYSHUB_CHANNEL_PRIORITY__SHIFT  0x17
 #define HDP_MISC_CNTL__MMHUB_WRBURST_ENABLE__SHIFT 0x18
@@ -118,6 +119,7 @@
 #define HDP_MISC_CNTL__OUTSTANDING_WRITE_COUNT_1024_MASK   0x0020L
 #define HDP_MISC_CNTL__MULTIPLE_READS_MASK 0x0040L
 #define HDP_MISC_CNTL__SIMULTANEOUS_READS_WRITES_MASK  0x0800L
+#define HDP_MISC_CNTL__READ_BUFFER_WATERMARK_MASK   0xc000L
 #define HDP_MISC_CNTL__FED_ENABLE_MASK 0x0020L
 #define HDP_MISC_CNTL__SYSHUB_CHANNEL_PRIORITY_MASK0x0080L
 #define HDP_MISC_CNTL__MMHUB_WRBURST_ENABLE_MASK   0x0100L
-- 
2.25.1



[PATCH] drm/amdgpu: config HDP_MISC_CNTL.READ_BUFFER_WATERMARK to fix a hang for applications running across multiple GPU configurations.

2022-02-21 Thread Xiaogang . Chen
From: Xiaogang Chen 

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c  | 1 +
 drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index d7811e0327cb..aa2c7c3f721f 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -145,6 +145,7 @@ static void hdp_v4_0_init_registers(struct amdgpu_device 
*adev)
}
 
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+   WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2);
 
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 
8));
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 
40));
diff --git a/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h 
b/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h
index 25e28691d62d..65c91b0102e4 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h
@@ -104,6 +104,7 @@
 #define HDP_MISC_CNTL__OUTSTANDING_WRITE_COUNT_1024__SHIFT 0x5
 #define HDP_MISC_CNTL__MULTIPLE_READS__SHIFT   0x6
 #define HDP_MISC_CNTL__SIMULTANEOUS_READS_WRITES__SHIFT0xb
+#define HDP_MISC_CNTL__READ_BUFFER_WATERMARK__SHIFT 0xe
 #define HDP_MISC_CNTL__FED_ENABLE__SHIFT   0x15
 #define HDP_MISC_CNTL__SYSHUB_CHANNEL_PRIORITY__SHIFT  0x17
 #define HDP_MISC_CNTL__MMHUB_WRBURST_ENABLE__SHIFT 0x18
@@ -118,6 +119,7 @@
 #define HDP_MISC_CNTL__OUTSTANDING_WRITE_COUNT_1024_MASK   0x0020L
 #define HDP_MISC_CNTL__MULTIPLE_READS_MASK 0x0040L
 #define HDP_MISC_CNTL__SIMULTANEOUS_READS_WRITES_MASK  0x0800L
+#define HDP_MISC_CNTL__READ_BUFFER_WATERMARK_MASK   0xc000L
 #define HDP_MISC_CNTL__FED_ENABLE_MASK 0x0020L
 #define HDP_MISC_CNTL__SYSHUB_CHANNEL_PRIORITY_MASK0x0080L
 #define HDP_MISC_CNTL__MMHUB_WRBURST_ENABLE_MASK   0x0100L
-- 
2.25.1



[PATCH] drm/amdkfd: explicitly create/destroy queue attributes under /sys

2021-12-09 Thread Xiaogang . Chen
From: Xiaogang Chen 

When an application is about to finish it destroys the queues it has created
through an ioctl. The driver deletes the queue entry
(/sys/class/kfd/kfd/proc/pid/queues/queueid/), which is a directory containing
all of this queue's attributes, and low level kernel code deletes all attributes
under this directory. The kernel's lock is taken on the queue entry, not on its
attributes. Meanwhile another user space application can read the attributes.
It is possible for that application to hold/read the attributes while the kernel
is deleting the queue entry, causing the application to make an invalid memory
access and be killed by the kernel.

Driver changes: explicitly create/destroy each attribute for each queue, so the
kernel takes a lock on each attribute too.
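
A rough sketch of the per-attribute pattern (illustrative only; the real
kfd_sysfs_create_file helper lives in kfd_process.c and KFD_SYSFS_FILE_MODE is
the driver's existing mode define):

#include <linux/sysfs.h>

/* Create one named attribute file under the queue kobject so kernfs locks
 * that file individually, instead of relying on default_attrs for the
 * whole directory.
 */
static void queue_attr_add(struct kobject *kobj, struct attribute *attr,
                           const char *name, umode_t mode)
{
        attr->name = name;
        attr->mode = mode;
        sysfs_attr_init(attr);
        if (sysfs_create_file(kobj, attr))
                pr_warn("Creating sysfs attribute %s failed\n", name);
}

/* On teardown, remove each attribute explicitly before deleting the kobject */
static void queue_attr_remove(struct kobject *kobj, struct attribute *attr)
{
        sysfs_remove_file(kobj, attr);
}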

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 33 +++-
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0c3f911e3bf4..045da300749e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -546,6 +546,9 @@ struct queue {
 
/* procfs */
struct kobject kobj;
+   struct attribute attr_guid;
+   struct attribute attr_size;
+   struct attribute attr_type;
 };
 
 enum KFD_MQD_TYPE {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 9158f9754a24..04a5638f9196 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -73,6 +73,8 @@ static void evict_process_worker(struct work_struct *work);
 static void restore_process_worker(struct work_struct *work);
 
 static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device 
*pdd);
+static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr,
+   char *name);
 
 struct kfd_procfs_tree {
struct kobject *kobj;
@@ -441,35 +443,12 @@ static ssize_t kfd_sysfs_counters_show(struct kobject 
*kobj,
return 0;
 }
 
-static struct attribute attr_queue_size = {
-   .name = "size",
-   .mode = KFD_SYSFS_FILE_MODE
-};
-
-static struct attribute attr_queue_type = {
-   .name = "type",
-   .mode = KFD_SYSFS_FILE_MODE
-};
-
-static struct attribute attr_queue_gpuid = {
-   .name = "gpuid",
-   .mode = KFD_SYSFS_FILE_MODE
-};
-
-static struct attribute *procfs_queue_attrs[] = {
-   _queue_size,
-   _queue_type,
-   _queue_gpuid,
-   NULL
-};
-
 static const struct sysfs_ops procfs_queue_ops = {
.show = kfd_procfs_queue_show,
 };
 
 static struct kobj_type procfs_queue_type = {
.sysfs_ops = _queue_ops,
-   .default_attrs = procfs_queue_attrs,
 };
 
 static const struct sysfs_ops procfs_stats_ops = {
@@ -511,6 +490,10 @@ int kfd_procfs_add_queue(struct queue *q)
return ret;
}
 
+   kfd_sysfs_create_file(>kobj, >attr_guid, "guid");
+   kfd_sysfs_create_file(>kobj, >attr_size, "size");
+   kfd_sysfs_create_file(>kobj, >attr_type, "type");
+
return 0;
 }
 
@@ -655,6 +638,10 @@ void kfd_procfs_del_queue(struct queue *q)
if (!q)
return;
 
+   sysfs_remove_file(>kobj, >attr_guid);
+   sysfs_remove_file(>kobj, >attr_size);
+   sysfs_remove_file(>kobj, >attr_type);
+
kobject_del(>kobj);
kobject_put(>kobj);
 }
-- 
2.25.1



[PATCH 2/2] drm/amdgpu/display: buffer INTERRUPT_LOW_IRQ_CONTEXT interrupt work

2021-01-03 Thread Xiaogang . Chen
From: Xiaogang Chen 

amdgpu DM handles INTERRUPT_LOW_IRQ_CONTEXT interrupts (hpd, hpd_rx) with a
work queue and a single work_struct. If the previous interrupt has not been
handled yet, new interrupts of the same type are discarded and the driver just
prints an "amdgpu_dm_irq_schedule_work FAILED" message. If the driver misses
important hpd or hpd_rx related interrupts, hot (un)plugged devices may hang
the system or make it unstable, for example when the system resumes from S3
sleep with an mst device connected.

This patch dynamically allocates a new amdgpu_dm_irq_handler_data for a new
interrupt if the previous INTERRUPT_LOW_IRQ_CONTEXT interrupt work has not been
handled yet, so the new interrupt work can be queued to the same
workqueue_struct instead of discarding the new interrupt. All allocated
amdgpu_dm_irq_handler_data are kept on a single linked list and reused
afterwards.
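
The core of the approach, as a simplified sketch (structure and function names
are illustrative, not the exact driver code; INIT_WORK(&hd->work, irq_work_func)
is assumed to be done at registration time): each handler instance carries its
own work_struct, so a second interrupt of the same source can still be deferred
while the first is in flight.

#include <linux/workqueue.h>
#include <linux/list.h>

struct irq_handler_data_sketch {
        struct list_head list;          /* kept on a per-source list for reuse */
        void (*handler)(void *arg);
        void *arg;
        struct work_struct work;        /* one work item per handler instance */
};

static void irq_work_func(struct work_struct *work)
{
        struct irq_handler_data_sketch *hd =
                container_of(work, struct irq_handler_data_sketch, work);

        hd->handler(hd->arg);
}

static void schedule_low_irq(struct workqueue_struct *wq,
                             struct irq_handler_data_sketch *hd)
{
        /* if hd->work is already pending, the caller allocates a fresh
         * handler_data (reused later) instead of dropping the interrupt
         */
        queue_work(wq, &hd->work);
}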

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h  |  14 +--
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c  | 114 ++---
 2 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index c9d82b9..730e540 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -69,18 +69,6 @@ struct common_irq_params {
 };
 
 /**
- * struct irq_list_head - Linked-list for low context IRQ handlers.
- *
- * @head: The list_head within  handler_data
- * @work: A work_struct containing the deferred handler work
- */
-struct irq_list_head {
-   struct list_head head;
-   /* In case this interrupt needs post-processing, 'work' will be queued*/
-   struct work_struct work;
-};
-
-/**
  * struct dm_compressor_info - Buffer info used by frame buffer compression
  * @cpu_addr: MMIO cpu addr
  * @bo_ptr: Pointer to the buffer object
@@ -270,7 +258,7 @@ struct amdgpu_display_manager {
 * Note that handlers are called in the same order as they were
 * registered (FIFO).
 */
-   struct irq_list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
+   struct list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER];
 
/**
 * @irq_handler_list_high_tab:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 3577785..ada344a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -82,6 +82,7 @@ struct amdgpu_dm_irq_handler_data {
struct amdgpu_display_manager *dm;
/* DAL irq source which registered for this interrupt. */
enum dc_irq_source irq_source;
+   struct work_struct work;
 };
 
 #define DM_IRQ_TABLE_LOCK(adev, flags) \
@@ -111,20 +112,10 @@ static void init_handler_common_data(struct 
amdgpu_dm_irq_handler_data *hcd,
  */
 static void dm_irq_work_func(struct work_struct *work)
 {
-   struct irq_list_head *irq_list_head =
-   container_of(work, struct irq_list_head, work);
-   struct list_head *handler_list = _list_head->head;
-   struct amdgpu_dm_irq_handler_data *handler_data;
-
-   list_for_each_entry(handler_data, handler_list, list) {
-   DRM_DEBUG_KMS("DM_IRQ: work_func: for dal_src=%d\n",
-   handler_data->irq_source);
+   struct amdgpu_dm_irq_handler_data *handler_data =
+container_of(work, struct amdgpu_dm_irq_handler_data, work);
 
-   DRM_DEBUG_KMS("DM_IRQ: schedule_work: for dal_src=%d\n",
-   handler_data->irq_source);
-
-   handler_data->handler(handler_data->handler_arg);
-   }
+   handler_data->handler(handler_data->handler_arg);
 
/* Call a DAL subcomponent which registered for interrupt notification
 * at INTERRUPT_LOW_IRQ_CONTEXT.
@@ -156,7 +147,7 @@ static struct list_head *remove_irq_handler(struct 
amdgpu_device *adev,
break;
case INTERRUPT_LOW_IRQ_CONTEXT:
default:
-   hnd_list = >dm.irq_handler_list_low_tab[irq_source].head;
+   hnd_list = >dm.irq_handler_list_low_tab[irq_source];
break;
}
 
@@ -287,7 +278,8 @@ void *amdgpu_dm_irq_register_interrupt(struct amdgpu_device 
*adev,
break;
case INTERRUPT_LOW_IRQ_CONTEXT:
default:
-   hnd_list = >dm.irq_handler_list_low_tab[irq_source].head;
+   hnd_list = >dm.irq_handler_list_low_tab[irq_source];
+   INIT_WORK(_data->work, dm_irq_work_func);
break;
}
 
@@ -369,7 +361,7 @@ void amdgpu_dm_irq_unregister_interrupt(struct 
amdgpu_device *adev,
 int amdgpu_dm_irq_init(struct amdgpu_device *adev)
 {
int src;
-   struct irq_list_head *lh;
+   struct list_head *

[PATCH 1/2] drm: distinguish return value of drm_dp_check_and_send_link_address.

2021-01-03 Thread Xiaogang . Chen
From: Xiaogang Chen 

drm_dp_check_and_send_link_address discovers the MST device topology. It can
return both positive and negative values. When it returns a positive value no
error was found. When it returns a negative value an error was found, such as a
NAK or a timeout. The following drm_kms_helper_hotplug_event should only be
called when drm_dp_check_and_send_link_address returns a positive value.

Signed-off-by: Xiaogang Chen 
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index 17dbed0..3ef5206 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2650,7 +2650,7 @@ static void drm_dp_mst_link_probe_work(struct work_struct 
*work)
drm_dp_mst_topology_put_mstb(mstb);
 
mutex_unlock(>probe_lock);
-   if (ret)
+   if (ret > 0)
drm_kms_helper_hotplug_event(dev);
 }
 
-- 
2.7.4
