Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v5
On 2019-01-07 9:21 a.m., Christian König wrote: > Am 14.12.18 um 22:10 schrieb Yang, Philip: >> Use HMM helper function hmm_vma_fault() to get physical pages backing >> userptr and start CPU page table update track of those pages. Then use >> hmm_vma_range_done() to check if those pages are updated before >> amdgpu_cs_submit for gfx or before user queues are resumed for kfd. >> >> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart >> from scratch, for kfd, restore worker is rescheduled to retry. >> >> HMM simplify the CPU page table concurrent update check, so remove >> guptasklock, mmu_invalidations, last_set_pages fields from >> amdgpu_ttm_tt struct. >> >> HMM does not pin the page (increase page ref count), so remove related >> operations like release_pages(), put_page(), mark_page_dirty(). >> >> Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 >> Signed-off-by: Philip Yang >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 - >> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 - >> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 185 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 ++- >> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 168 ++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 - >> 11 files changed, 209 insertions(+), 293 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> index 70429f7aa9a8..717791d4fa45 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> @@ -62,7 +62,6 @@ struct kgd_mem { >> atomic_t invalid; >> struct amdkfd_process_info *process_info; >> - struct page **user_pages; >> struct amdgpu_sync sync; >> diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> index be1ab43473c6..2897793600f7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, >> struct mm_struct *mm, >> goto out; >> } >> - /* If no restore worker is running concurrently, user_pages >> - * should not be allocated >> - */ >> - WARN(mem->user_pages, "Leaking user_pages array"); >> - >> - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, >> - sizeof(struct page *), >> - GFP_KERNEL | __GFP_ZERO); >> - if (!mem->user_pages) { >> - pr_err("%s: Failed to allocate pages array\n", __func__); >> - ret = -ENOMEM; >> - goto unregister_out; >> - } >> - >> - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); >> + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); >> if (ret) { >> pr_err("%s: Failed to get user pages: %d\n", __func__, ret); >> - goto free_out; >> + goto unregister_out; >> } >> - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); >> - >> ret = amdgpu_bo_reserve(bo, true); >> if (ret) { >> pr_err("%s: Failed to reserve BO\n", __func__); >> @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, >> struct mm_struct *mm, >> amdgpu_bo_unreserve(bo); >> release_out: >> - if (ret) >> - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); >> -free_out: >> - kvfree(mem->user_pages); >> - mem->user_pages = NULL; >> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); >> unregister_out: >> if (ret) >> amdgpu_mn_unregister(bo); >> @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, >> ctx->kfd_bo.priority = 0; >> ctx->kfd_bo.tv.bo = >tbo; >> ctx->kfd_bo.tv.num_shared = 1; >> - ctx->kfd_bo.user_pages = NULL; >> list_add(>kfd_bo.tv.head, >list); >> amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); >> @@ -743,7 +722,6 @@ static int 
reserve_bo_and_cond_vms(struct kgd_mem >> *mem, >> ctx->kfd_bo.priority = 0; >> ctx->kfd_bo.tv.bo = >tbo; >> ctx->kfd_bo.tv.num_shared = 1; >> - ctx->kfd_bo.user_pages = NULL; >> list_add(>kfd_bo.tv.head, >list); >> i = 0; >> @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( >> list_del(_list_entry->head); >> mutex_unlock(_info->lock); >> - /* Free user pages if necessary */ >> - if (mem->user_pages) { >> - pr_debug("%s: Freeing user_pages array\n", __func__); >> - if (mem->user_pages[0]) >> - release_pages(mem->user_pages, >> -
Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v5
Am 14.12.18 um 22:10 schrieb Yang, Philip: Use HMM helper function hmm_vma_fault() to get physical pages backing userptr and start CPU page table update track of those pages. Then use hmm_vma_range_done() to check if those pages are updated before amdgpu_cs_submit for gfx or before user queues are resumed for kfd. If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart from scratch, for kfd, restore worker is rescheduled to retry. HMM simplify the CPU page table concurrent update check, so remove guptasklock, mmu_invalidations, last_set_pages fields from amdgpu_ttm_tt struct. HMM does not pin the page (increase page ref count), so remove related operations like release_pages(), put_page(), mark_page_dirty(). Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 185 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 25 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h| 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 168 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 1 - 11 files changed, 209 insertions(+), 293 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 70429f7aa9a8..717791d4fa45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -62,7 +62,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index be1ab43473c6..2897793600f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages -* should not be allocated -*/ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); i = 0; @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(_list_entry->head); mutex_unlock(_info->lock); - /* Free user pages if necessary */ - if 
(mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret =
Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v5
I'm just getting up to speed again after winter vacation and have tons of mails to dig through and a few important personal appointments coming as well. From skimming over it looks like it should work, but please give me at least till Monday to take a closer look. Christian. Am 02.01.19 um 21:10 schrieb Yang, Philip: Hi Christian, May you help review the CS parts related changes? I tested it using libdrm Userptr Test under X. Do you know other test applications which can stress the CS userptr path? Thanks, Philip On 2018-12-14 4:25 p.m., Kuehling, Felix wrote: Except for the GEM and CS parts, the series is Reviewed-by: Felix Kuehling Regards, Felix On 2018-12-14 4:10 p.m., Yang, Philip wrote: Use HMM helper function hmm_vma_fault() to get physical pages backing userptr and start CPU page table update track of those pages. Then use hmm_vma_range_done() to check if those pages are updated before amdgpu_cs_submit for gfx or before user queues are resumed for kfd. If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart from scratch, for kfd, restore worker is rescheduled to retry. HMM simplify the CPU page table concurrent update check, so remove guptasklock, mmu_invalidations, last_set_pages fields from amdgpu_ttm_tt struct. HMM does not pin the page (increase page ref count), so remove related operations like release_pages(), put_page(), mark_page_dirty(). 
Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 185 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 25 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h| 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 168 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 1 - 11 files changed, 209 insertions(+), 293 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 70429f7aa9a8..717791d4fa45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -62,7 +62,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index be1ab43473c6..2897793600f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages -* should not be allocated -*/ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user 
pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0;
Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v5
Hi Christian, May you help review the CS parts related changes? I tested it using libdrm Userptr Test under X. Do you know other test applications which can stress the CS userptr path? Thanks, Philip On 2018-12-14 4:25 p.m., Kuehling, Felix wrote: > Except for the GEM and CS parts, the series is Reviewed-by: Felix > Kuehling > > Regards, > Felix > > On 2018-12-14 4:10 p.m., Yang, Philip wrote: >> Use HMM helper function hmm_vma_fault() to get physical pages backing >> userptr and start CPU page table update track of those pages. Then use >> hmm_vma_range_done() to check if those pages are updated before >> amdgpu_cs_submit for gfx or before user queues are resumed for kfd. >> >> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart >> from scratch, for kfd, restore worker is rescheduled to retry. >> >> HMM simplify the CPU page table concurrent update check, so remove >> guptasklock, mmu_invalidations, last_set_pages fields from >> amdgpu_ttm_tt struct. >> >> HMM does not pin the page (increase page ref count), so remove related >> operations like release_pages(), put_page(), mark_page_dirty(). 
>> >> Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 >> Signed-off-by: Philip Yang >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 - >> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 - >> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 185 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 25 ++- >> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h| 4 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 168 ++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 1 - >> 11 files changed, 209 insertions(+), 293 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> index 70429f7aa9a8..717791d4fa45 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h >> @@ -62,7 +62,6 @@ struct kgd_mem { >> >> atomic_t invalid; >> struct amdkfd_process_info *process_info; >> -struct page **user_pages; >> >> struct amdgpu_sync sync; >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> index be1ab43473c6..2897793600f7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct >> mm_struct *mm, >> goto out; >> } >> >> -/* If no restore worker is running concurrently, user_pages >> - * should not be allocated >> - */ >> -WARN(mem->user_pages, "Leaking user_pages array"); >> - >> -mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, >> - sizeof(struct page *), >> - GFP_KERNEL | __GFP_ZERO); >> -if (!mem->user_pages) { >> -pr_err("%s: Failed to allocate pages array\n", __func__); >> -ret = -ENOMEM; >> -goto unregister_out; >> -} >> - >> -ret = 
amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); >> +ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); >> if (ret) { >> pr_err("%s: Failed to get user pages: %d\n", __func__, ret); >> -goto free_out; >> +goto unregister_out; >> } >> >> -amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); >> - >> ret = amdgpu_bo_reserve(bo, true); >> if (ret) { >> pr_err("%s: Failed to reserve BO\n", __func__); >> @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct >> mm_struct *mm, >> amdgpu_bo_unreserve(bo); >> >> release_out: >> -if (ret) >> -release_pages(mem->user_pages, bo->tbo.ttm->num_pages); >> -free_out: >> -kvfree(mem->user_pages); >> -mem->user_pages = NULL; >> +amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); >> unregister_out: >> if (ret) >> amdgpu_mn_unregister(bo); >> @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, >> ctx->kfd_bo.priority = 0; >> ctx->kfd_bo.tv.bo = >tbo; >> ctx->kfd_bo.tv.num_shared = 1; >> -ctx->kfd_bo.user_pages = NULL; >> list_add(>kfd_bo.tv.head, >list); >> >> amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); >> @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, >> ctx->kfd_bo.priority = 0; >> ctx->kfd_bo.tv.bo = >tbo; >> ctx->kfd_bo.tv.num_shared = 1; >> -ctx->kfd_bo.user_pages = NULL; >> list_add(>kfd_bo.tv.head,
Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v5
Except for the GEM and CS parts, the series is Reviewed-by: Felix Kuehling Regards, Felix On 2018-12-14 4:10 p.m., Yang, Philip wrote: > Use HMM helper function hmm_vma_fault() to get physical pages backing > userptr and start CPU page table update track of those pages. Then use > hmm_vma_range_done() to check if those pages are updated before > amdgpu_cs_submit for gfx or before user queues are resumed for kfd. > > If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart > from scratch, for kfd, restore worker is rescheduled to retry. > > HMM simplify the CPU page table concurrent update check, so remove > guptasklock, mmu_invalidations, last_set_pages fields from > amdgpu_ttm_tt struct. > > HMM does not pin the page (increase page ref count), so remove related > operations like release_pages(), put_page(), mark_page_dirty(). > > Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 > Signed-off-by: Philip Yang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 - > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 - > drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 185 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 25 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h| 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 168 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 1 - > 11 files changed, 209 insertions(+), 293 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 70429f7aa9a8..717791d4fa45 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -62,7 +62,6 @@ struct kgd_mem { > > atomic_t invalid; > struct amdkfd_process_info *process_info; > - struct page **user_pages; > > struct amdgpu_sync sync; > > diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index be1ab43473c6..2897793600f7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct > mm_struct *mm, > goto out; > } > > - /* If no restore worker is running concurrently, user_pages > - * should not be allocated > - */ > - WARN(mem->user_pages, "Leaking user_pages array"); > - > - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, > -sizeof(struct page *), > -GFP_KERNEL | __GFP_ZERO); > - if (!mem->user_pages) { > - pr_err("%s: Failed to allocate pages array\n", __func__); > - ret = -ENOMEM; > - goto unregister_out; > - } > - > - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); > + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); > if (ret) { > pr_err("%s: Failed to get user pages: %d\n", __func__, ret); > - goto free_out; > + goto unregister_out; > } > > - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); > - > ret = amdgpu_bo_reserve(bo, true); > if (ret) { > pr_err("%s: Failed to reserve BO\n", __func__); > @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct > mm_struct *mm, > amdgpu_bo_unreserve(bo); > > release_out: > - if (ret) > - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); > -free_out: > - kvfree(mem->user_pages); > - mem->user_pages = NULL; > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > unregister_out: > if (ret) > amdgpu_mn_unregister(bo); > @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, > ctx->kfd_bo.priority = 0; > ctx->kfd_bo.tv.bo = >tbo; > ctx->kfd_bo.tv.num_shared = 1; > - ctx->kfd_bo.user_pages = NULL; > list_add(>kfd_bo.tv.head, >list); > > amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); > @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, > 
ctx->kfd_bo.priority = 0; > ctx->kfd_bo.tv.bo = >tbo; > ctx->kfd_bo.tv.num_shared = 1; > - ctx->kfd_bo.user_pages = NULL; > list_add(>kfd_bo.tv.head, >list); > > i = 0; > @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > list_del(_list_entry->head); > mutex_unlock(_info->lock); > > - /* Free user pages if necessary */ > - if (mem->user_pages) { > - pr_debug("%s: Freeing user_pages array\n", __func__); > - if
[PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v5
Use HMM helper function hmm_vma_fault() to get physical pages backing userptr and start CPU page table update track of those pages. Then use hmm_vma_range_done() to check if those pages are updated before amdgpu_cs_submit for gfx or before user queues are resumed for kfd. If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart from scratch, for kfd, restore worker is rescheduled to retry. HMM simplify the CPU page table concurrent update check, so remove guptasklock, mmu_invalidations, last_set_pages fields from amdgpu_ttm_tt struct. HMM does not pin the page (increase page ref count), so remove related operations like release_pages(), put_page(), mark_page_dirty(). Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 185 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 25 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h| 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 168 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 1 - 11 files changed, 209 insertions(+), 293 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 70429f7aa9a8..717791d4fa45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -62,7 +62,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index be1ab43473c6..2897793600f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages -* should not be allocated -*/ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]); @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = >tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(>kfd_bo.tv.head, >list); i = 0; @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(_list_entry->head); mutex_unlock(_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) 
{ - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if