Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART
On 02/05/2024 17:18, Christian König wrote: Am 26.04.24 um 15:48 schrieb Shashank Sharma: To support oversubscription, MES FW expects WPTR BOs to be mapped into GART, before they are submitted to usermode queues. This patch adds a function for the same. V4: fix the wptr value before mapping lookup (Bas, Christian). V5: Addressed review comments from Christian: - Either pin object or allocate from GART, but not both. - All the handling must be done with the VM locks held. V7: Addressed review comments from Christian: - Do not take vm->eviction_lock - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset V8: Rebase V9: Changed the function names from gfx_v11* to mes_v11* Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav The patch itself looks good, but this really need the eviction fence to work properly. Otherwise it can be that the BO mapped into the GART is evicted at some point. Noted, eviction fences will be following up soon. - Shashank Christian. --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 77 +++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 1 + 2 files changed, 78 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index 8d2cd61af26b..37b80626e792 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -30,6 +30,74 @@ #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE +static int +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(>tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = >wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); + return 0; +} + static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, struct amdgpu_mqd_prop *userq_props) @@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, queue_input.queue_size = userq_props->queue_size >> 2; queue_input.doorbell_offset = userq_props->doorbell_index; queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; amdgpu_mes_lock(>mes); r = adev->mes.funcs->add_hw_queue(>mes, _input); @@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + /* FW expects WPTR BOs to be mapped into GART */ + r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + /* Map userqueue into FW using MES */ r = mes_v11_0_userq_map(uq_mgr, queue, userq_props); if (r) { @@ -216,6 +292,7 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { mes_v11_0_userq_unmap(uq_mgr, queue); + amdgpu_bo_unref(>wptr_obj.obj); amdgpu_userqueue_destroy_object(uq_mgr, >fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, >mqd); diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART
Am 26.04.24 um 15:48 schrieb Shashank Sharma: To support oversubscription, MES FW expects WPTR BOs to be mapped into GART, before they are submitted to usermode queues. This patch adds a function for the same. V4: fix the wptr value before mapping lookup (Bas, Christian). V5: Addressed review comments from Christian: - Either pin object or allocate from GART, but not both. - All the handling must be done with the VM locks held. V7: Addressed review comments from Christian: - Do not take vm->eviction_lock - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset V8: Rebase V9: Changed the function names from gfx_v11* to mes_v11* Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav The patch itself looks good, but this really need the eviction fence to work properly. Otherwise it can be that the BO mapped into the GART is evicted at some point. Christian. --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 77 +++ .../gpu/drm/amd/include/amdgpu_userqueue.h| 1 + 2 files changed, 78 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index 8d2cd61af26b..37b80626e792 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -30,6 +30,74 @@ #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE +static int +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(>tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = >wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); + return 0; +} + static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, struct amdgpu_mqd_prop *userq_props) @@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, queue_input.queue_size = userq_props->queue_size >> 2; queue_input.doorbell_offset = userq_props->doorbell_index; queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; amdgpu_mes_lock(>mes); r = adev->mes.funcs->add_hw_queue(>mes, _input); @@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + /* FW expects WPTR BOs to be mapped into GART */ + r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + /* Map userqueue into FW using MES */ r = mes_v11_0_userq_map(uq_mgr, queue, userq_props); if (r) { @@ -216,6 +292,7 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { mes_v11_0_userq_unmap(uq_mgr, queue); + amdgpu_bo_unref(>wptr_obj.obj); amdgpu_userqueue_destroy_object(uq_mgr,
Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART
On Thu, May 2, 2024 at 1:31 AM Sharma, Shashank wrote: > > > On 01/05/2024 23:36, Alex Deucher wrote: > > On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma > > wrote: > >> To support oversubscription, MES FW expects WPTR BOs to > >> be mapped into GART, before they are submitted to usermode > >> queues. This patch adds a function for the same. > >> > >> V4: fix the wptr value before mapping lookup (Bas, Christian). > >> > >> V5: Addressed review comments from Christian: > >> - Either pin object or allocate from GART, but not both. > >> - All the handling must be done with the VM locks held. > >> > >> V7: Addressed review comments from Christian: > >> - Do not take vm->eviction_lock > >> - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset > >> > >> V8: Rebase > >> V9: Changed the function names from gfx_v11* to mes_v11* > >> > >> Cc: Alex Deucher > >> Cc: Christian Koenig > >> Signed-off-by: Shashank Sharma > >> Signed-off-by: Arvind Yadav > >> --- > >> .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 77 +++ > >> .../gpu/drm/amd/include/amdgpu_userqueue.h| 1 + > >> 2 files changed, 78 insertions(+) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c > >> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c > >> index 8d2cd61af26b..37b80626e792 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c > >> @@ -30,6 +30,74 @@ > >> #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE > >> #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE > >> > >> +static int > >> +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo > >> *bo) > >> +{ > >> + int ret; > >> + > >> + ret = amdgpu_bo_reserve(bo, true); > >> + if (ret) { > >> + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); > >> + goto err_reserve_bo_failed; > >> + } > >> + > >> + ret = amdgpu_ttm_alloc_gart(>tbo); > >> + if (ret) { > >> + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); > >> + goto err_map_bo_gart_failed; > >> + } > >> + > >> + amdgpu_bo_unreserve(bo); > >> + bo = amdgpu_bo_ref(bo); > >> + > >> + return 0; > >> + > >> +err_map_bo_gart_failed: > >> + amdgpu_bo_unreserve(bo); > >> +err_reserve_bo_failed: > >> + return ret; > >> +} > >> + > >> +static int > >> +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, > >> + struct amdgpu_usermode_queue *queue, > >> + uint64_t wptr) > >> +{ > >> + struct amdgpu_device *adev = uq_mgr->adev; > >> + struct amdgpu_bo_va_mapping *wptr_mapping; > >> + struct amdgpu_vm *wptr_vm; > >> + struct amdgpu_userq_obj *wptr_obj = >wptr_obj; > >> + int ret; > >> + > >> + wptr_vm = queue->vm; > >> + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); > >> + if (ret) > >> + return ret; > >> + > >> + wptr &= AMDGPU_GMC_HOLE_MASK; > >> + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> > >> PAGE_SHIFT); > >> + amdgpu_bo_unreserve(wptr_vm->root.bo); > >> + if (!wptr_mapping) { > >> + DRM_ERROR("Failed to lookup wptr bo\n"); > >> + return -EINVAL; > >> + } > >> + > >> + wptr_obj->obj = wptr_mapping->bo_va->base.bo; > >> + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { > >> + DRM_ERROR("Requested GART mapping for wptr bo larger than > >> one page\n"); > >> + return -EINVAL; > >> + } > >> + > >> + ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj); > >> + if (ret) { > >> + DRM_ERROR("Failed to map wptr bo to GART\n"); > >> + return ret; > >> + } > >> + > >> + queue->wptr_obj.gpu_addr = > >> amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); > > The wptr virtual address from the user may not be at offset 0 from the > > start of the object. We should add the offset to the base vmid0 GPU > > address. > > can you please elaborate a bit here ? wptr_obj->obj is already mapped to > gart, do we still need this ? The location that the MES will poll needs to be the same as the location that the UMD will be writing to. E.g., if you allocate the BO and then map it into user space at location 0x5000 in the user's GPU virtual address space and then the user uses 0x5008 as the wptr address, we need to make sure that we are polling in MES at vmid0 virtual address + 0x8. If you map the BO at 0x2000 in the vmid0 address space, you need to make sure to point the firmware to 0x2008. Alex > > - Shashank > > > > > Alex > > > >> + return 0; > >> +} > >> + > >> static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, > >> struct amdgpu_usermode_queue *queue, > >> struct amdgpu_mqd_prop
Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART
On 02/05/2024 15:06, Kasiviswanathan, Harish wrote: [AMD Official Use Only - General] -Original Message- From: amd-gfx On Behalf Of Sharma, Shashank Sent: Thursday, May 2, 2024 1:32 AM To: Alex Deucher Cc: amd-gfx@lists.freedesktop.org; Yadav, Arvind ; Deucher, Alexander ; Koenig, Christian Subject: Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART On 01/05/2024 23:36, Alex Deucher wrote: On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma wrote: To support oversubscription, MES FW expects WPTR BOs to be mapped into GART, before they are submitted to usermode queues. This patch adds a function for the same. V4: fix the wptr value before mapping lookup (Bas, Christian). V5: Addressed review comments from Christian: - Either pin object or allocate from GART, but not both. - All the handling must be done with the VM locks held. V7: Addressed review comments from Christian: - Do not take vm->eviction_lock - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset V8: Rebase V9: Changed the function names from gfx_v11* to mes_v11* Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 77 +++ .../gpu/drm/amd/include/amdgpu_userqueue.h| 1 + 2 files changed, 78 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index 8d2cd61af26b..37b80626e792 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -30,6 +30,74 @@ #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE +static int +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(>tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + There is a very similar function amdgpu_amdkfd_map_gtt_bo_to_gart(). Is it possible to unify. Also, adev parameter in the above function is confusing. This was also removed from amdgpu_amdkfd_map_gtt_bo_to_gart(). It looks like bo is mapped to gart of adev, however it doesn't have to be. It is mapped to the gart to which bo is associated. I don't think unification makes much sense here, but I agree that adev can be removed from the input args. I will update this. - Shashank +static int +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = >wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); The wptr virtual address from the user may not be at offset 0 from the start of the object. We should add the offset to the base vmid0 GPU address. can you please elaborate a bit here ? wptr_obj->obj is already mapped to gart, do we still need this ? - Shashank Alex + return 0; +} + static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, struct amdgpu_mqd_prop *userq_props) @@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, queue_input
RE: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART
[AMD Official Use Only - General] -Original Message- From: amd-gfx On Behalf Of Sharma, Shashank Sent: Thursday, May 2, 2024 1:32 AM To: Alex Deucher Cc: amd-gfx@lists.freedesktop.org; Yadav, Arvind ; Deucher, Alexander ; Koenig, Christian Subject: Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART On 01/05/2024 23:36, Alex Deucher wrote: > On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma > wrote: >> To support oversubscription, MES FW expects WPTR BOs to >> be mapped into GART, before they are submitted to usermode >> queues. This patch adds a function for the same. >> >> V4: fix the wptr value before mapping lookup (Bas, Christian). >> >> V5: Addressed review comments from Christian: >> - Either pin object or allocate from GART, but not both. >> - All the handling must be done with the VM locks held. >> >> V7: Addressed review comments from Christian: >> - Do not take vm->eviction_lock >> - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset >> >> V8: Rebase >> V9: Changed the function names from gfx_v11* to mes_v11* >> >> Cc: Alex Deucher >> Cc: Christian Koenig >> Signed-off-by: Shashank Sharma >> Signed-off-by: Arvind Yadav >> --- >> .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 77 +++ >> .../gpu/drm/amd/include/amdgpu_userqueue.h| 1 + >> 2 files changed, 78 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c >> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c >> index 8d2cd61af26b..37b80626e792 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c >> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c >> @@ -30,6 +30,74 @@ >> #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE >> #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE >> >> +static int >> +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo >> *bo) >> +{ >> + int ret; >> + >> + ret = amdgpu_bo_reserve(bo, true); >> + if (ret) { >> + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); >> + goto err_reserve_bo_failed; >> + } >> + >> + ret = amdgpu_ttm_alloc_gart(>tbo); >> + if (ret) { >> + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); >> + goto err_map_bo_gart_failed; >> + } >> + >> + amdgpu_bo_unreserve(bo); >> + bo = amdgpu_bo_ref(bo); >> + >> + return 0; >> + >> +err_map_bo_gart_failed: >> + amdgpu_bo_unreserve(bo); >> +err_reserve_bo_failed: >> + return ret; >> +} >> + There is a very similar function amdgpu_amdkfd_map_gtt_bo_to_gart(). Is it possible to unify. Also, adev parameter in the above function is confusing. This was also removed from amdgpu_amdkfd_map_gtt_bo_to_gart(). It looks like bo is mapped to gart of adev, however it doesn't have to be. It is mapped to the gart to which bo is associated. >> +static int >> +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, >> + struct amdgpu_usermode_queue *queue, >> + uint64_t wptr) >> +{ >> + struct amdgpu_device *adev = uq_mgr->adev; >> + struct amdgpu_bo_va_mapping *wptr_mapping; >> + struct amdgpu_vm *wptr_vm; >> + struct amdgpu_userq_obj *wptr_obj = >wptr_obj; >> + int ret; >> + >> + wptr_vm = queue->vm; >> + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); >> + if (ret) >> + return ret; >> + >> + wptr &= AMDGPU_GMC_HOLE_MASK; >> + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> >> PAGE_SHIFT); >> + amdgpu_bo_unreserve(wptr_vm->root.bo); >> + if (!wptr_mapping) { >> + DRM_ERROR("Failed to lookup wptr bo\n"); >> + return -EINVAL; >> + } >> + >> + wptr_obj->obj = wptr_mapping->bo_va->base.bo; >> + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { >> + DRM_ERROR("Requested GART mapping for wptr bo larger than >> one page\n"); >> + return -EINVAL; >> + } >> + >> + ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj); >> + if (ret) { >> + DRM_ERROR("Failed to map wptr bo to GART\n"); >> + return ret; >> +
Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART
On 01/05/2024 23:36, Alex Deucher wrote: On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma wrote: To support oversubscription, MES FW expects WPTR BOs to be mapped into GART, before they are submitted to usermode queues. This patch adds a function for the same. V4: fix the wptr value before mapping lookup (Bas, Christian). V5: Addressed review comments from Christian: - Either pin object or allocate from GART, but not both. - All the handling must be done with the VM locks held. V7: Addressed review comments from Christian: - Do not take vm->eviction_lock - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset V8: Rebase V9: Changed the function names from gfx_v11* to mes_v11* Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 77 +++ .../gpu/drm/amd/include/amdgpu_userqueue.h| 1 + 2 files changed, 78 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index 8d2cd61af26b..37b80626e792 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -30,6 +30,74 @@ #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE +static int +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(>tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = >wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); The wptr virtual address from the user may not be at offset 0 from the start of the object. We should add the offset to the base vmid0 GPU address. can you please elaborate a bit here ? wptr_obj->obj is already mapped to gart, do we still need this ? - Shashank Alex + return 0; +} + static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, struct amdgpu_mqd_prop *userq_props) @@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, queue_input.queue_size = userq_props->queue_size >> 2; queue_input.doorbell_offset = userq_props->doorbell_index; queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; amdgpu_mes_lock(>mes); r = adev->mes.funcs->add_hw_queue(>mes, _input); @@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + /* FW expects WPTR BOs to be mapped into GART */ + r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + /* Map userqueue into FW using MES */ r = mes_v11_0_userq_map(uq_mgr, queue, userq_props); if (r) { @@ -216,6 +292,7 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct
Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART
On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma wrote: > > To support oversubscription, MES FW expects WPTR BOs to > be mapped into GART, before they are submitted to usermode > queues. This patch adds a function for the same. > > V4: fix the wptr value before mapping lookup (Bas, Christian). > > V5: Addressed review comments from Christian: > - Either pin object or allocate from GART, but not both. > - All the handling must be done with the VM locks held. > > V7: Addressed review comments from Christian: > - Do not take vm->eviction_lock > - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset > > V8: Rebase > V9: Changed the function names from gfx_v11* to mes_v11* > > Cc: Alex Deucher > Cc: Christian Koenig > Signed-off-by: Shashank Sharma > Signed-off-by: Arvind Yadav > --- > .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 77 +++ > .../gpu/drm/amd/include/amdgpu_userqueue.h| 1 + > 2 files changed, 78 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c > b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c > index 8d2cd61af26b..37b80626e792 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c > @@ -30,6 +30,74 @@ > #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE > #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE > > +static int > +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo > *bo) > +{ > + int ret; > + > + ret = amdgpu_bo_reserve(bo, true); > + if (ret) { > + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); > + goto err_reserve_bo_failed; > + } > + > + ret = amdgpu_ttm_alloc_gart(>tbo); > + if (ret) { > + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); > + goto err_map_bo_gart_failed; > + } > + > + amdgpu_bo_unreserve(bo); > + bo = amdgpu_bo_ref(bo); > + > + return 0; > + > +err_map_bo_gart_failed: > + amdgpu_bo_unreserve(bo); > +err_reserve_bo_failed: > + return ret; > +} > + > +static int > +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, > + struct amdgpu_usermode_queue *queue, > + uint64_t wptr) > +{ > + struct amdgpu_device *adev = uq_mgr->adev; > + struct amdgpu_bo_va_mapping *wptr_mapping; > + struct amdgpu_vm *wptr_vm; > + struct amdgpu_userq_obj *wptr_obj = >wptr_obj; > + int ret; > + > + wptr_vm = queue->vm; > + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); > + if (ret) > + return ret; > + > + wptr &= AMDGPU_GMC_HOLE_MASK; > + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> > PAGE_SHIFT); > + amdgpu_bo_unreserve(wptr_vm->root.bo); > + if (!wptr_mapping) { > + DRM_ERROR("Failed to lookup wptr bo\n"); > + return -EINVAL; > + } > + > + wptr_obj->obj = wptr_mapping->bo_va->base.bo; > + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { > + DRM_ERROR("Requested GART mapping for wptr bo larger than one > page\n"); > + return -EINVAL; > + } > + > + ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj); > + if (ret) { > + DRM_ERROR("Failed to map wptr bo to GART\n"); > + return ret; > + } > + > + queue->wptr_obj.gpu_addr = > amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); The wptr virtual address from the user may not be at offset 0 from the start of the object. We should add the offset to the base vmid0 GPU address. Alex > + return 0; > +} > + > static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, >struct amdgpu_usermode_queue *queue, >struct amdgpu_mqd_prop *userq_props) > @@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr > *uq_mgr, > queue_input.queue_size = userq_props->queue_size >> 2; > queue_input.doorbell_offset = userq_props->doorbell_index; > queue_input.page_table_base_addr = > amdgpu_gmc_pd_addr(queue->vm->root.bo); > + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; > > amdgpu_mes_lock(>mes); > r = adev->mes.funcs->add_hw_queue(>mes, _input); > @@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct > amdgpu_userq_mgr *uq_mgr, > goto free_mqd; > } > > + /* FW expects WPTR BOs to be mapped into GART */ > + r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, > userq_props->wptr_gpu_addr); > + if (r) { > + DRM_ERROR("Failed to create WPTR mapping\n"); > + goto free_ctx; > + } > + > /* Map userqueue into FW using MES */ > r = mes_v11_0_userq_map(uq_mgr, queue, userq_props); > if (r) { > @@ -216,6 +292,7 @@