Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART

2024-05-02 Thread Sharma, Shashank



On 02/05/2024 17:18, Christian König wrote:

Am 26.04.24 um 15:48 schrieb Shashank Sharma:

To support oversubscription, MES FW expects WPTR BOs to
be mapped into GART, before they are submitted to usermode
queues. This patch adds a function for the same.

V4: fix the wptr value before mapping lookup (Bas, Christian).

V5: Addressed review comments from Christian:
 - Either pin object or allocate from GART, but not both.
 - All the handling must be done with the VM locks held.

V7: Addressed review comments from Christian:
 - Do not take vm->eviction_lock
 - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset

V8: Rebase
V9: Changed the function names from gfx_v11* to mes_v11*

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 


The patch itself looks good, but this really need the eviction fence 
to work properly.


Otherwise it can be that the BO mapped into the GART is evicted at 
some point.



Noted, eviction fences will be following up soon.

- Shashank



Christian.


---
  .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 77 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  1 +
  2 files changed, 78 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c

index 8d2cd61af26b..37b80626e792 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
@@ -30,6 +30,74 @@
  #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
  #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
  +static int
+mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct 
amdgpu_bo *bo)

+{
+    int ret;
+
+    ret = amdgpu_bo_reserve(bo, true);
+    if (ret) {
+    DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+    goto err_reserve_bo_failed;
+    }
+
+    ret = amdgpu_ttm_alloc_gart(>tbo);
+    if (ret) {
+    DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+    goto err_map_bo_gart_failed;
+    }
+
+    amdgpu_bo_unreserve(bo);
+    bo = amdgpu_bo_ref(bo);
+
+    return 0;
+
+err_map_bo_gart_failed:
+    amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+    return ret;
+}
+
+static int
+mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+  struct amdgpu_usermode_queue *queue,
+  uint64_t wptr)
+{
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_bo_va_mapping *wptr_mapping;
+    struct amdgpu_vm *wptr_vm;
+    struct amdgpu_userq_obj *wptr_obj = >wptr_obj;
+    int ret;
+
+    wptr_vm = queue->vm;
+    ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+    if (ret)
+    return ret;
+
+    wptr &= AMDGPU_GMC_HOLE_MASK;
+    wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> 
PAGE_SHIFT);

+    amdgpu_bo_unreserve(wptr_vm->root.bo);
+    if (!wptr_mapping) {
+    DRM_ERROR("Failed to lookup wptr bo\n");
+    return -EINVAL;
+    }
+
+    wptr_obj->obj = wptr_mapping->bo_va->base.bo;
+    if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+    DRM_ERROR("Requested GART mapping for wptr bo larger than 
one page\n");

+    return -EINVAL;
+    }
+
+    ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj);
+    if (ret) {
+    DRM_ERROR("Failed to map wptr bo to GART\n");
+    return ret;
+    }
+
+    queue->wptr_obj.gpu_addr = 
amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);

+    return 0;
+}
+
  static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
 struct amdgpu_usermode_queue *queue,
 struct amdgpu_mqd_prop *userq_props)
@@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct 
amdgpu_userq_mgr *uq_mgr,

  queue_input.queue_size = userq_props->queue_size >> 2;
  queue_input.doorbell_offset = userq_props->doorbell_index;
  queue_input.page_table_base_addr = 
amdgpu_gmc_pd_addr(queue->vm->root.bo);

+    queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
    amdgpu_mes_lock(>mes);
  r = adev->mes.funcs->add_hw_queue(>mes, _input);
@@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,

  goto free_mqd;
  }
  +    /* FW expects WPTR BOs to be mapped into GART */
+    r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, 
userq_props->wptr_gpu_addr);

+    if (r) {
+    DRM_ERROR("Failed to create WPTR mapping\n");
+    goto free_ctx;
+    }
+
  /* Map userqueue into FW using MES */
  r = mes_v11_0_userq_map(uq_mgr, queue, userq_props);
  if (r) {
@@ -216,6 +292,7 @@ mes_v11_0_userq_mqd_destroy(struct 
amdgpu_userq_mgr *uq_mgr,

  struct amdgpu_usermode_queue *queue)
  {
  mes_v11_0_userq_unmap(uq_mgr, queue);
+    amdgpu_bo_unref(>wptr_obj.obj);
  amdgpu_userqueue_destroy_object(uq_mgr, >fw_obj);
  kfree(queue->userq_prop);
  amdgpu_userqueue_destroy_object(uq_mgr, >mqd);
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h 

Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART

2024-05-02 Thread Christian König

Am 26.04.24 um 15:48 schrieb Shashank Sharma:

To support oversubscription, MES FW expects WPTR BOs to
be mapped into GART, before they are submitted to usermode
queues. This patch adds a function for the same.

V4: fix the wptr value before mapping lookup (Bas, Christian).

V5: Addressed review comments from Christian:
 - Either pin object or allocate from GART, but not both.
 - All the handling must be done with the VM locks held.

V7: Addressed review comments from Christian:
 - Do not take vm->eviction_lock
 - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset

V8: Rebase
V9: Changed the function names from gfx_v11* to mes_v11*

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 


The patch itself looks good, but this really need the eviction fence to 
work properly.


Otherwise it can be that the BO mapped into the GART is evicted at some 
point.


Christian.


---
  .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 77 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h|  1 +
  2 files changed, 78 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
index 8d2cd61af26b..37b80626e792 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
@@ -30,6 +30,74 @@
  #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
  #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
  
+static int

+mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+   int ret;
+
+   ret = amdgpu_bo_reserve(bo, true);
+   if (ret) {
+   DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+   goto err_reserve_bo_failed;
+   }
+
+   ret = amdgpu_ttm_alloc_gart(>tbo);
+   if (ret) {
+   DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+   goto err_map_bo_gart_failed;
+   }
+
+   amdgpu_bo_unreserve(bo);
+   bo = amdgpu_bo_ref(bo);
+
+   return 0;
+
+err_map_bo_gart_failed:
+   amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+   return ret;
+}
+
+static int
+mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ uint64_t wptr)
+{
+   struct amdgpu_device *adev = uq_mgr->adev;
+   struct amdgpu_bo_va_mapping *wptr_mapping;
+   struct amdgpu_vm *wptr_vm;
+   struct amdgpu_userq_obj *wptr_obj = >wptr_obj;
+   int ret;
+
+   wptr_vm = queue->vm;
+   ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+   if (ret)
+   return ret;
+
+   wptr &= AMDGPU_GMC_HOLE_MASK;
+   wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
+   amdgpu_bo_unreserve(wptr_vm->root.bo);
+   if (!wptr_mapping) {
+   DRM_ERROR("Failed to lookup wptr bo\n");
+   return -EINVAL;
+   }
+
+   wptr_obj->obj = wptr_mapping->bo_va->base.bo;
+   if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+   DRM_ERROR("Requested GART mapping for wptr bo larger than one 
page\n");
+   return -EINVAL;
+   }
+
+   ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj);
+   if (ret) {
+   DRM_ERROR("Failed to map wptr bo to GART\n");
+   return ret;
+   }
+
+   queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
+   return 0;
+}
+
  static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
   struct amdgpu_usermode_queue *queue,
   struct amdgpu_mqd_prop *userq_props)
@@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr 
*uq_mgr,
queue_input.queue_size = userq_props->queue_size >> 2;
queue_input.doorbell_offset = userq_props->doorbell_index;
queue_input.page_table_base_addr = 
amdgpu_gmc_pd_addr(queue->vm->root.bo);
+   queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
  
  	amdgpu_mes_lock(>mes);

r = adev->mes.funcs->add_hw_queue(>mes, _input);
@@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
  
+	/* FW expects WPTR BOs to be mapped into GART */

+   r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, 
userq_props->wptr_gpu_addr);
+   if (r) {
+   DRM_ERROR("Failed to create WPTR mapping\n");
+   goto free_ctx;
+   }
+
/* Map userqueue into FW using MES */
r = mes_v11_0_userq_map(uq_mgr, queue, userq_props);
if (r) {
@@ -216,6 +292,7 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
  {
mes_v11_0_userq_unmap(uq_mgr, queue);
+   amdgpu_bo_unref(>wptr_obj.obj);
amdgpu_userqueue_destroy_object(uq_mgr, 

Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART

2024-05-02 Thread Alex Deucher
On Thu, May 2, 2024 at 1:31 AM Sharma, Shashank  wrote:
>
>
> On 01/05/2024 23:36, Alex Deucher wrote:
> > On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma  
> > wrote:
> >> To support oversubscription, MES FW expects WPTR BOs to
> >> be mapped into GART, before they are submitted to usermode
> >> queues. This patch adds a function for the same.
> >>
> >> V4: fix the wptr value before mapping lookup (Bas, Christian).
> >>
> >> V5: Addressed review comments from Christian:
> >>  - Either pin object or allocate from GART, but not both.
> >>  - All the handling must be done with the VM locks held.
> >>
> >> V7: Addressed review comments from Christian:
> >>  - Do not take vm->eviction_lock
> >>  - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset
> >>
> >> V8: Rebase
> >> V9: Changed the function names from gfx_v11* to mes_v11*
> >>
> >> Cc: Alex Deucher 
> >> Cc: Christian Koenig 
> >> Signed-off-by: Shashank Sharma 
> >> Signed-off-by: Arvind Yadav 
> >> ---
> >>   .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 77 +++
> >>   .../gpu/drm/amd/include/amdgpu_userqueue.h|  1 +
> >>   2 files changed, 78 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c 
> >> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> >> index 8d2cd61af26b..37b80626e792 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> >> @@ -30,6 +30,74 @@
> >>   #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
> >>   #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
> >>
> >> +static int
> >> +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo 
> >> *bo)
> >> +{
> >> +   int ret;
> >> +
> >> +   ret = amdgpu_bo_reserve(bo, true);
> >> +   if (ret) {
> >> +   DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
> >> +   goto err_reserve_bo_failed;
> >> +   }
> >> +
> >> +   ret = amdgpu_ttm_alloc_gart(>tbo);
> >> +   if (ret) {
> >> +   DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
> >> +   goto err_map_bo_gart_failed;
> >> +   }
> >> +
> >> +   amdgpu_bo_unreserve(bo);
> >> +   bo = amdgpu_bo_ref(bo);
> >> +
> >> +   return 0;
> >> +
> >> +err_map_bo_gart_failed:
> >> +   amdgpu_bo_unreserve(bo);
> >> +err_reserve_bo_failed:
> >> +   return ret;
> >> +}
> >> +
> >> +static int
> >> +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
> >> + struct amdgpu_usermode_queue *queue,
> >> + uint64_t wptr)
> >> +{
> >> +   struct amdgpu_device *adev = uq_mgr->adev;
> >> +   struct amdgpu_bo_va_mapping *wptr_mapping;
> >> +   struct amdgpu_vm *wptr_vm;
> >> +   struct amdgpu_userq_obj *wptr_obj = >wptr_obj;
> >> +   int ret;
> >> +
> >> +   wptr_vm = queue->vm;
> >> +   ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
> >> +   if (ret)
> >> +   return ret;
> >> +
> >> +   wptr &= AMDGPU_GMC_HOLE_MASK;
> >> +   wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> 
> >> PAGE_SHIFT);
> >> +   amdgpu_bo_unreserve(wptr_vm->root.bo);
> >> +   if (!wptr_mapping) {
> >> +   DRM_ERROR("Failed to lookup wptr bo\n");
> >> +   return -EINVAL;
> >> +   }
> >> +
> >> +   wptr_obj->obj = wptr_mapping->bo_va->base.bo;
> >> +   if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
> >> +   DRM_ERROR("Requested GART mapping for wptr bo larger than 
> >> one page\n");
> >> +   return -EINVAL;
> >> +   }
> >> +
> >> +   ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj);
> >> +   if (ret) {
> >> +   DRM_ERROR("Failed to map wptr bo to GART\n");
> >> +   return ret;
> >> +   }
> >> +
> >> +   queue->wptr_obj.gpu_addr = 
> >> amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
> > The wptr virtual address from the user may not be at offset 0 from the
> > start of the object.  We should add the offset to the base vmid0 GPU
> > address.
>
> can you please elaborate a bit here ? wptr_obj->obj is already mapped to
> gart, do we still need this ?

The location that the MES will poll needs to be the same as the
location that the UMD will be writing to.  E.g., if you allocate the
BO and then map it into user space at location 0x5000 in the user's
GPU virtual address space and then the user uses 0x5008 as the wptr
address, we need to make sure that we are polling in MES at vmid0
virtual address + 0x8.  If you map the BO at 0x2000 in the vmid0
address space, you need to make sure to point the firmware to 0x2008.

Alex

>
> - Shashank
>
> >
> > Alex
> >
> >> +   return 0;
> >> +}
> >> +
> >>   static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
> >> struct amdgpu_usermode_queue *queue,
> >> struct amdgpu_mqd_prop 

Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART

2024-05-02 Thread Sharma, Shashank



On 02/05/2024 15:06, Kasiviswanathan, Harish wrote:

[AMD Official Use Only - General]

-Original Message-
From: amd-gfx  On Behalf Of Sharma, 
Shashank
Sent: Thursday, May 2, 2024 1:32 AM
To: Alex Deucher 
Cc: amd-gfx@lists.freedesktop.org; Yadav, Arvind ; Deucher, Alexander 
; Koenig, Christian 
Subject: Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART


On 01/05/2024 23:36, Alex Deucher wrote:

On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma  wrote:

To support oversubscription, MES FW expects WPTR BOs to
be mapped into GART, before they are submitted to usermode
queues. This patch adds a function for the same.

V4: fix the wptr value before mapping lookup (Bas, Christian).

V5: Addressed review comments from Christian:
  - Either pin object or allocate from GART, but not both.
  - All the handling must be done with the VM locks held.

V7: Addressed review comments from Christian:
  - Do not take vm->eviction_lock
  - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset

V8: Rebase
V9: Changed the function names from gfx_v11* to mes_v11*

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
   .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 77 +++
   .../gpu/drm/amd/include/amdgpu_userqueue.h|  1 +
   2 files changed, 78 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
index 8d2cd61af26b..37b80626e792 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
@@ -30,6 +30,74 @@
   #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
   #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE

+static int
+mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+   int ret;
+
+   ret = amdgpu_bo_reserve(bo, true);
+   if (ret) {
+   DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+   goto err_reserve_bo_failed;
+   }
+
+   ret = amdgpu_ttm_alloc_gart(>tbo);
+   if (ret) {
+   DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+   goto err_map_bo_gart_failed;
+   }
+
+   amdgpu_bo_unreserve(bo);
+   bo = amdgpu_bo_ref(bo);
+
+   return 0;
+
+err_map_bo_gart_failed:
+   amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+   return ret;
+}
+

There is a very similar function amdgpu_amdkfd_map_gtt_bo_to_gart(). Is it 
possible to unify. Also, adev parameter in the above function is confusing. 
This was also removed from amdgpu_amdkfd_map_gtt_bo_to_gart(). It looks like bo 
is mapped to gart of adev, however it doesn't have to be. It is mapped to the 
gart to which bo is associated.


I don't think unification makes much sense here, but I agree that adev 
can be removed from the input args. I will update this.


- Shashank


+static int
+mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ uint64_t wptr)
+{
+   struct amdgpu_device *adev = uq_mgr->adev;
+   struct amdgpu_bo_va_mapping *wptr_mapping;
+   struct amdgpu_vm *wptr_vm;
+   struct amdgpu_userq_obj *wptr_obj = >wptr_obj;
+   int ret;
+
+   wptr_vm = queue->vm;
+   ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+   if (ret)
+   return ret;
+
+   wptr &= AMDGPU_GMC_HOLE_MASK;
+   wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
+   amdgpu_bo_unreserve(wptr_vm->root.bo);
+   if (!wptr_mapping) {
+   DRM_ERROR("Failed to lookup wptr bo\n");
+   return -EINVAL;
+   }
+
+   wptr_obj->obj = wptr_mapping->bo_va->base.bo;
+   if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+   DRM_ERROR("Requested GART mapping for wptr bo larger than one 
page\n");
+   return -EINVAL;
+   }
+
+   ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj);
+   if (ret) {
+   DRM_ERROR("Failed to map wptr bo to GART\n");
+   return ret;
+   }
+
+   queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);

The wptr virtual address from the user may not be at offset 0 from the
start of the object.  We should add the offset to the base vmid0 GPU
address.

can you please elaborate a bit here ? wptr_obj->obj is already mapped to
gart, do we still need this ?

- Shashank


Alex


+   return 0;
+}
+
   static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
 struct amdgpu_usermode_queue *queue,
 struct amdgpu_mqd_prop *userq_props)
@@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr 
*uq_mgr,
  queue_input

RE: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART

2024-05-02 Thread Kasiviswanathan, Harish
[AMD Official Use Only - General]

-Original Message-
From: amd-gfx  On Behalf Of Sharma, 
Shashank
Sent: Thursday, May 2, 2024 1:32 AM
To: Alex Deucher 
Cc: amd-gfx@lists.freedesktop.org; Yadav, Arvind ; 
Deucher, Alexander ; Koenig, Christian 

Subject: Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART


On 01/05/2024 23:36, Alex Deucher wrote:
> On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma  
> wrote:
>> To support oversubscription, MES FW expects WPTR BOs to
>> be mapped into GART, before they are submitted to usermode
>> queues. This patch adds a function for the same.
>>
>> V4: fix the wptr value before mapping lookup (Bas, Christian).
>>
>> V5: Addressed review comments from Christian:
>>  - Either pin object or allocate from GART, but not both.
>>  - All the handling must be done with the VM locks held.
>>
>> V7: Addressed review comments from Christian:
>>  - Do not take vm->eviction_lock
>>  - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset
>>
>> V8: Rebase
>> V9: Changed the function names from gfx_v11* to mes_v11*
>>
>> Cc: Alex Deucher 
>> Cc: Christian Koenig 
>> Signed-off-by: Shashank Sharma 
>> Signed-off-by: Arvind Yadav 
>> ---
>>   .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 77 +++
>>   .../gpu/drm/amd/include/amdgpu_userqueue.h|  1 +
>>   2 files changed, 78 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c 
>> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
>> index 8d2cd61af26b..37b80626e792 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
>> @@ -30,6 +30,74 @@
>>   #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>>   #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>>
>> +static int
>> +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo 
>> *bo)
>> +{
>> +   int ret;
>> +
>> +   ret = amdgpu_bo_reserve(bo, true);
>> +   if (ret) {
>> +   DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
>> +   goto err_reserve_bo_failed;
>> +   }
>> +
>> +   ret = amdgpu_ttm_alloc_gart(>tbo);
>> +   if (ret) {
>> +   DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
>> +   goto err_map_bo_gart_failed;
>> +   }
>> +
>> +   amdgpu_bo_unreserve(bo);
>> +   bo = amdgpu_bo_ref(bo);
>> +
>> +   return 0;
>> +
>> +err_map_bo_gart_failed:
>> +   amdgpu_bo_unreserve(bo);
>> +err_reserve_bo_failed:
>> +   return ret;
>> +}
>> +

There is a very similar function amdgpu_amdkfd_map_gtt_bo_to_gart(). Is it 
possible to unify. Also, adev parameter in the above function is confusing. 
This was also removed from amdgpu_amdkfd_map_gtt_bo_to_gart(). It looks like bo 
is mapped to gart of adev, however it doesn't have to be. It is mapped to the 
gart to which bo is associated.

>> +static int
>> +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
>> + struct amdgpu_usermode_queue *queue,
>> + uint64_t wptr)
>> +{
>> +   struct amdgpu_device *adev = uq_mgr->adev;
>> +   struct amdgpu_bo_va_mapping *wptr_mapping;
>> +   struct amdgpu_vm *wptr_vm;
>> +   struct amdgpu_userq_obj *wptr_obj = >wptr_obj;
>> +   int ret;
>> +
>> +   wptr_vm = queue->vm;
>> +   ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
>> +   if (ret)
>> +   return ret;
>> +
>> +   wptr &= AMDGPU_GMC_HOLE_MASK;
>> +   wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> 
>> PAGE_SHIFT);
>> +   amdgpu_bo_unreserve(wptr_vm->root.bo);
>> +   if (!wptr_mapping) {
>> +   DRM_ERROR("Failed to lookup wptr bo\n");
>> +   return -EINVAL;
>> +   }
>> +
>> +   wptr_obj->obj = wptr_mapping->bo_va->base.bo;
>> +   if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
>> +   DRM_ERROR("Requested GART mapping for wptr bo larger than 
>> one page\n");
>> +   return -EINVAL;
>> +   }
>> +
>> +   ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj);
>> +   if (ret) {
>> +   DRM_ERROR("Failed to map wptr bo to GART\n");
>> +   return ret;
>> +

Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART

2024-05-01 Thread Sharma, Shashank



On 01/05/2024 23:36, Alex Deucher wrote:

On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma  wrote:

To support oversubscription, MES FW expects WPTR BOs to
be mapped into GART, before they are submitted to usermode
queues. This patch adds a function for the same.

V4: fix the wptr value before mapping lookup (Bas, Christian).

V5: Addressed review comments from Christian:
 - Either pin object or allocate from GART, but not both.
 - All the handling must be done with the VM locks held.

V7: Addressed review comments from Christian:
 - Do not take vm->eviction_lock
 - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset

V8: Rebase
V9: Changed the function names from gfx_v11* to mes_v11*

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 77 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h|  1 +
  2 files changed, 78 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
index 8d2cd61af26b..37b80626e792 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
@@ -30,6 +30,74 @@
  #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
  #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE

+static int
+mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+   int ret;
+
+   ret = amdgpu_bo_reserve(bo, true);
+   if (ret) {
+   DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+   goto err_reserve_bo_failed;
+   }
+
+   ret = amdgpu_ttm_alloc_gart(>tbo);
+   if (ret) {
+   DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+   goto err_map_bo_gart_failed;
+   }
+
+   amdgpu_bo_unreserve(bo);
+   bo = amdgpu_bo_ref(bo);
+
+   return 0;
+
+err_map_bo_gart_failed:
+   amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+   return ret;
+}
+
+static int
+mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ uint64_t wptr)
+{
+   struct amdgpu_device *adev = uq_mgr->adev;
+   struct amdgpu_bo_va_mapping *wptr_mapping;
+   struct amdgpu_vm *wptr_vm;
+   struct amdgpu_userq_obj *wptr_obj = >wptr_obj;
+   int ret;
+
+   wptr_vm = queue->vm;
+   ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+   if (ret)
+   return ret;
+
+   wptr &= AMDGPU_GMC_HOLE_MASK;
+   wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
+   amdgpu_bo_unreserve(wptr_vm->root.bo);
+   if (!wptr_mapping) {
+   DRM_ERROR("Failed to lookup wptr bo\n");
+   return -EINVAL;
+   }
+
+   wptr_obj->obj = wptr_mapping->bo_va->base.bo;
+   if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+   DRM_ERROR("Requested GART mapping for wptr bo larger than one 
page\n");
+   return -EINVAL;
+   }
+
+   ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj);
+   if (ret) {
+   DRM_ERROR("Failed to map wptr bo to GART\n");
+   return ret;
+   }
+
+   queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);

The wptr virtual address from the user may not be at offset 0 from the
start of the object.  We should add the offset to the base vmid0 GPU
address.


can you please elaborate a bit here ? wptr_obj->obj is already mapped to 
gart, do we still need this ?


- Shashank



Alex


+   return 0;
+}
+
  static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue,
struct amdgpu_mqd_prop *userq_props)
@@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr 
*uq_mgr,
 queue_input.queue_size = userq_props->queue_size >> 2;
 queue_input.doorbell_offset = userq_props->doorbell_index;
 queue_input.page_table_base_addr = 
amdgpu_gmc_pd_addr(queue->vm->root.bo);
+   queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;

 amdgpu_mes_lock(>mes);
 r = adev->mes.funcs->add_hw_queue(>mes, _input);
@@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct 
amdgpu_userq_mgr *uq_mgr,
 goto free_mqd;
 }

+   /* FW expects WPTR BOs to be mapped into GART */
+   r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, 
userq_props->wptr_gpu_addr);
+   if (r) {
+   DRM_ERROR("Failed to create WPTR mapping\n");
+   goto free_ctx;
+   }
+
 /* Map userqueue into FW using MES */
 r = mes_v11_0_userq_map(uq_mgr, queue, userq_props);
 if (r) {
@@ -216,6 +292,7 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
 struct 

Re: [PATCH v9 08/14] drm/amdgpu: map wptr BO into GART

2024-05-01 Thread Alex Deucher
On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma  wrote:
>
> To support oversubscription, MES FW expects WPTR BOs to
> be mapped into GART, before they are submitted to usermode
> queues. This patch adds a function for the same.
>
> V4: fix the wptr value before mapping lookup (Bas, Christian).
>
> V5: Addressed review comments from Christian:
> - Either pin object or allocate from GART, but not both.
> - All the handling must be done with the VM locks held.
>
> V7: Addressed review comments from Christian:
> - Do not take vm->eviction_lock
> - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset
>
> V8: Rebase
> V9: Changed the function names from gfx_v11* to mes_v11*
>
> Cc: Alex Deucher 
> Cc: Christian Koenig 
> Signed-off-by: Shashank Sharma 
> Signed-off-by: Arvind Yadav 
> ---
>  .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 77 +++
>  .../gpu/drm/amd/include/amdgpu_userqueue.h|  1 +
>  2 files changed, 78 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c 
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> index 8d2cd61af26b..37b80626e792 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> @@ -30,6 +30,74 @@
>  #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>  #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>
> +static int
> +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo 
> *bo)
> +{
> +   int ret;
> +
> +   ret = amdgpu_bo_reserve(bo, true);
> +   if (ret) {
> +   DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
> +   goto err_reserve_bo_failed;
> +   }
> +
> +   ret = amdgpu_ttm_alloc_gart(>tbo);
> +   if (ret) {
> +   DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
> +   goto err_map_bo_gart_failed;
> +   }
> +
> +   amdgpu_bo_unreserve(bo);
> +   bo = amdgpu_bo_ref(bo);
> +
> +   return 0;
> +
> +err_map_bo_gart_failed:
> +   amdgpu_bo_unreserve(bo);
> +err_reserve_bo_failed:
> +   return ret;
> +}
> +
> +static int
> +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
> + struct amdgpu_usermode_queue *queue,
> + uint64_t wptr)
> +{
> +   struct amdgpu_device *adev = uq_mgr->adev;
> +   struct amdgpu_bo_va_mapping *wptr_mapping;
> +   struct amdgpu_vm *wptr_vm;
> +   struct amdgpu_userq_obj *wptr_obj = >wptr_obj;
> +   int ret;
> +
> +   wptr_vm = queue->vm;
> +   ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
> +   if (ret)
> +   return ret;
> +
> +   wptr &= AMDGPU_GMC_HOLE_MASK;
> +   wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> 
> PAGE_SHIFT);
> +   amdgpu_bo_unreserve(wptr_vm->root.bo);
> +   if (!wptr_mapping) {
> +   DRM_ERROR("Failed to lookup wptr bo\n");
> +   return -EINVAL;
> +   }
> +
> +   wptr_obj->obj = wptr_mapping->bo_va->base.bo;
> +   if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
> +   DRM_ERROR("Requested GART mapping for wptr bo larger than one 
> page\n");
> +   return -EINVAL;
> +   }
> +
> +   ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj);
> +   if (ret) {
> +   DRM_ERROR("Failed to map wptr bo to GART\n");
> +   return ret;
> +   }
> +
> +   queue->wptr_obj.gpu_addr = 
> amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);

The wptr virtual address from the user may not be at offset 0 from the
start of the object.  We should add the offset to the base vmid0 GPU
address.

Alex

> +   return 0;
> +}
> +
>  static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
>struct amdgpu_usermode_queue *queue,
>struct amdgpu_mqd_prop *userq_props)
> @@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr 
> *uq_mgr,
> queue_input.queue_size = userq_props->queue_size >> 2;
> queue_input.doorbell_offset = userq_props->doorbell_index;
> queue_input.page_table_base_addr = 
> amdgpu_gmc_pd_addr(queue->vm->root.bo);
> +   queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
>
> amdgpu_mes_lock(>mes);
> r = adev->mes.funcs->add_hw_queue(>mes, _input);
> @@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct 
> amdgpu_userq_mgr *uq_mgr,
> goto free_mqd;
> }
>
> +   /* FW expects WPTR BOs to be mapped into GART */
> +   r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, 
> userq_props->wptr_gpu_addr);
> +   if (r) {
> +   DRM_ERROR("Failed to create WPTR mapping\n");
> +   goto free_ctx;
> +   }
> +
> /* Map userqueue into FW using MES */
> r = mes_v11_0_userq_map(uq_mgr, queue, userq_props);
> if (r) {
> @@ -216,6 +292,7 @@