Re: [PATCH v6 7/9] drm/amdgpu: map wptr BO into GART

2023-10-05 Thread Christian König

On 2023-10-04 23:34, Felix Kuehling wrote:


On 2023-09-18 06:32, Christian König wrote:

On 2023-09-08 18:04, Shashank Sharma wrote:

To support oversubscription, MES FW expects WPTR BOs to
be mapped into GART, before they are submitted to usermode
queues. This patch adds a function for the same.

[...]

+    ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+    if (ret) {
+        DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+        goto err_map_bo_gart_failed;
+    }
+
+    amdgpu_bo_unreserve(bo);

The GART mapping can become invalid as soon as you unlock the BOs.

You need to attach an eviction fence for this to work correctly.


Don't you need an eviction fence on the WPTR BO regardless of the GTT 
mapping?


Yeah, indeed :)

Long story short we need a general eviction fence handling for BOs.

Regards,
Christian.
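
A rough illustration of what such a generic eviction fence could look like is sketched below. This is only a sketch with the usual dma_fence boilerplate; the names (userq_ev_fence, userq_ev_fence_create) are hypothetical and not part of this series.

#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical driver eviction fence; not part of the posted patch. */
struct userq_ev_fence {
	struct dma_fence base;
	spinlock_t lock;
};

static const char *userq_ev_fence_driver_name(struct dma_fence *f)
{
	return "amdgpu_userq";
}

static const char *userq_ev_fence_timeline_name(struct dma_fence *f)
{
	return "userq_eviction";
}

static const struct dma_fence_ops userq_ev_fence_ops = {
	.get_driver_name = userq_ev_fence_driver_name,
	.get_timeline_name = userq_ev_fence_timeline_name,
};

/*
 * Allocate one eviction fence. The driver would attach it to the BOs a user
 * queue depends on and signal it once the queue has been preempted/unmapped,
 * so TTM is allowed to move those BOs afterwards.
 */
static struct userq_ev_fence *userq_ev_fence_create(void)
{
	struct userq_ev_fence *fence = kzalloc(sizeof(*fence), GFP_KERNEL);

	if (!fence)
		return NULL;

	spin_lock_init(&fence->lock);
	dma_fence_init(&fence->base, &userq_ev_fence_ops, &fence->lock,
		       dma_fence_context_alloc(1), 0);
	return fence;
}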



Re: [PATCH v6 7/9] drm/amdgpu: map wptr BO into GART

2023-10-05 Thread Shashank Sharma

Hey Felix,

On 04/10/2023 23:34, Felix Kuehling wrote:


On 2023-09-18 06:32, Christian König wrote:

On 2023-09-08 18:04, Shashank Sharma wrote:

[...]

+    amdgpu_bo_unreserve(bo);


The GART mapping can become invalid as soon as you unlock the BOs.

You need to attach an eviction fence for this to work correctly.


Don't you need an eviction fence on the WPTR BO regardless of the GTT 
mapping?


Yes, Christian also mentioned this in this iteration. I have implemented 
the basic eviction fence for [V7] and will publish it soon.


- Shashank




Re: [PATCH v6 7/9] drm/amdgpu: map wptr BO into GART

2023-10-04 Thread Felix Kuehling



On 2023-09-18 06:32, Christian König wrote:

On 2023-09-08 18:04, Shashank Sharma wrote:

[...]

+    amdgpu_bo_unreserve(bo);


The GART mapping can become invalid as soon as you unlock the BOs.

You need to attach an eviction fence for this to work correctly.


Don't you need an eviction fence on the WPTR BO regardless of the GTT 
mapping?


Regards,
  Felix






Re: [PATCH v6 7/9] drm/amdgpu: map wptr BO into GART

2023-09-18 Thread Christian König

On 2023-09-08 18:04, Shashank Sharma wrote:

To support oversubscription, MES FW expects WPTR BOs to
be mapped into GART, before they are submitted to usermode
queues. This patch adds a function for the same.

V4: fix the wptr value before mapping lookup (Bas, Christian).
V5: Addressed review comments from Christian:
 - Either pin object or allocate from GART, but not both.
 - All the handling must be done with the VM locks held.

Cc: Alex Deucher 
Cc: Christian Koenig 
Signed-off-by: Shashank Sharma 
Signed-off-by: Arvind Yadav 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c     | 81 +++
  .../gpu/drm/amd/include/amdgpu_userqueue.h |  1 +
  2 files changed, 82 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index e266674e0d44..c0eb622dfc37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6427,6 +6427,79 @@ const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
	.funcs = &gfx_v11_0_ip_funcs,
  };
  
+static int
+gfx_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+   int ret;
+
+   ret = amdgpu_bo_reserve(bo, true);
+   if (ret) {
+   DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+   goto err_reserve_bo_failed;
+   }
+
+   ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+   if (ret) {
+   DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+   goto err_map_bo_gart_failed;
+   }
+
+   amdgpu_bo_unreserve(bo);


The GART mapping can become invalid as soon as you unlock the BOs.

You need to attach an eviction fence for this to work correctly.

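As an illustration only, a hedged sketch of how such a fence could be attached while the reservation is still held, i.e. before the amdgpu_bo_unreserve() above. Here "ev_fence" is a placeholder for whatever eviction fence object the driver ends up using; none of this is part of the posted patch.

	/*
	 * Sketch only (assumption): make room for and add an eviction fence
	 * while the reservation is still held; "ev_fence" is a placeholder
	 * for the driver's eviction fence.
	 */
	ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
	if (ret) {
		DRM_ERROR("Failed to reserve fence slot. ret %d\n", ret);
		goto err_map_bo_gart_failed;
	}
	dma_resv_add_fence(bo->tbo.base.resv, ev_fence, DMA_RESV_USAGE_BOOKKEEP);

	amdgpu_bo_unreserve(bo);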

+   bo = amdgpu_bo_ref(bo);
+
+   return 0;
+
+err_map_bo_gart_failed:
+   amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+   return ret;
+}
+
+static int
+gfx_v11_0_create_wptr_mapping(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue,
+ uint64_t wptr)
+{
+   struct amdgpu_bo_va_mapping *wptr_mapping;
+   struct amdgpu_vm *wptr_vm;
+   struct amdgpu_bo *wptr_bo = NULL;
+   int ret;
+
+   mutex_lock(&queue->vm->eviction_lock);


Never ever touch the eviction lock outside of the VM code! That lock is 
completely unrelated to what you do here.



+   wptr_vm = queue->vm;
+   ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+   if (ret)
+   goto unlock;
+
+   wptr &= AMDGPU_GMC_HOLE_MASK;
+   wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
+   amdgpu_bo_unreserve(wptr_vm->root.bo);
+   if (!wptr_mapping) {
+   DRM_ERROR("Failed to lookup wptr bo\n");
+   ret = -EINVAL;
+   goto unlock;
+   }
+
+   wptr_bo = wptr_mapping->bo_va->base.bo;
+   if (wptr_bo->tbo.base.size > PAGE_SIZE) {
+   DRM_ERROR("Requested GART mapping for wptr bo larger than one 
page\n");
+   ret = -EINVAL;
+   goto unlock;
+   }


We probably also want to enforce that this BO is a per VM BO.

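A sketch of such a check, under the assumption that a per-VM BO can be recognized by sharing the reservation object of the VM's root page directory; this is not part of the posted v6 patch.

	/* Sketch (assumption): require a per-VM BO, i.e. one that shares the
	 * reservation object of the VM's root page directory.
	 */
	if (wptr_bo->tbo.base.resv != wptr_vm->root.bo->tbo.base.resv) {
		DRM_ERROR("wptr bo is not a per-VM bo\n");
		ret = -EINVAL;
		goto unlock;
	}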

+
+   ret = gfx_v11_0_map_gtt_bo_to_gart(adev, wptr_bo);
+   if (ret) {
+   DRM_ERROR("Failed to map wptr bo to GART\n");
+   goto unlock;
+   }
+
+   queue->wptr_mc_addr = wptr_bo->tbo.resource->start << PAGE_SHIFT;


This needs to be amdgpu_bo_gpu_offset() instead.

Regards,
Christian.

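For reference, a sketch of the suggested change; since the GART binding is only stable while the BO is reserved (or protected by an eviction fence, as noted above), the helper would presumably be called before the BO is unreserved, so the exact placement here is an assumption.

	/* Sketch: let the helper compute the MC address instead of shifting
	 * the raw resource start by hand.
	 */
	queue->wptr_mc_addr = amdgpu_bo_gpu_offset(wptr_bo);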

+
+unlock:
+   mutex_unlock(&queue->vm->eviction_lock);
+   return ret;
+}
+
  static void gfx_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
  struct amdgpu_usermode_queue *queue)
  {
@@ -6475,6 +6548,7 @@ static int gfx_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
queue_input.queue_size = userq_props->queue_size >> 2;
queue_input.doorbell_offset = userq_props->doorbell_index;
	queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
+   queue_input.wptr_mc_addr = queue->wptr_mc_addr;
  
	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
@@ -6601,6 +6675,13 @@ static int gfx_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
  
+	/* FW expects WPTR BOs to be mapped into GART */

+   r = gfx_v11_0_create_wptr_mapping(adev, queue, userq_props.wptr_gpu_addr);
+   if (r) {
+   DRM_ERROR("Failed to create WPTR mapping\n");
+   goto free_ctx;
+   }
+
/* Map userqueue into FW using MES */
	r = gfx_v11_0_userq_map(uq_mgr, queue, &userq_props);
if (r) {
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index 34e20daa06c8..ae155de62560 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -39,6 +39,7 @@ struct amdgpu_usermode_queue {
 	int			queue_type;
 	uint64_t		doorbell_handle;
 	uint64_t		doorbell_index;
+	uint64_t		wptr_mc_addr;