v5 - add convert_to_umsch_priority() to allow user priority setting
v4 - add vcn.agdb_offset which will be used in AMDGPU_INFO_DOORBELL
v3 - 1) use the common function amdgpu_userq_create_wptr_mapping()
2) use dev_err() instead of DRM_ERROR()
3) drop the MQD settings passed from user space (not needed)
4) power gate VCN on the last queue removal
v2 - use amdgpu_bo_gpu_offset() and reserve BO (Christian)
Implement user mode queue infrastructure API support for UMSCH,
and enable VCN user queues for VCN v4_0_5.
Drop VCN v4_0_6 as it is not tested.
Use the new amdgpu_userq_funcs structure for the above functions and
convert rb_size to dwords (David)
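
For reference, creating a VCN user queue from user space is expected to
look roughly like the sketch below. This is a minimal, untested sketch
against the drm_amdgpu_userq uapi already used by GFX user queues
(field names per amdgpu_drm.h, submitted via libdrm's
drmCommandWriteRead()); note that unlike GFX, no user-space MQD blob is
passed in (see the v3 note above):

    #include <stdint.h>
    #include <xf86drm.h>
    #include <amdgpu_drm.h>

    /* Minimal sketch: create a VCN_ENC user queue at normal-high priority. */
    static int create_vcn_userq(int fd, uint64_t ring_va, uint64_t ring_size,
                                uint64_t rptr_va, uint64_t wptr_va,
                                uint32_t doorbell_bo, uint32_t doorbell_offset)
    {
            union drm_amdgpu_userq args = {0};

            args.in.op = AMDGPU_USERQ_OP_CREATE;
            args.in.ip_type = AMDGPU_HW_IP_VCN_ENC;
            /* translated in the kernel by convert_to_umsch_priority() */
            args.in.flags = AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH;
            args.in.queue_va = ring_va;
            args.in.queue_size = ring_size;   /* bytes; the MQD stores dwords */
            args.in.rptr_va = rptr_va;
            args.in.wptr_va = wptr_va;        /* kernel maps this page into GART */
            args.in.doorbell_handle = doorbell_bo;
            args.in.doorbell_offset = doorbell_offset;

            /* on success, args.out.queue_id identifies the new queue */
            return drmCommandWriteRead(fd, DRM_AMDGPU_USERQ,
                                       &args, sizeof(args));
    }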
Signed-off-by: Saleemkhan Jamadar <[email protected]>
Signed-off-by: David (Ming Qiang) Wu <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 237 ++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h | 3 +
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 +
drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c | 5 +
6 files changed, 242 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index 760285ad028f..5a9589b56534 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -31,6 +31,7 @@
#include "amdgpu.h"
#include "amdgpu_umsch_mm.h"
#include "umsch_mm_v4_0.h"
+#include "amdgpu_userq_fence.h"
MODULE_FIRMWARE("amdgpu/umsch_mm_4_0_0.bin");
@@ -125,7 +126,6 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
- case IP_VERSION(4, 0, 6):
fw_name = "4_0_0";
break;
default:
@@ -253,15 +253,21 @@ int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch)
static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
{
+ struct amdgpu_bo *obj = adev->agdb_bo;
uint32_t umsch_mm_agdb_start;
- int i;
+ int i, r;
- umsch_mm_agdb_start = adev->doorbell_index.max_assignment + 1;
- umsch_mm_agdb_start = roundup(umsch_mm_agdb_start, 1024);
- umsch_mm_agdb_start += (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1);
+ r = amdgpu_bo_reserve(obj, true);
+ if (r)
+ return;
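+
+ /* Doorbell page offset of the first VCN AGDB entry; user space
+  * will retrieve it via the AMDGPU_INFO_DOORBELL query (v4).
+  */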
+ adev->vcn.agdb_offset = AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1;
+ umsch_mm_agdb_start = amdgpu_doorbell_index_on_bar(adev, obj,
+ adev->vcn.agdb_offset, sizeof(u32));
+ amdgpu_bo_unreserve(obj);
for (i = 0; i < CONTEXT_PRIORITY_NUM_LEVELS; i++)
- adev->umsch_mm.agdb_index[i] = umsch_mm_agdb_start + i;
+ adev->umsch_mm.agdb_index[i] = umsch_mm_agdb_start +
+ (i * DIV_ROUND_UP(sizeof(u32), 4));
}
static int umsch_mm_init(struct amdgpu_device *adev)
@@ -328,8 +334,8 @@ static int umsch_mm_early_init(struct amdgpu_ip_block *ip_block)
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
- case IP_VERSION(4, 0, 6):
umsch_mm_v4_0_set_funcs(&adev->umsch_mm);
+ adev->userq_funcs[AMDGPU_HW_IP_VCN_ENC] = &userq_umsch_4_0_funcs;
break;
default:
return -EINVAL;
@@ -440,6 +446,216 @@ static int umsch_mm_resume(struct amdgpu_ip_block *ip_block)
return umsch_mm_hw_init(ip_block);
}
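+
+/*
+ * Map the drm_amdgpu_userq priority create flags onto UMSCH context
+ * priority levels; unrecognized values fall back to normal priority.
+ */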
+static int convert_to_umsch_priority(int priority)
+{
+ switch (priority) {
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW:
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW:
+ default:
+ return CONTEXT_PRIORITY_LEVEL_NORMAL;
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH:
+ return CONTEXT_PRIORITY_LEVEL_FOCUS;
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH:
+ return CONTEXT_PRIORITY_LEVEL_REALTIME;
+ }
+}
+
+static int amdgpu_umsch_userq_map(struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_umsch_mm *umsch = &adev->umsch_mm;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ struct umsch_mm_add_queue_input in_queue;
+ int r;
+
+ memset(&in_queue, 0, sizeof(struct umsch_mm_add_queue_input));
+ in_queue.process_id = queue->vm->pasid;
+ in_queue.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
+ in_queue.process_va_start = 0;
+ in_queue.process_va_end = (adev->vm_manager.max_pfn - 1);
+ in_queue.process_quantum = 100000; /* 10ms */
+ in_queue.process_csa_addr = ctx->gpu_addr;
+
+ in_queue.context_quantum = 10000; /* 1ms */
+ in_queue.context_csa_addr = ctx->gpu_addr + AMDGPU_GPU_PAGE_SIZE;
+ in_queue.inprocess_context_priority = CONTEXT_PRIORITY_LEVEL_NORMAL;
+ in_queue.context_global_priority_level = convert_to_umsch_priority(queue->priority);
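+
+ /* One doorbell per VCN instance; the affinity mask selects which
+  * VCN engines are allowed to run this queue.
+  */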
+ if (queue->queue_type == AMDGPU_HW_IP_VCN_ENC) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 5)) {
+ in_queue.doorbell_offset_0 = queue->doorbell_index;
+ in_queue.doorbell_offset_1 = 0;
+ in_queue.affinity = 1;
+ if (adev->vcn.num_vcn_inst == 2) {
+ in_queue.doorbell_offset_1 = (queue->doorbell_index + 2 + 8 * 1);
+ in_queue.affinity = 0x5;
+ }
+ }
+
+ in_queue.engine_type = UMSCH_SWIP_ENGINE_TYPE_VCN;
+ }
+
+ in_queue.mqd_addr = queue->mqd.gpu_addr;
+ in_queue.mqd_type = 2; /* MQD Type Linux */
+ in_queue.fence_signal_addr = queue->fence_drv->gpu_addr;
+ in_queue.vm_context_cntl = hub->vm_cntx_cntl;
+ amdgpu_umsch_mm_lock(&adev->umsch_mm);
+ r = umsch->funcs->add_queue(umsch, &in_queue);
+ amdgpu_umsch_mm_unlock(&adev->umsch_mm);
+ if (r)
+ dev_err(adev->dev, "Failed to create queue. for IP %d r %d\n",
queue->queue_type, r);
+
+ return r;
+}
+
+static int amdgpu_umsch_userq_unmap(struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ struct amdgpu_umsch_mm *umsch = &adev->umsch_mm;
+ struct umsch_mm_remove_queue_input q_input;
+ int r;
+
+ memset(&q_input, 0, sizeof(struct umsch_mm_remove_queue_input));
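+ /* Must match the doorbells programmed in amdgpu_umsch_userq_map() */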
+ q_input.doorbell_offset_0 = queue->doorbell_index;
+ q_input.doorbell_offset_1 = 0;
+ if (adev->vcn.num_vcn_inst == 2)
+ q_input.doorbell_offset_1 = (queue->doorbell_index + 2 + 8 * 1);
+
+ q_input.context_csa_addr = ctx->gpu_addr + AMDGPU_GPU_PAGE_SIZE;
+
+ amdgpu_umsch_mm_lock(&adev->umsch_mm);
+ r = umsch->funcs->remove_queue(umsch, &q_input);
+ amdgpu_umsch_mm_unlock(&adev->umsch_mm);
+ if (r)
+ dev_err(adev->dev, "Failed to unmap queue in HW, err (%d)\n",
r);
+
+ return r;
+}
+
+static int amdgpu_umsch_mqd_create(struct amdgpu_usermode_queue *queue,
+ struct drm_amdgpu_userq_in *args)
+{
+ struct amdgpu_userq_obj *sfence = &queue->suspend_fence_obj;
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_umsch_mm *umsch = &adev->umsch_mm;
+ struct amdgpu_mqd_prop *userq_props;
+ struct MQD_INFO *mqd;
+ int r, size, i;
+
+ /* Structure to initialize MQD for userqueue using generic MQD init function */
+ userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL);
+ if (!userq_props) {
+ dev_err(adev->dev, "Failed to allocate memory for
userq_props\n");
+ return -ENOMEM;
+ }
+
+ /* FW expects WPTR BOs to be mapped into GART */
+ r = amdgpu_userq_create_wptr_mapping(uq_mgr, queue, args->wptr_va);
+ if (r) {
+ dev_err(adev->dev, "Failed to create WPTR mapping\n");
+ goto exit;
+ }
+
+ r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, sizeof(struct MQD_INFO));
+ if (r) {
+ dev_err(adev->dev, "Failed to create MQD object for
userqueue\n");
+ goto exit;
+ }
+
+ userq_props->wptr_gpu_addr = args->wptr_va;
+ userq_props->mqd_gpu_addr = queue->mqd.gpu_addr;
+ userq_props->use_doorbell = true;
+ userq_props->doorbell_index = queue->doorbell_index; /* VCN0 doorbell */
+ queue->userq_prop = userq_props;
+
+ /* Initialize the MQD BO with user given values */
+ mqd = (struct MQD_INFO *)queue->mqd.cpu_ptr;
+ memset(mqd, 0, sizeof(struct MQD_INFO));
+ mqd->rb_base_lo = lower_32_bits(args->queue_va);
+ mqd->rb_base_hi = upper_32_bits(args->queue_va);
+ mqd->wptr_addr_monotonic_hi = upper_32_bits(queue->wptr_obj.gpu_addr);
+ mqd->wptr_addr_monotonic_lo = lower_32_bits(queue->wptr_obj.gpu_addr);
+ mqd->rptr_addr_monotonic_hi = upper_32_bits(args->rptr_va);
+ mqd->rptr_addr_monotonic_lo = lower_32_bits(args->rptr_va);
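+ /* ring size is programmed in dwords; queue_size arrives in bytes */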
+ mqd->rb_size = args->queue_size / 4;
+ mqd->unmapped = 1;
+
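+ /* One page of process CSA followed by one page of context CSA;
+  * amdgpu_umsch_userq_map() hands both to the scheduler firmware.
+  */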
+ size = 2 * PAGE_SIZE;
+ r = amdgpu_userq_create_object(uq_mgr, ctx, size);
+ if (r) {
+ dev_err(adev->dev, "Failed to allocate ctx space bo for
userqueue, err:%d\n", r);
+ goto free_mqd;
+ }
+
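+ /* Keep VCN ungated while any user queue exists; the matching
+  * power gate happens on the last queue removal in
+  * amdgpu_umsch_destroy_queue().
+  */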
+ mutex_lock(&umsch->mutex_hidden);
+ atomic_inc(&umsch->userq_count);
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_VCN) {
+ const struct amdgpu_ip_block_version *ip_block =
+ adev->ip_blocks[i].version;
+
+ r = ip_block->funcs->set_powergating_state(&adev->ip_blocks[i],
+ AMD_PG_STATE_UNGATE);
+ if (r) {
+ dev_err(adev->dev, "Failed to power on VCN, err %d\n", r);
+ atomic_dec(&umsch->userq_count);
+ mutex_unlock(&umsch->mutex_hidden);
+ goto free_ctx;
+ }
+ }
+ }
+ mutex_unlock(&umsch->mutex_hidden);
+
+ r = amdgpu_userq_create_object(uq_mgr, sfence, AMDGPU_GPU_PAGE_SIZE);
+ if (r) {
+ DRM_ERROR("Failed to allocate suspend fence bo for userq,
err:%d\n", r);
+ goto free_ctx;
+ }
+
+ return 0;
+
+free_ctx:
+ amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+
+free_mqd:
+ amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+
+exit:
+ kfree(userq_props);
+ return r;
+}
+
+static void amdgpu_umsch_destroy_queue(struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_umsch_mm *umsch = &adev->umsch_mm;
+ int r, i;
+
+ amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+ amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+ kfree(queue->userq_prop);
+
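+ /* Power gate VCN again once the last user queue is destroyed */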
+ mutex_lock(&umsch->mutex_hidden);
+ if (!atomic_dec_return(&umsch->userq_count)) {
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_VCN) {
+ const struct amdgpu_ip_block_version *ip_block =
+ adev->ip_blocks[i].version;
+
+ r = ip_block->funcs->set_powergating_state(&adev->ip_blocks[i],
+ AMD_PG_STATE_GATE);
+ if (r)
+ dev_err(adev->dev, "Failed to power off VCN, err %d\n", r);
+ }
+ }
+ }
+ mutex_unlock(&umsch->mutex_hidden);
+}
+
void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm)
{
#if defined(CONFIG_DEBUG_FS)
@@ -559,3 +775,10 @@ const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
.rev = 0,
.funcs = &umsch_mm_v4_0_ip_funcs,
};
+
+const struct amdgpu_userq_funcs userq_umsch_4_0_funcs = {
+ .mqd_create = amdgpu_umsch_mqd_create,
+ .mqd_destroy = amdgpu_umsch_destroy_queue,
+ .map = amdgpu_umsch_userq_map,
+ .unmap = amdgpu_umsch_userq_unmap,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
index af34faa5e1ef..6b827c92e817 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
@@ -25,6 +25,8 @@
#ifndef __AMDGPU_UMSCH_MM_H__
#define __AMDGPU_UMSCH_MM_H__
+extern const struct amdgpu_userq_funcs userq_umsch_4_0_funcs;
+
enum UMSCH_SWIP_ENGINE_TYPE {
UMSCH_SWIP_ENGINE_TYPE_VCN = 0,
UMSCH_SWIP_ENGINE_TYPE_VPE = 1,
@@ -181,6 +183,7 @@ struct amdgpu_umsch_mm {
uint64_t log_gpu_addr;
uint32_t mem_size;
uint32_t log_offset;
+ atomic_t userq_count;
};
int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 59e593b3bae7..50c75acc8e94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -916,7 +916,8 @@ static int amdgpu_userq_input_args_validate(struct drm_device *dev,
/* Usermode queues are only supported for GFX IP as of now */
if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
args->in.ip_type != AMDGPU_HW_IP_DMA &&
- args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
+ args->in.ip_type != AMDGPU_HW_IP_COMPUTE &&
+ args->in.ip_type != AMDGPU_HW_IP_VCN_ENC) {
drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
args->in.ip_type);
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 758464203d98..7a5b5c204601 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -66,6 +66,7 @@ struct amdgpu_usermode_queue {
struct amdgpu_userq_obj db_obj;
struct amdgpu_userq_obj fw_obj;
struct amdgpu_userq_obj wptr_obj;
+ struct amdgpu_userq_obj suspend_fence_obj;
struct xarray fence_drv_xa;
struct amdgpu_userq_fence_driver *fence_drv;
struct dma_fence *last_fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index bea95307fd42..ec8540fcc916 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -371,6 +371,7 @@ struct amdgpu_vcn {
bool disable_uq;
bool disable_kq;
+ uint32_t agdb_offset;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
index 79e1ec9933c5..60d1fdfb2af5 100644
--- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
@@ -289,6 +289,11 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch)
set_hw_resources.g_sch_ctx_gpu_mc_ptr = umsch->sch_ctx_gpu_addr;
set_hw_resources.enable_level_process_quantum_check = 1;
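+
+ /* Tell UMSCH which VCN instances can service user queues */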
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 5)) {
+ set_hw_resources.is_vcn0_enabled = 1;
+ if (adev->vcn.num_vcn_inst == 2)
+ set_hw_resources.is_vcn1_enabled = 1;
+ }
memcpy(set_hw_resources.mmhub_base, adev->reg_offset[MMHUB_HWIP][0],
sizeof(uint32_t) * 5);
--
2.43.0