initialization table handshake with mmsch
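
Initialize the rings and perform the initialization table handshake with
the MMSCH when running as an SRIOV VF. The MM table used for the
handshake is allocated in sw_init and freed in sw_fini. For each VCN
instance, vcn_v4_0_3_start_sriov() fills an MMSCH init table with the
VCPU cache, stack, context and non-cached (fw_shared) buffer addresses
plus the ring buffer setup, writes the table address and size to the
MMSCH VF context registers, starts the handshake through the mailbox,
and polls MMSCH_VF_MAILBOX_RESP for up to 1000 usec. Under SRIOV the
doorbell programming and ring tests in hw_init are skipped; the unified
encode rings are reset and marked ready instead.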

Signed-off-by: Samir Dhume <samir.dh...@amd.com>
---
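Note for reviewers (below the scissors line, so git-am discards it): the
per-instance handshake in vcn_v4_0_3_start_sriov() reduces to the sequence
sketched here. This is a condensed, non-compilable outline reusing only
registers and helpers the patch itself touches; it is not extra code to
apply.

	/* 1. Build the init table in the MM table buffer: header first,
	 *    then direct-write/read-modify-write commands programming the
	 *    VCPU cache, stack, context and non-cache BARs, terminated by
	 *    an END command; record the table offset/size in the header.
	 */

	/* 2. Hand the table to the MMSCH and kick off the handshake. */
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, header.total_size);
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
	WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, 0x00000001);

	/* 3. Poll regMMSCH_VF_MAILBOX_RESP every 10 usec, up to 1000 usec.
	 *    Any nonzero readback ends the wait; only MMSCH_VF_MAILBOX_RESP__OK,
	 *    or INCOMPLETE with a PASS init_status in the header, is success.
	 */
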
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 260 +++++++++++++++++++++---
 1 file changed, 236 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 411c1d802823..66eb0c8e6f94 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
 #include "soc15d.h"
 #include "soc15_hw_ip.h"
 #include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
 #define VCN_VID_SOC_ADDRESS_2_0                0x1fb00
 #define VCN1_VID_SOC_ADDRESS_3_0       0x48300
 
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -130,6 +132,12 @@ static int vcn_v4_0_3_sw_init(void *handle)
                        amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
        }
 
+       if (amdgpu_sriov_vf(adev)) {
+               r = amdgpu_virt_alloc_mm_table(adev);
+               if (r)
+                       return r;
+       }
+
        if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
                adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
 
@@ -167,6 +175,9 @@ static int vcn_v4_0_3_sw_fini(void *handle)
                drm_dev_exit(idx);
        }
 
+       if (amdgpu_sriov_vf(adev))
+               amdgpu_virt_free_mm_table(adev);
+
        r = amdgpu_vcn_suspend(adev);
        if (r)
                return r;
@@ -189,33 +200,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
        struct amdgpu_ring *ring;
        int i, r, vcn_inst;
 
-       for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-               vcn_inst = GET_INST(VCN, i);
-               ring = &adev->vcn.inst[i].ring_enc[0];
+       if (amdgpu_sriov_vf(adev)) {
+               r = vcn_v4_0_3_start_sriov(adev);
+               if (r)
+                       goto done;
 
-               if (ring->use_doorbell) {
-                       adev->nbio.funcs->vcn_doorbell_range(
-                               adev, ring->use_doorbell,
-                               (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-                                       9 * vcn_inst,
-                               adev->vcn.inst[i].aid_id);
-
-                       WREG32_SOC15(
-                               VCN, GET_INST(VCN, ring->me),
-                               regVCN_RB1_DB_CTRL,
-                               ring->doorbell_index
-                                               << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
-                                       VCN_RB1_DB_CTRL__EN_MASK);
-
-                       /* Read DB_CTRL to flush the write DB_CTRL command. */
-                       RREG32_SOC15(
-                               VCN, GET_INST(VCN, ring->me),
-                               regVCN_RB1_DB_CTRL);
+               for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+                       ring = &adev->vcn.inst[i].ring_enc[0];
+                       ring->wptr = 0;
+                       ring->wptr_old = 0;
+                       vcn_v4_0_3_unified_ring_set_wptr(ring);
+                       ring->sched.ready = true;
                }
+       } else {
+               for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+                       vcn_inst = GET_INST(VCN, i);
+                       ring = &adev->vcn.inst[i].ring_enc[0];
+
+                       if (ring->use_doorbell) {
+                               adev->nbio.funcs->vcn_doorbell_range(
+                                       adev, ring->use_doorbell,
+                                       (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+                                               9 * vcn_inst,
+                                       adev->vcn.inst[i].aid_id);
+
+                               WREG32_SOC15(
+                                       VCN, GET_INST(VCN, ring->me),
+                                       regVCN_RB1_DB_CTRL,
+                                       ring->doorbell_index
+                                                       << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+                                               VCN_RB1_DB_CTRL__EN_MASK);
+
+                               /* Read DB_CTRL to flush the write DB_CTRL command. */
+                               RREG32_SOC15(
+                                       VCN, GET_INST(VCN, ring->me),
+                                       regVCN_RB1_DB_CTRL);
+                       }
 
-               r = amdgpu_ring_test_helper(ring);
-               if (r)
-                       goto done;
+                       r = amdgpu_ring_test_helper(ring);
+                       if (r)
+                               goto done;
+               }
        }
 
 done:
@@ -813,6 +838,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
        return 0;
 }
 
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+       int i, vcn_inst;
+       struct amdgpu_ring *ring_enc;
+       uint64_t cache_addr;
+       uint64_t rb_enc_addr;
+       uint64_t ctx_addr;
+       uint32_t param, resp, expected;
+       uint32_t offset, cache_size;
+       uint32_t tmp, timeout;
+
+       struct amdgpu_mm_table *table = &adev->virt.mm_table;
+       uint32_t *table_loc;
+       uint32_t table_size;
+       uint32_t size, size_dw;
+       uint32_t init_status;
+       uint32_t enabled_vcn;
+
+       struct mmsch_v4_0_cmd_direct_write
+               direct_wt = { {0} };
+       struct mmsch_v4_0_cmd_direct_read_modify_write
+               direct_rd_mod_wt = { {0} };
+       struct mmsch_v4_0_cmd_end end = { {0} };
+       struct mmsch_v4_0_3_init_header header;
+
+       volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+       volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+       direct_wt.cmd_header.command_type =
+               MMSCH_COMMAND__DIRECT_REG_WRITE;
+       direct_rd_mod_wt.cmd_header.command_type =
+               MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+       end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+       for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+               vcn_inst = GET_INST(VCN, i);
+
+               memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+               header.version = MMSCH_VERSION;
+               header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+               table_loc = (uint32_t *)table->cpu_addr;
+               table_loc += header.total_size;
+
+               table_size = 0;
+
+               MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+                       ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+               cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+               if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+                       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                               regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+                               adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+                       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                               regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+                               adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+                       offset = 0;
+                       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                               regUVD_VCPU_CACHE_OFFSET0), 0);
+               } else {
+                       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                               regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+                               lower_32_bits(adev->vcn.inst[i].gpu_addr));
+                       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                               regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+                               upper_32_bits(adev->vcn.inst[i].gpu_addr));
+                       offset = cache_size;
+                       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                               regUVD_VCPU_CACHE_OFFSET0),
+                               AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+               }
+
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_VCPU_CACHE_SIZE0),
+                       cache_size);
+
+               cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_VCPU_CACHE_OFFSET1), 0);
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+               cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+                       AMDGPU_VCN_STACK_SIZE;
+
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_VCPU_CACHE_OFFSET2), 0);
+
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+               fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+               rb_setup = &fw_shared->rb_setup;
+
+               ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+               ring_enc->wptr = 0;
+               rb_enc_addr = ring_enc->gpu_addr;
+
+               rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+               rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+               rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+               rb_setup->rb_size = ring_enc->ring_size / 4;
+               fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+                       lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+                       upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+               MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+                       regUVD_VCPU_NONCACHE_SIZE0),
+                       AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+               MMSCH_V4_0_INSERT_END();
+
+               header.vcn0.init_status = 0;
+               header.vcn0.table_offset = header.total_size;
+               header.vcn0.table_size = table_size;
+               header.total_size += table_size;
+
+               /* Send init table to mmsch */
+               size = sizeof(struct mmsch_v4_0_3_init_header);
+               table_loc = (uint32_t *)table->cpu_addr;
+               memcpy((void *)table_loc, &header, size);
+
+               ctx_addr = table->gpu_addr;
+               WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+               WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+               tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+               tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+               tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+               WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+               size = header.total_size;
+               WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+               WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+               param = 0x00000001;
+               WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+               tmp = 0;
+               timeout = 1000;
+               resp = 0;
+               expected = MMSCH_VF_MAILBOX_RESP__OK;
+               while (resp != expected) {
+                       resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+                       if (resp != 0)
+                               break;
+
+                       udelay(10);
+                       tmp = tmp + 10;
+                       if (tmp >= timeout) {
+                               DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+                                       " waiting for regMMSCH_VF_MAILBOX_RESP "\
+                                       "(expected=0x%08x, readback=0x%08x)\n",
+                                       tmp, expected, resp);
+                               return -EBUSY;
+                       }
+               }
+
+               enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+               init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
+               if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+                                       && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+                       DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+                               "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+               }
+       }
+
+       return 0;
+}
+
 /**
  * vcn_v4_0_3_start - VCN start
  *
-- 
2.34.1
