initialization table handshake with mmsch

Signed-off-by: Samir Dhume <samir.dh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 174 ++++++++++++++++++++---
 1 file changed, 153 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ce2b22f7e4e4..33f04ea8549f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -26,6 +26,7 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int jpeg_v4_0_3_set_powergating_state(void *handle,
                                enum amd_powergating_state state);
 static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
 
 static int amdgpu_ih_srcid_jpeg[] = {
        VCN_4_0__SRCID__JPEG_DECODE,
@@ -160,6 +162,120 @@ static int jpeg_v4_0_3_sw_fini(void *handle)
        return r;
 }
 
+/**
+ * jpeg_v4_0_3_start_sriov - start the JPEG rings through the MMSCH handshake
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For each JPEG instance, build an init table in adev->virt.mm_table that
+ * programs the base address and size of every decode ring, pass the table
+ * to MMSCH through the VF context/mailbox registers and poll the mailbox
+ * response register for an answer.
+ *
+ * Returns 0 once every instance has answered (an unexpected status is only
+ * logged), or -EBUSY when the response register stays 0 for the whole
+ * polling window.
+ */
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+       struct amdgpu_ring *ring;
+       uint64_t ctx_addr;
+       uint32_t param, resp, expected;
+       uint32_t tmp, timeout;
+
+       struct amdgpu_mm_table *table = &adev->virt.mm_table;
+       uint32_t *table_loc;
+       uint32_t table_size;
+       /*
+        * NOTE(review): size_dw is not referenced directly in this function;
+        * presumably the MMSCH_V4_0_INSERT_* macros use it (together with
+        * size, table_loc, table_size, direct_wt and end) - confirm before
+        * removing any of these locals.
+        */
+       uint32_t size, size_dw, item_offset;
+       uint32_t init_status;
+       int i, j, jpeg_inst;
+
+       struct mmsch_v4_0_cmd_direct_write
+               direct_wt = { {0} };
+       struct mmsch_v4_0_cmd_end end = { {0} };
+       struct mmsch_v4_0_3_init_header header;
+
+       direct_wt.cmd_header.command_type =
+               MMSCH_COMMAND__DIRECT_REG_WRITE;
+       end.cmd_header.command_type =
+               MMSCH_COMMAND__END;
+
+       for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+               jpeg_inst = GET_INST(JPEG, i);
+
+               memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+               header.version = MMSCH_VERSION;
+               /* header size expressed in dwords */
+               header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
+
+               /* the per-ring commands are placed right behind the header */
+               table_loc = (uint32_t *)table->cpu_addr;
+               table_loc += header.total_size;
+
+               item_offset = header.total_size;
+
+               for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+                       ring = &adev->jpeg.inst[i].ring_dec[j];
+                       table_size = 0;
+
+                       tmp = SOC15_REG_OFFSET(JPEG, 0,
+                                       regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+                       MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+                       tmp = SOC15_REG_OFFSET(JPEG, 0,
+                                       regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+                       MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+                       tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+                       MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+                       /* rings 0-3 are described by mjpegdec0[], later ones by mjpegdec1[] */
+                       if (j <= 3) {
+                               header.mjpegdec0[j].table_offset = item_offset;
+                               header.mjpegdec0[j].init_status = 0;
+                               header.mjpegdec0[j].table_size = table_size;
+                       } else {
+                               header.mjpegdec1[j-4].table_offset = item_offset;
+                               header.mjpegdec1[j-4].init_status = 0;
+                               header.mjpegdec1[j-4].table_size = table_size;
+                       }
+                       header.total_size += table_size;
+                       item_offset += table_size;
+               }
+
+               MMSCH_V4_0_INSERT_END();
+
+               /* send init table to MMSCH */
+               size = sizeof(struct mmsch_v4_0_3_init_header);
+               table_loc = (uint32_t *)table->cpu_addr;
+               memcpy((void *)table_loc, &header, size);
+
+               ctx_addr = table->gpu_addr;
+               WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO,
+                            lower_32_bits(ctx_addr));
+               WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI,
+                            upper_32_bits(ctx_addr));
+
+               /* clear the context VMID field (VMID 0) */
+               tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+               tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+               tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+               WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+               size = header.total_size;
+               WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+               WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+               param = 0x00000001;
+               WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+
+               /* poll in 10 usec steps; any non-zero response ends the wait */
+               tmp = 0;
+               timeout = 1000;
+               resp = 0;
+               expected = MMSCH_VF_MAILBOX_RESP__OK;
+               while (resp != expected) {
+                       resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+                       if (resp != 0)
+                               break;
+                       udelay(10);
+                       tmp = tmp + 10;
+                       if (tmp >= timeout) {
+                               DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+                                       " waiting for regMMSCH_VF_MAILBOX_RESP "\
+                                       "(expected=0x%08x, readback=0x%08x)\n",
+                                       tmp, expected, resp);
+                               return -EBUSY;
+                       }
+               }
+
+               /*
+                * Read the header status only after the mailbox has answered:
+                * before the handshake the field can only hold the 0 that was
+                * copied into the table above, which made the check below
+                * independent of the actual init result.
+                * NOTE(review): the slot is indexed by the instance number i,
+                * not by a ring index - confirm mjpegdec0[i] is intended.
+                */
+               init_status =
+                       ((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status;
+               if (resp != expected &&
+                   resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+                   init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+                       DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+                                 resp, init_status);
+               }
+       }
+       return 0;
+}
+
+
 /**
  * jpeg_v4_0_3_hw_init - start and test JPEG block
  *
@@ -172,31 +288,47 @@ static int jpeg_v4_0_3_hw_init(void *handle)
        struct amdgpu_ring *ring;
        int i, j, r, jpeg_inst;
 
-       for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
-               jpeg_inst = GET_INST(JPEG, i);
+       if (amdgpu_sriov_vf(adev)) {
+               r = jpeg_v4_0_3_start_sriov(adev);
+               if (r)
+                       return r;
 
-               ring = adev->jpeg.inst[i].ring_dec;
+               for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+                       for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+                               ring = &adev->jpeg.inst[i].ring_dec[j];
+                               ring->wptr = 0;
+                               ring->wptr_old = 0;
+                               jpeg_v4_0_3_dec_ring_set_wptr(ring);
+                               ring->sched.ready = true;
+                       }
+               }
+       } else {
+               for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+                       jpeg_inst = GET_INST(JPEG, i);
 
-               if (ring->use_doorbell)
-                       adev->nbio.funcs->vcn_doorbell_range(
-                               adev, ring->use_doorbell,
-                               (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-                                       9 * jpeg_inst,
-                               adev->jpeg.inst[i].aid_id);
+                       ring = adev->jpeg.inst[i].ring_dec;
 
-               for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
-                       ring = &adev->jpeg.inst[i].ring_dec[j];
                        if (ring->use_doorbell)
-                               WREG32_SOC15_OFFSET(
-                                       VCN, GET_INST(VCN, i),
-                                       regVCN_JPEG_DB_CTRL,
-                                       (ring->pipe ? (ring->pipe - 0x15) : 0),
-                                       ring->doorbell_index
-                                                       << 
VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
-                                               VCN_JPEG_DB_CTRL__EN_MASK);
-                       r = amdgpu_ring_test_helper(ring);
-                       if (r)
-                               return r;
+                               adev->nbio.funcs->vcn_doorbell_range(
+                                       adev, ring->use_doorbell,
+                                       (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+                                               9 * jpeg_inst,
+                                       adev->jpeg.inst[i].aid_id);
+
+                       for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+                               ring = &adev->jpeg.inst[i].ring_dec[j];
+                               if (ring->use_doorbell)
+                                       WREG32_SOC15_OFFSET(
+                                               VCN, GET_INST(VCN, i),
+                                               regVCN_JPEG_DB_CTRL,
+                                               (ring->pipe ? (ring->pipe - 
0x15) : 0),
+                                               ring->doorbell_index
+                                                               << 
VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+                                                       
VCN_JPEG_DB_CTRL__EN_MASK);
+                               r = amdgpu_ring_test_helper(ring);
+                               if (r)
+                                       return r;
+                       }
                }
        }
        DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
-- 
2.34.1

Reply via email to