[PATCH v3 3/3] drm/amdgpu/jpeg: support for sriov cpx mode
In SRIOV CPX mode, each VF has 4 jpeg engines. The even- numbered VFs point to JPEG0 block of the AID and the odd- numbered VFs point to the JPEG1 block. Even-numbered VFs Odd numbered VFs VCN doorbell 0 VCN Decode ring VCN Decode ring VCN doorbell 1-3Reserved Reserved VCN doorbell 4 JPEG0-0 ring VCN doorbell 5 JPEG0-1 ring VCN doorbell 6 JPEG0-2 ring VCN doorbell 7 JPEG0-3 ring VCN doorbell 8JPEG1-0 ring VCN doorbell 9JPEG1-1 ring VCN doorbell 10 JPEG1-2 ring VCN doorbell 11 JPEG1-3 ring Changes involve 1. sriov cpx mode - 4 rings 2. sriov cpx mode for odd numbered VFs - register correct src-ids (starting with JPEG4). Map src-id to correct instance in interrupt- handler. v2: 1. removed mmio access from interrupt handler 2. remove unneccessary sriov variables Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 60 +--- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 32caeb37cef9..d95ca797412c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -68,6 +68,11 @@ static int jpeg_v4_0_3_early_init(void *handle) adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + /* check for sriov cpx mode */ + if (amdgpu_sriov_vf(adev)) + if (adev->gfx.xcc_mask == 0x1) + adev->jpeg.num_jpeg_rings = 4; + jpeg_v4_0_3_set_dec_ring_funcs(adev); jpeg_v4_0_3_set_irq_funcs(adev); jpeg_v4_0_3_set_ras_funcs(adev); @@ -87,11 +92,25 @@ static int jpeg_v4_0_3_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; int i, j, r, jpeg_inst; + bool sriov_cpx_odd = false; + + /* check for sriov cpx mode odd/even numbered vfs */ + if (amdgpu_sriov_vf(adev)) { + if (adev->gfx.xcc_mask == 0x1) { + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) + sriov_cpx_odd = true; + } + } for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { /* JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + if (!sriov_cpx_odd) + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, amdgpu_ih_srcid_jpeg[j], >jpeg.inst->irq); + else + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + amdgpu_ih_srcid_jpeg[j+4], >jpeg.inst->irq); + if (r) return r; } @@ -116,10 +135,14 @@ static int jpeg_v4_0_3_sw_init(void *handle) (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 9 * jpeg_inst; } else { - if (j < 4) + if ((j < 4) && (!sriov_cpx_odd)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 4 + j + 32 * jpeg_inst; + else if (sriov_cpx_odd) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 12 + j + 32 * jpeg_inst; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + @@ -186,6 +209,7 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) uint32_t size, size_dw, item_offset; uint32_t init_status; int i, j, jpeg_inst; + bool cpx_odd = false; struct mmsch_v4_0_cmd_direct_write direct_wt = { {0} }; @@ -197,6 +221,12 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) end.cmd_header.command_type = MMSCH_COMMAND__END; + /* check for cpx mode odd/even numbered vf */ + if (adev->gfx.xcc_mask == 0x1) { + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) + cpx_odd = true; + } + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { jpeg_inst = GET_INST(JPEG, i); @@ -220,10 +250,14 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
[PATCH v3 2/3] drm/amdgpu: sdma support for sriov cpx mode
sdma has 2 instances in SRIOV cpx mode. Odd numbered VFs have sdma0/sdma1 instances. Even numbered vfs have sdma2/sdma3. Changes involve 1. identifying odd/even numbered VF 2. registering correct number of instances with irq handler 3. mapping instance number with IH client-id depending upon whether vf is odd/even numbered. v2: 1. fix for correct number of instances registered with irq 2. remove mmio access from interrupt handler. Use xcc_mask to detect cpx mode. v3: 1. restore all instances registered with irq in case there is harvesting of some sdma instances. Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +--- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index eaa4f5f49949..01d1024e91ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -82,7 +82,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num) } } -static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) +static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id) { switch (client_id) { case SOC15_IH_CLIENTID_SDMA0: @@ -90,9 +90,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) case SOC15_IH_CLIENTID_SDMA1: return 1; case SOC15_IH_CLIENTID_SDMA2: - return 2; + if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1)) + return 0; + else + return 2; case SOC15_IH_CLIENTID_SDMA3: - return 3; + if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1)) + return 1; + else + return 3; default: return -EINVAL; } @@ -1541,7 +1547,7 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev, uint32_t instance, i; DRM_DEBUG("IH: SDMA trap\n"); - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); /* Client id gives the SDMA instance in AID. To know the exact SDMA * instance, interrupt entry gives the node id which corresponds to the AID instance. @@ -1584,7 +1590,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev, if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) goto out; - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0) goto out; @@ -1603,7 +1609,7 @@ static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev, DRM_ERROR("Illegal instruction in SDMA command stream\n"); - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0) return 0; @@ -1647,7 +1653,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_task_info *task_info; u64 addr; - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0 || instance >= adev->sdma.num_instances) { dev_err(adev->dev, "sdma instance invalid %d\n", instance); return -EINVAL; -- 2.34.1
[PATCH v3 1/3] drm/amdgpu: function to read physical xcc_id
For SRIOV CPX mode, the assignments of jpeg doorbells depends on whether the VF is even/odd numbered. Physical xcc_id provides info whether the VF is even/odd. regCP_PSP_XCP_CTL is RO for VF through rlcg. Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 04a86dff71e6..451192403c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -297,6 +297,7 @@ struct amdgpu_gfx_funcs { int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); + int (*get_xcc_id)(struct amdgpu_device *adev, int inst); }; struct sq_work { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b53c8fd4e8cf..68508c19a9b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -669,6 +669,11 @@ static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) return xcc - 1; } +static int gfx_v9_4_3_get_xcc_id(struct amdgpu_device *adev, int inst) +{ + return RREG32_SOC15(GC, GET_INST(GC, inst), regCP_PSP_XCP_CTL); +} + static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .get_gpu_clock_counter = _v9_4_3_get_gpu_clock_counter, .select_se_sh = _v9_4_3_xcc_select_se_sh, @@ -678,6 +683,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .select_me_pipe_q = _v9_4_3_select_me_pipe_q, .switch_partition_mode = _v9_4_3_switch_compute_partition, .ih_node_to_logical_xcc = _v9_4_3_ih_to_xcc_inst, + .get_xcc_id = _v9_4_3_get_xcc_id, }; static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, -- 2.34.1
[PATCH v2 2/3] drm/amdgpu: sdma support for sriov cpx mode
sdma has 2 instances in SRIOV cpx mode. Odd numbered VFs have sdma0/sdma1 instances. Even numbered vfs have sdma2/sdma3. Changes involve 1. identifying odd/even numbered VF 2. registering correct number of instances with irq handler 3. mapping instance number with IH client-id depending upon whether vf is odd/even numbered. v2: 1. fix for correct number of instances registered with irq 2. remove mmio access from interrupt handler. Use xcc_mask to detect cpx mode. Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 63 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index eaa4f5f49949..117a7c692c0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -66,13 +66,28 @@ static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset); } -static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num) +static unsigned sdma_v4_4_2_seq_to_irq_id(struct amdgpu_device *adev, int seq_num) { + bool sriov_cpx_odd = false; + + /* check for sriov cpx mode odd/even vf */ + if (amdgpu_sriov_vf(adev)) { + if (adev->gfx.xcc_mask == 0x1) + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) + sriov_cpx_odd = true; + } + switch (seq_num) { case 0: - return SOC15_IH_CLIENTID_SDMA0; + if (sriov_cpx_odd) + return SOC15_IH_CLIENTID_SDMA2; + else + return SOC15_IH_CLIENTID_SDMA0; case 1: - return SOC15_IH_CLIENTID_SDMA1; + if (sriov_cpx_odd) + return SOC15_IH_CLIENTID_SDMA3; + else + return SOC15_IH_CLIENTID_SDMA1; case 2: return SOC15_IH_CLIENTID_SDMA2; case 3: @@ -82,7 +97,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num) } } -static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) +static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id) { switch (client_id) { case SOC15_IH_CLIENTID_SDMA0: @@ -90,9 +105,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) case SOC15_IH_CLIENTID_SDMA1: return 1; case SOC15_IH_CLIENTID_SDMA2: - return 2; + if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1)) + return 0; + else + return 2; case SOC15_IH_CLIENTID_SDMA3: - return 3; + if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1)) + return 1; + else + return 3; default: return -EINVAL; } @@ -1300,13 +1321,15 @@ static int sdma_v4_4_2_late_init(void *handle) static int sdma_v4_4_2_sw_init(void *handle) { struct amdgpu_ring *ring; - int r, i; + int r, i, num_irq_inst; struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 aid_id; + num_irq_inst = min(adev->sdma.num_instances, adev->sdma.num_inst_per_aid); + /* SDMA trap event */ - for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { - r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), + for (i = 0; i < num_irq_inst; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i), SDMA0_4_0__SRCID__SDMA_TRAP, >sdma.trap_irq); if (r) @@ -1314,8 +1337,8 @@ static int sdma_v4_4_2_sw_init(void *handle) } /* SDMA SRAM ECC event */ - for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { - r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), + for (i = 0; i < num_irq_inst; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i), SDMA0_4_0__SRCID__SDMA_SRAM_ECC, >sdma.ecc_irq); if (r) @@ -1323,26 +1346,26 @@ static int sdma_v4_4_2_sw_init(void *handle) } /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/ - for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { - r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), + for (i = 0; i < num_irq_inst; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i), SDMA0_4_0__SRCID__SDMA_VM_HOLE, >sdma.vm_hole_irq); if (r
[PATCH v2 3/3] drm/amdgpu/jpeg: support for sriov cpx mode
In SRIOV CPX mode, each VF has 4 jpeg engines. The even- numbered VFs point to JPEG0 block of the AID and the odd- numbered VFs point to the JPEG1 block. Even-numbered VFs Odd numbered VFs VCN doorbell 0 VCN Decode ring VCN Decode ring VCN doorbell 1-3Reserved Reserved VCN doorbell 4 JPEG0-0 ring VCN doorbell 5 JPEG0-1 ring VCN doorbell 6 JPEG0-2 ring VCN doorbell 7 JPEG0-3 ring VCN doorbell 8JPEG1-0 ring VCN doorbell 9JPEG1-1 ring VCN doorbell 10 JPEG1-2 ring VCN doorbell 11 JPEG1-3 ring Changes involve 1. sriov cpx mode - 4 rings 2. sriov cpx mode for odd numbered VFs - register correct src-ids (starting with JPEG4). Map src-id to correct instance in interrupt- handler. v2: 1. removed mmio access from interrupt handler. Use xcc_mask to detect cpx mode. 2. remove unneccessary sriov variables Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 60 +--- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 32caeb37cef9..d95ca797412c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -68,6 +68,11 @@ static int jpeg_v4_0_3_early_init(void *handle) adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + /* check for sriov cpx mode */ + if (amdgpu_sriov_vf(adev)) + if (adev->gfx.xcc_mask == 0x1) + adev->jpeg.num_jpeg_rings = 4; + jpeg_v4_0_3_set_dec_ring_funcs(adev); jpeg_v4_0_3_set_irq_funcs(adev); jpeg_v4_0_3_set_ras_funcs(adev); @@ -87,11 +92,25 @@ static int jpeg_v4_0_3_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; int i, j, r, jpeg_inst; + bool sriov_cpx_odd = false; + + /* check for sriov cpx mode odd/even numbered vfs */ + if (amdgpu_sriov_vf(adev)) { + if (adev->gfx.xcc_mask == 0x1) { + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) + sriov_cpx_odd = true; + } + } for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { /* JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + if (!sriov_cpx_odd) + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, amdgpu_ih_srcid_jpeg[j], >jpeg.inst->irq); + else + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + amdgpu_ih_srcid_jpeg[j+4], >jpeg.inst->irq); + if (r) return r; } @@ -116,10 +135,14 @@ static int jpeg_v4_0_3_sw_init(void *handle) (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 9 * jpeg_inst; } else { - if (j < 4) + if ((j < 4) && (!sriov_cpx_odd)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 4 + j + 32 * jpeg_inst; + else if (sriov_cpx_odd) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 12 + j + 32 * jpeg_inst; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + @@ -186,6 +209,7 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) uint32_t size, size_dw, item_offset; uint32_t init_status; int i, j, jpeg_inst; + bool cpx_odd = false; struct mmsch_v4_0_cmd_direct_write direct_wt = { {0} }; @@ -197,6 +221,12 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) end.cmd_header.command_type = MMSCH_COMMAND__END; + /* check for cpx mode odd/even numbered vf */ + if (adev->gfx.xcc_mask == 0x1) { + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) + cpx_odd = true; + } + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { jpeg_inst = GET_INST(JPEG, i); @@ -220,10 +250,14 @@ static int jpeg_v4_0_3_start_sri
[PATCH v2 1/3] drm/amdgpu: function to read physical xcc_id
For SRIOV CPX mode, the assignments of jpeg doorbells depends on whether the VF is even/odd numbered. Physical xcc_id provides info whether the VF is even/odd. regCP_PSP_XCP_CTL is RO for VF through rlcg. Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 04a86dff71e6..451192403c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -297,6 +297,7 @@ struct amdgpu_gfx_funcs { int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); + int (*get_xcc_id)(struct amdgpu_device *adev, int inst); }; struct sq_work { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b53c8fd4e8cf..68508c19a9b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -669,6 +669,11 @@ static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) return xcc - 1; } +static int gfx_v9_4_3_get_xcc_id(struct amdgpu_device *adev, int inst) +{ + return RREG32_SOC15(GC, GET_INST(GC, inst), regCP_PSP_XCP_CTL); +} + static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .get_gpu_clock_counter = _v9_4_3_get_gpu_clock_counter, .select_se_sh = _v9_4_3_xcc_select_se_sh, @@ -678,6 +683,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .select_me_pipe_q = _v9_4_3_select_me_pipe_q, .switch_partition_mode = _v9_4_3_switch_compute_partition, .ih_node_to_logical_xcc = _v9_4_3_ih_to_xcc_inst, + .get_xcc_id = _v9_4_3_get_xcc_id, }; static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, -- 2.34.1
[PATCH 3/3] drm/amdgpu/jpeg: support for sriov cpx mode
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 80 +--- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 32caeb37cef9..4bf087f8ca2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -65,9 +65,15 @@ static int amdgpu_ih_srcid_jpeg[] = { static int jpeg_v4_0_3_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + if (amdgpu_sriov_vf(adev)) + if (adev->xcp_mgr->funcs->query_partition_mode(xcp_mgr) == + AMDGPU_CPX_PARTITION_MODE) + adev->jpeg.num_jpeg_rings = 4; + jpeg_v4_0_3_set_dec_ring_funcs(adev); jpeg_v4_0_3_set_irq_funcs(adev); jpeg_v4_0_3_set_ras_funcs(adev); @@ -88,10 +94,28 @@ static int jpeg_v4_0_3_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r, jpeg_inst; + bool sriov_cpx_odd = false; + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; + int mode; + + if (amdgpu_sriov_vf(adev)) { + mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr); + + if (mode == AMDGPU_CPX_PARTITION_MODE) { + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) + sriov_cpx_odd = true; + } + } + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { /* JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + if (!sriov_cpx_odd) + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, amdgpu_ih_srcid_jpeg[j], >jpeg.inst->irq); + else + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + amdgpu_ih_srcid_jpeg[j+4], >jpeg.inst->irq); + if (r) return r; } @@ -116,10 +140,15 @@ static int jpeg_v4_0_3_sw_init(void *handle) (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 9 * jpeg_inst; } else { - if (j < 4) + if ((j < 4) && (!sriov_cpx_odd)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 4 + j + 32 * jpeg_inst; + else if (sriov_cpx_odd) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 12 + j + 32 * jpeg_inst; + else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + @@ -186,6 +215,9 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) uint32_t size, size_dw, item_offset; uint32_t init_status; int i, j, jpeg_inst; + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; + int mode; + bool cpx_odd = false; struct mmsch_v4_0_cmd_direct_write direct_wt = { {0} }; @@ -197,6 +229,13 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) end.cmd_header.command_type = MMSCH_COMMAND__END; + mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr); + + if (mode == AMDGPU_CPX_PARTITION_MODE) { + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) + cpx_odd = true; + } + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { jpeg_inst = GET_INST(JPEG, i); @@ -220,10 +259,14 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE); MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); - if (j <= 3) { + if ((j <= 3) && (!cpx_odd)) { header.mjpegdec0[j].table_offset = item_offset; header.mjpegdec0[j].init_status = 0; header.mjpegdec0[j].table_size = table_size; + } else if (cpx_odd) { + header.mjpegdec1[j].table_offset = i
[PATCH 2/3] drm/amdgpu: sdma support for sriov cpx mode
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 34 +++- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index fec5a3d1c4bc..f666ececbe7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -82,17 +82,37 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num) } } -static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) +static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id) { + + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; + bool sriov_cpx_odd = false; + int mode; + + if (amdgpu_sriov_vf(adev)) { + mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr); + + if (mode == AMDGPU_CPX_PARTITION_MODE) { + if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1) + sriov_cpx_odd = true; + } + } + switch (client_id) { case SOC15_IH_CLIENTID_SDMA0: return 0; case SOC15_IH_CLIENTID_SDMA1: return 1; case SOC15_IH_CLIENTID_SDMA2: - return 2; + if (sriov_cpx_odd) + return 0; + else + return 2; case SOC15_IH_CLIENTID_SDMA3: - return 3; + if (sriov_cpx_odd) + return 1; + else + return 3; default: return -EINVAL; } @@ -1541,7 +1561,7 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev, uint32_t instance, i; DRM_DEBUG("IH: SDMA trap\n"); - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); /* Client id gives the SDMA instance in AID. To know the exact SDMA * instance, interrupt entry gives the node id which corresponds to the AID instance. @@ -1584,7 +1604,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev, if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) goto out; - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0) goto out; @@ -1603,7 +1623,7 @@ static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev, DRM_ERROR("Illegal instruction in SDMA command stream\n"); - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0) return 0; @@ -1647,7 +1667,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_task_info task_info; u64 addr; - instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id); if (instance < 0 || instance >= adev->sdma.num_instances) { dev_err(adev->dev, "sdma instance invalid %d\n", instance); return -EINVAL; -- 2.34.1
[PATCH 1/3] drm/amdgpu: function to read physical xcc_id
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8fcf889ddce9..bebda5501cb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -298,6 +298,7 @@ struct amdgpu_gfx_funcs { int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); + int (*get_xcc_id)(struct amdgpu_device *adev, int inst); }; struct sq_work { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b53c8fd4e8cf..68508c19a9b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -669,6 +669,11 @@ static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) return xcc - 1; } +static int gfx_v9_4_3_get_xcc_id(struct amdgpu_device *adev, int inst) +{ + return RREG32_SOC15(GC, GET_INST(GC, inst), regCP_PSP_XCP_CTL); +} + static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .get_gpu_clock_counter = _v9_4_3_get_gpu_clock_counter, .select_se_sh = _v9_4_3_xcc_select_se_sh, @@ -678,6 +683,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .select_me_pipe_q = _v9_4_3_select_me_pipe_q, .switch_partition_mode = _v9_4_3_switch_compute_partition, .ih_node_to_logical_xcc = _v9_4_3_ih_to_xcc_inst, + .get_xcc_id = _v9_4_3_get_xcc_id, }; static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, -- 2.34.1
[PATCH] drm/amdgpu/jpeg - skip change of power-gating state for sriov
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 15612915bb6c..1de79d660285 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -360,8 +360,10 @@ static int jpeg_v4_0_3_hw_fini(void *handle) cancel_delayed_work_sync(>jpeg.idle_work); - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + } return ret; } -- 2.34.1
[PATCH v4 6/7] drm/amdgpu/jpeg: mmsch_v3_0_4 requires doorbell on 32 byte boundary
BASE: VCN0 unified (32 byte boundary) BASE+4: MJPEG0 BASE+5: MJPEG1 BASE+6: MJPEG2 BASE+7: MJPEG3 BASE+12: MJPEG4 BASE+13: MJPEG5 BASE+14: MJPEG6 BASE+15: MJPEG7 Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 33f04ea8549f..f745eeef442f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle) ring = >jpeg.inst[i].ring_dec[j]; ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 1 + j + 9 * jpeg_inst; + if (!amdgpu_sriov_vf(adev)) { + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 9 * jpeg_inst; + } else { + if (j < 4) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 4 + j + 32 * jpeg_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 8 + j + 32 * jpeg_inst; + } sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); r = amdgpu_ring_init(adev, ring, 512, >jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); -- 2.34.1
[PATCH v4 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3
For sriov, doorbell index for vcn0 for AID needs to be on 32 byte boundary so we need to move the vcn end doorbell Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index f637574644c0..4a279960cd21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, /* IH: 0x1A0 ~ 0x1AF */ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, - /* VCN: 0x1B0 ~ 0x1D4 */ + /* VCN: 0x1B0 ~ 0x1E8 */ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, - AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, - AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_INVALID = 0x } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1; -- 2.34.1
[PATCH v4 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
initialization table handshake with mmsch Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 260 +--- 1 file changed, 236 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 411c1d802823..66eb0c8e6f94 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -44,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_00x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_3_set_powergating_state(void *handle, @@ -130,6 +132,12 @@ static int vcn_v4_0_3_sw_init(void *handle) amdgpu_vcn_fwlog_init(>vcn.inst[i]); } + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; @@ -167,6 +175,9 @@ static int vcn_v4_0_3_sw_fini(void *handle) drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -189,33 +200,47 @@ static int vcn_v4_0_3_hw_init(void *handle) struct amdgpu_ring *ring; int i, r, vcn_inst; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); - ring = >vcn.inst[i].ring_enc[0]; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v4_0_3_start_sriov(adev); + if (r) + goto done; - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = >vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v4_0_3_unified_ring_set_wptr(ring); + ring->sched.ready = true; } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = >vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst, + adev->vcn.inst[i].aid_id); + + WREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL, + ring->doorbell_index + << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL); + } - r = amdgpu_ring_test_helper(ring); - if (r) - got
[PATCH v4 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3
initialization table handshake with mmsch Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 174 --- 1 file changed, 153 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ce2b22f7e4e4..33f04ea8549f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_3_set_powergating_state(void *handle, enum amd_powergating_state state); static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring); static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG_DECODE, @@ -160,6 +162,120 @@ static int jpeg_v4_0_3_sw_fini(void *handle) return r; } +static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = >virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw, item_offset; + uint32_t init_status; + int i, j, jpeg_inst; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_3_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + jpeg_inst = GET_INST(JPEG, i); + + memset(, 0, sizeof(struct mmsch_v4_0_3_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + item_offset = header.total_size; + + + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) { + ring = >jpeg.inst[i].ring_dec[j]; + table_size = 0; + + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); + + if (j <= 3) { + header.mjpegdec0[j].table_offset = item_offset; + header.mjpegdec0[j].init_status = 0; + header.mjpegdec0[j].table_size = table_size; + } else { + header.mjpegdec1[j-4].table_offset = item_offset; + header.mjpegdec1[j-4].init_status = 0; + header.mjpegdec1[j-4].table_size = table_size; + } + header.total_size += table_size; + item_offset+= table_size; + } + + MMSCH_V4_0_INSERT_END(); + + /* send init table to MMSCH */ + size = sizeof(struct mmsch_v4_0_3_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, , size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x0001; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp
[PATCH v4 4/7] drm/amdgpu/vcn: mmsch_v3_0_4 requires doorbell on 32 byte boundary
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 66eb0c8e6f94..1e5aad207878 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle) ring = >vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst; + + if (!amdgpu_sriov_vf(adev)) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 32 * vcn_inst; + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); r = amdgpu_ring_init(adev, ring, 512, >vcn.inst->irq, 0, -- 2.34.1
[PATCH v4 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov
The structures are the same as v4_0 except for the init header Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++ 1 file changed, 37 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h new file mode 100644 index ..db7eb5260295 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V4_0_3_H__ +#define __MMSCH_V4_0_3_H__ + +#include "amdgpu_vcn.h" +#include "mmsch_v4_0.h" + +struct mmsch_v4_0_3_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v4_0_table_info vcn0; + struct mmsch_v4_0_table_info mjpegdec0[4]; + struct mmsch_v4_0_table_info mjpegdec1[4]; +}; +#endif -- 2.34.1
[PATCH v4 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 550ac040b4be..411c1d802823 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + /* for SRIOV, guest should not control VCN Power-gating +* MMSCH FW should control Power-gating and clock-gating +* guest should avoid touching CGC and PG +*/ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->vcn.cur_state) return 0; -- 2.34.1
[PATCH v3 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3
For sriov, doorbell index for vcn0 for AID needs to be on 32 byte boundary so we need to move the vcn end doorbell Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index f637574644c0..4a279960cd21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, /* IH: 0x1A0 ~ 0x1AF */ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, - /* VCN: 0x1B0 ~ 0x1D4 */ + /* VCN: 0x1B0 ~ 0x1E8 */ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, - AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, - AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_INVALID = 0x } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1; -- 2.34.1
[PATCH v3 6/7] drm/amdgpu/jpeg: mmsch_v3_0_4 requires doorbell on 32 byte boundary
BASE: VCN0 unified (32 byte boundary) BASE+4: MJPEG0 BASE+5: MJPEG1 BASE+6: MJPEG2 BASE+7: MJPEG3 BASE+12: MJPEG4 BASE+13: MJPEG5 BASE+14: MJPEG6 BASE+15: MJPEG7 Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 33f04ea8549f..f745eeef442f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle) ring = >jpeg.inst[i].ring_dec[j]; ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 1 + j + 9 * jpeg_inst; + if (!amdgpu_sriov_vf(adev)) { + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 9 * jpeg_inst; + } else { + if (j < 4) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 4 + j + 32 * jpeg_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 8 + j + 32 * jpeg_inst; + } sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); r = amdgpu_ring_init(adev, ring, 512, >jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); -- 2.34.1
[PATCH v3 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3
initialization table handshake with mmsch Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 174 --- 1 file changed, 153 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ce2b22f7e4e4..33f04ea8549f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_3_set_powergating_state(void *handle, enum amd_powergating_state state); static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring); static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG_DECODE, @@ -160,6 +162,120 @@ static int jpeg_v4_0_3_sw_fini(void *handle) return r; } +static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = >virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw, item_offset; + uint32_t init_status; + int i, j, jpeg_inst; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_3_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + jpeg_inst = GET_INST(JPEG, i); + + memset(, 0, sizeof(struct mmsch_v4_0_3_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + item_offset = header.total_size; + + + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) { + ring = >jpeg.inst[i].ring_dec[j]; + table_size = 0; + + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); + + if (j <= 3) { + header.mjpegdec0[j].table_offset = item_offset; + header.mjpegdec0[j].init_status = 0; + header.mjpegdec0[j].table_size = table_size; + } else { + header.mjpegdec1[j-4].table_offset = item_offset; + header.mjpegdec1[j-4].init_status = 0; + header.mjpegdec1[j-4].table_size = table_size; + } + header.total_size += table_size; + item_offset+= table_size; + } + + MMSCH_V4_0_INSERT_END(); + + /* send init table to MMSCH */ + size = sizeof(struct mmsch_v4_0_3_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, , size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x0001; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp
[PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
initialization table handshake with mmsch Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +--- 1 file changed, 233 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 411c1d802823..b978265b2d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -44,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_00x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_3_set_powergating_state(void *handle, @@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle) amdgpu_vcn_fwlog_init(>vcn.inst[i]); } + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; @@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle) drm_dev_exit(idx); } + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -189,33 +197,47 @@ static int vcn_v4_0_3_hw_init(void *handle) struct amdgpu_ring *ring; int i, r, vcn_inst; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); - ring = >vcn.inst[i].ring_enc[0]; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v4_0_3_start_sriov(adev); + if (r) + goto done; - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = >vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v4_0_3_unified_ring_set_wptr(ring); + ring->sched.ready = true; } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = >vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst, + adev->vcn.inst[i].aid_id); + + WREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL, + ring->doorbell_index + << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL); + } - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + r = amdgpu_ring_test_helper(ring); + if (r) +
[PATCH v3 4/7] drm/amdgpu/vcn: mmsch_v3_0_4 requires doorbell on 32 byte boundary
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index b978265b2d77..7cd5ca204317 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle) ring = >vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst; + + if (!amdgpu_sriov_vf(adev)) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 32 * vcn_inst; + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); r = amdgpu_ring_init(adev, ring, 512, >vcn.inst->irq, 0, -- 2.34.1
[PATCH v3 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 550ac040b4be..411c1d802823 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + /* for SRIOV, guest should not control VCN Power-gating +* MMSCH FW should control Power-gating and clock-gating +* guest should avoid touching CGC and PG +*/ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->vcn.cur_state) return 0; -- 2.34.1
[PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov
The structures are the same as v4_0 except for the init header Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++ 1 file changed, 37 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h new file mode 100644 index ..db7eb5260295 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V4_0_3_H__ +#define __MMSCH_V4_0_3_H__ + +#include "amdgpu_vcn.h" +#include "mmsch_v4_0.h" + +struct mmsch_v4_0_3_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v4_0_table_info vcn0; + struct mmsch_v4_0_table_info mjpegdec0[4]; + struct mmsch_v4_0_table_info mjpegdec1[4]; +}; +#endif -- 2.34.1
[PATCH 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3
For sriov, doorbell index for vcn0 for AID needs to be on 32 byte boundary so we need to move the vcn end doorbell Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index f637574644c0..4a279960cd21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, /* IH: 0x1A0 ~ 0x1AF */ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, - /* VCN: 0x1B0 ~ 0x1D4 */ + /* VCN: 0x1B0 ~ 0x1E8 */ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, - AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, - AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_INVALID = 0x } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1; -- 2.34.1
[PATCH 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 550ac040b4be..411c1d802823 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + /* for SRIOV, guest should not control VCN Power-gating +* MMSCH FW should control Power-gating and clock-gating +* guest should avoid touching CGC and PG +*/ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->vcn.cur_state) return 0; -- 2.34.1
[PATCH 6/7] drm/amdgpu/jpeg: mmsch_v3_0_4 requires doorbell on 32 byte boundary
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 85ee74fdb7e3..896e2f895884 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle) ring = >jpeg.inst[i].ring_dec[j]; ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 1 + j + 9 * jpeg_inst; + if (!amdgpu_sriov_vf(adev)) { + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 9 * jpeg_inst; + } else { + if (j < 4) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 4 + j + 32 * jpeg_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 8 + j + 32 * jpeg_inst; + } sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); r = amdgpu_ring_init(adev, ring, 512, >jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); -- 2.34.1
[PATCH 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3
initialization table handshake with mmsch Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 171 --- 1 file changed, 150 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ce2b22f7e4e4..85ee74fdb7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_3_set_powergating_state(void *handle, enum amd_powergating_state state); static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring); static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG_DECODE, @@ -160,6 +162,117 @@ static int jpeg_v4_0_3_sw_fini(void *handle) return r; } +static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = >virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw, item_offset; + uint32_t init_status; + int i, j; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_3_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + memset(, 0, sizeof(struct mmsch_v4_0_3_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + item_offset = header.total_size; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) { + ring = >jpeg.inst[i].ring_dec[j]; + table_size = 0; + + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); + + if (j <= 3) { + header.mjpegdec0[j].table_offset = item_offset; + header.mjpegdec0[j].init_status = 0; + header.mjpegdec0[j].table_size = table_size; + } else { + header.mjpegdec1[j-4].table_offset = item_offset; + header.mjpegdec1[j-4].init_status = 0; + header.mjpegdec1[j-4].table_size = table_size; + } + header.total_size += table_size; + item_offset+= table_size; + } + + MMSCH_V4_0_INSERT_END(); + + /* send init table to MMSCH */ + size = sizeof(struct mmsch_v4_0_3_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, , size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, i, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, i, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, i, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x0001; + WREG32_SOC15(VCN, i, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_
[PATCH 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
initialization table handshake with mmsch Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 261 +--- 1 file changed, 237 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 411c1d802823..8650e3c6288d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -44,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_00x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_3_set_powergating_state(void *handle, @@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle) amdgpu_vcn_fwlog_init(>vcn.inst[i]); } + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; @@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle) drm_dev_exit(idx); } + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -189,33 +197,50 @@ static int vcn_v4_0_3_hw_init(void *handle) struct amdgpu_ring *ring; int i, r, vcn_inst; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); - ring = >vcn.inst[i].ring_enc[0]; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v4_0_3_start_sriov(adev); + if (r) + goto done; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); + ring = >vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v4_0_3_unified_ring_set_wptr(ring); + ring->sched.ready = true; } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = >vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst, + adev->vcn.inst[i].aid_id); + + WREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL, + ring->doorbell_index + << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL); + } - r = amdgpu_ring_test_helper(ring); - if (r) -
[PATCH 4/7] drm/amdgpu/vcn: mmsch_v3_0_4 requires doorbell on 32 byte boundary
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 8650e3c6288d..09b3fa707af6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle) ring = >vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst; + + if (!amdgpu_sriov_vf(adev)) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 32 * vcn_inst; + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); r = amdgpu_ring_init(adev, ring, 512, >vcn.inst->irq, 0, -- 2.34.1
[PATCH 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov
The structures are the same as v4_0 except for the init header Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++ 1 file changed, 37 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h new file mode 100644 index ..db7eb5260295 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V4_0_3_H__ +#define __MMSCH_V4_0_3_H__ + +#include "amdgpu_vcn.h" +#include "mmsch_v4_0.h" + +struct mmsch_v4_0_3_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v4_0_table_info vcn0; + struct mmsch_v4_0_table_info mjpegdec0[4]; + struct mmsch_v4_0_table_info mjpegdec1[4]; +}; +#endif -- 2.34.1
[PATCH 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3
For sriov, doorbell index for vcn0 for AID needs to be on 32 byte boundary so we need to move the vcn end doorbell Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index f637574644c0..4a279960cd21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, /* IH: 0x1A0 ~ 0x1AF */ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, - /* VCN: 0x1B0 ~ 0x1D4 */ + /* VCN: 0x1B0 ~ 0x1E8 */ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, - AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, - AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_INVALID = 0x } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1; -- 2.34.1
[PATCH 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3
initialization table handshake with mmsch Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 171 --- 1 file changed, 150 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ce2b22f7e4e4..85ee74fdb7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_3_set_powergating_state(void *handle, enum amd_powergating_state state); static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring); static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG_DECODE, @@ -160,6 +162,117 @@ static int jpeg_v4_0_3_sw_fini(void *handle) return r; } +static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = >virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw, item_offset; + uint32_t init_status; + int i, j; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_3_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + memset(, 0, sizeof(struct mmsch_v4_0_3_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + item_offset = header.total_size; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) { + ring = >jpeg.inst[i].ring_dec[j]; + table_size = 0; + + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); + + if (j <= 3) { + header.mjpegdec0[j].table_offset = item_offset; + header.mjpegdec0[j].init_status = 0; + header.mjpegdec0[j].table_size = table_size; + } else { + header.mjpegdec1[j-4].table_offset = item_offset; + header.mjpegdec1[j-4].init_status = 0; + header.mjpegdec1[j-4].table_size = table_size; + } + header.total_size += table_size; + item_offset+= table_size; + } + + MMSCH_V4_0_INSERT_END(); + + /* send init table to MMSCH */ + size = sizeof(struct mmsch_v4_0_3_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, , size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, i, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, i, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, i, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x0001; + WREG32_SOC15(VCN, i, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_
[PATCH 6/7] drm/amdgpu/jpeg: mmsch_v3_0_4 requires doorbell on 32 byte boundary
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 85ee74fdb7e3..896e2f895884 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle) ring = >jpeg.inst[i].ring_dec[j]; ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 1 + j + 9 * jpeg_inst; + if (!amdgpu_sriov_vf(adev)) { + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 9 * jpeg_inst; + } else { + if (j < 4) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 4 + j + 32 * jpeg_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 8 + j + 32 * jpeg_inst; + } sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); r = amdgpu_ring_init(adev, ring, 512, >jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); -- 2.34.1
[PATCH 4/7] drm/amdgpu/vcn: mmsch_v3_0_4 requires doorbell on 32 byte boundary
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index ac405dfcfaf1..fa9abcb08c22 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle) ring = >vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst; + + if (!amdgpu_sriov_vf(adev)) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 32 * vcn_inst; + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); r = amdgpu_ring_init(adev, ring, 512, >vcn.inst->irq, 0, -- 2.34.1
[PATCH 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
initialization table handshake with mmsch Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 264 +--- 1 file changed, 240 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 411c1d802823..ac405dfcfaf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -44,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_00x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_3_set_powergating_state(void *handle, @@ -130,6 +132,12 @@ static int vcn_v4_0_3_sw_init(void *handle) amdgpu_vcn_fwlog_init(>vcn.inst[i]); } + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; @@ -167,6 +175,9 @@ static int vcn_v4_0_3_sw_fini(void *handle) drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -189,33 +200,50 @@ static int vcn_v4_0_3_hw_init(void *handle) struct amdgpu_ring *ring; int i, r, vcn_inst; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); - ring = >vcn.inst[i].ring_enc[0]; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v4_0_3_start_sriov(adev); + if (r) + goto done; - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = >vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v4_0_3_unified_ring_set_wptr(ring); + ring->sched.ready = true; } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = >vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst, + adev->vcn.inst[i].aid_id); + + WREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL, + ring->doorbell_index + << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_
[PATCH 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov
The structures are the same as v4_0 except for the init header Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++ 1 file changed, 37 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h new file mode 100644 index ..db7eb5260295 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V4_0_3_H__ +#define __MMSCH_V4_0_3_H__ + +#include "amdgpu_vcn.h" +#include "mmsch_v4_0.h" + +struct mmsch_v4_0_3_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v4_0_table_info vcn0; + struct mmsch_v4_0_table_info mjpegdec0[4]; + struct mmsch_v4_0_table_info mjpegdec1[4]; +}; +#endif -- 2.34.1
[PATCH 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov
Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 550ac040b4be..411c1d802823 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + /* for SRIOV, guest should not control VCN Power-gating +* MMSCH FW should control Power-gating and clock-gating +* guest should avoid touching CGC and PG +*/ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->vcn.cur_state) return 0; -- 2.34.1
[PATCH] drm/amdgpu: Rearm IRQ in Navi10 SR-IOV if IRQ lost
Ported from Vega10. SDMA stress tests sometimes see IRQ lost. Signed-off-by: Samir Dhume --- drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 36 ++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index cf557a428298..e08245a446fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -32,6 +32,7 @@ #include "soc15_common.h" #include "navi10_ih.h" +#define MAX_REARM_RETRY 10 static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev); @@ -283,6 +284,38 @@ static void navi10_ih_decode_iv(struct amdgpu_device *adev, ih->rptr += 32; } +/** + * navi10_ih_irq_rearm - rearm IRQ if lost + * + * @adev: amdgpu_device pointer + * + */ +static void navi10_ih_irq_rearm(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + uint32_t reg_rptr = 0; + uint32_t v = 0; + uint32_t i = 0; + + if (ih == >irq.ih) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR); + else if (ih == >irq.ih1) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1); + else if (ih == >irq.ih2) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2); + else + return; + + /* Rearm IRQ / re-write doorbell if doorbell write is lost */ + for (i = 0; i < MAX_REARM_RETRY; i++) { + v = RREG32_NO_KIQ(reg_rptr); + if ((v < ih->ring_size) && (v != ih->rptr)) + WDOORBELL32(ih->doorbell_index, ih->rptr); + else + break; + } +} + /** * navi10_ih_set_rptr - set the IH ring buffer rptr * @@ -297,6 +330,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev, /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; WDOORBELL32(ih->doorbell_index, ih->rptr); + + if (amdgpu_sriov_vf(adev)) + navi10_ih_irq_rearm(adev, ih); } else WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); } -- 2.20.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx