[PATCH v3 3/3] drm/amdgpu/jpeg: support for sriov cpx mode

2024-03-18 Thread Samir Dhume
In SRIOV CPX mode, each VF has 4 jpeg engines. The even-
numbered VFs point to JPEG0 block of the AID and the odd-
numbered VFs point to the JPEG1 block.

                    Even-numbered VFs     Odd numbered VFs

VCN doorbell 0      VCN Decode ring       VCN Decode ring
VCN doorbell 1-3    Reserved              Reserved
VCN doorbell 4      JPEG0-0 ring
VCN doorbell 5      JPEG0-1 ring
VCN doorbell 6      JPEG0-2 ring
VCN doorbell 7      JPEG0-3 ring
VCN doorbell 8                            JPEG1-0 ring
VCN doorbell 9                            JPEG1-1 ring
VCN doorbell 10                           JPEG1-2 ring
VCN doorbell 11                           JPEG1-3 ring

Changes involve
1. sriov cpx mode - 4 rings
2. sriov cpx mode for odd numbered VFs - register correct src-ids
(starting with JPEG4). Map src-id to correct instance in interrupt-
handler.

v2:
1. removed mmio access from interrupt handler
2. remove unnecessary sriov variables

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 60 +---
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 32caeb37cef9..d95ca797412c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -68,6 +68,11 @@ static int jpeg_v4_0_3_early_init(void *handle)
 
adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
 
+   /* check for sriov cpx mode */
+   if (amdgpu_sriov_vf(adev))
+   if (adev->gfx.xcc_mask == 0x1)
+   adev->jpeg.num_jpeg_rings = 4;
+
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
jpeg_v4_0_3_set_ras_funcs(adev);
@@ -87,11 +92,25 @@ static int jpeg_v4_0_3_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
+   bool sriov_cpx_odd = false;
+
+   /* check for sriov cpx mode odd/even numbered vfs */
+   if (amdgpu_sriov_vf(adev)) {
+   if (adev->gfx.xcc_mask == 0x1) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   sriov_cpx_odd = true;
+   }
+   }
 
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
/* JPEG TRAP */
-   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+   if (!sriov_cpx_odd)
+   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
amdgpu_ih_srcid_jpeg[j], >jpeg.inst->irq);
+   else
+   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+   amdgpu_ih_srcid_jpeg[j+4], 
>jpeg.inst->irq);
+
if (r)
return r;
}
@@ -116,10 +135,14 @@ static int jpeg_v4_0_3_sw_init(void *handle)
(adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
1 + j + 9 * jpeg_inst;
} else {
-   if (j < 4)
+   if ((j < 4) && (!sriov_cpx_odd))
ring->doorbell_index =

(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
4 + j + 32 * jpeg_inst;
+   else if (sriov_cpx_odd)
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   12 + j + 32 * jpeg_inst;
else
ring->doorbell_index =

(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
@@ -186,6 +209,7 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
uint32_t size, size_dw, item_offset;
uint32_t init_status;
int i, j, jpeg_inst;
+   bool cpx_odd = false;
 
struct mmsch_v4_0_cmd_direct_write
direct_wt = { {0} };
@@ -197,6 +221,12 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
end.cmd_header.command_type =
MMSCH_COMMAND__END;
 
+   /* check for cpx mode odd/even numbered vf */
+   if (adev->gfx.xcc_mask == 0x1) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   cpx_odd = true;
+   }
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
jpeg_inst = GET_INST(JPEG, i);
 
@@ -220,10 +250,14 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
  

[PATCH v3 2/3] drm/amdgpu: sdma support for sriov cpx mode

2024-03-18 Thread Samir Dhume
sdma has 2 instances in SRIOV cpx mode. Odd numbered VFs have
sdma0/sdma1 instances. Even numbered vfs have sdma2/sdma3.
Changes involve
1. identifying odd/even numbered VF
2. registering correct number of instances with irq handler
3. mapping instance number with IH client-id depending upon
whether vf is odd/even numbered.

v2:
1. fix for correct number of instances registered with irq
2. remove mmio access from interrupt handler. Use xcc_mask to
detect cpx mode.

v3:
1. restore all instances registered with irq in case there is
harvesting of some sdma instances.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index eaa4f5f49949..01d1024e91ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -82,7 +82,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
}
 }
 
-static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned 
client_id)
 {
switch (client_id) {
case SOC15_IH_CLIENTID_SDMA0:
@@ -90,9 +90,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
case SOC15_IH_CLIENTID_SDMA1:
return 1;
case SOC15_IH_CLIENTID_SDMA2:
-   return 2;
+   if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+   return 0;
+   else
+   return 2;
case SOC15_IH_CLIENTID_SDMA3:
-   return 3;
+   if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+   return 1;
+   else
+   return 3;
default:
return -EINVAL;
}
@@ -1541,7 +1547,7 @@ static int sdma_v4_4_2_process_trap_irq(struct 
amdgpu_device *adev,
uint32_t instance, i;
 
DRM_DEBUG("IH: SDMA trap\n");
-   instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
 
/* Client id gives the SDMA instance in AID. To know the exact SDMA
 * instance, interrupt entry gives the node id which corresponds to the 
AID instance.
@@ -1584,7 +1590,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct 
amdgpu_device *adev,
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
goto out;
 
-   instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0)
goto out;
 
@@ -1603,7 +1609,7 @@ static int sdma_v4_4_2_process_illegal_inst_irq(struct 
amdgpu_device *adev,
 
DRM_ERROR("Illegal instruction in SDMA command stream\n");
 
-   instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0)
return 0;
 
@@ -1647,7 +1653,7 @@ static int sdma_v4_4_2_print_iv_entry(struct 
amdgpu_device *adev,
struct amdgpu_task_info *task_info;
u64 addr;
 
-   instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0 || instance >= adev->sdma.num_instances) {
dev_err(adev->dev, "sdma instance invalid %d\n", instance);
return -EINVAL;
-- 
2.34.1



[PATCH v3 1/3] drm/amdgpu: function to read physical xcc_id

2024-03-18 Thread Samir Dhume
For SRIOV CPX mode, the assignment of jpeg doorbells depends on
whether the VF is even/odd numbered. The physical xcc_id indicates
whether the VF is even or odd.

regCP_PSP_XCP_CTL is RO for VF through rlcg.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 04a86dff71e6..451192403c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -297,6 +297,7 @@ struct amdgpu_gfx_funcs {
int (*switch_partition_mode)(struct amdgpu_device *adev,
 int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+   int (*get_xcc_id)(struct amdgpu_device *adev, int inst);
 };
 
 struct sq_work {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index b53c8fd4e8cf..68508c19a9b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -669,6 +669,11 @@ static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device 
*adev, int ih_node)
return xcc - 1;
 }
 
+static int gfx_v9_4_3_get_xcc_id(struct amdgpu_device *adev, int inst)
+{
+   return RREG32_SOC15(GC, GET_INST(GC, inst), regCP_PSP_XCP_CTL);
+}
+
 static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.get_gpu_clock_counter = _v9_4_3_get_gpu_clock_counter,
.select_se_sh = _v9_4_3_xcc_select_se_sh,
@@ -678,6 +683,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = 
{
.select_me_pipe_q = _v9_4_3_select_me_pipe_q,
.switch_partition_mode = _v9_4_3_switch_compute_partition,
.ih_node_to_logical_xcc = _v9_4_3_ih_to_xcc_inst,
+   .get_xcc_id = _v9_4_3_get_xcc_id,
 };
 
 static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle,
-- 
2.34.1



[PATCH v2 2/3] drm/amdgpu: sdma support for sriov cpx mode

2024-03-15 Thread Samir Dhume
sdma has 2 instances in SRIOV cpx mode. Odd numbered VFs have
sdma0/sdma1 instances. Even numbered vfs have sdma2/sdma3.
Changes involve
1. identifying odd/even numbered VF
2. registering correct number of instances with irq handler
3. mapping instance number with IH client-id depending upon
whether vf is odd/even numbered.

v2:
1. fix for correct number of instances registered with irq
2. remove mmio access from interrupt handler. Use xcc_mask to
detect cpx mode.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 63 
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index eaa4f5f49949..117a7c692c0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -66,13 +66,28 @@ static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device 
*adev,
return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset);
 }
 
-static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
+static unsigned sdma_v4_4_2_seq_to_irq_id(struct amdgpu_device *adev, int 
seq_num)
 {
+   bool sriov_cpx_odd = false;
+
+   /* check for sriov cpx mode odd/even vf */
+   if (amdgpu_sriov_vf(adev)) {
+   if (adev->gfx.xcc_mask == 0x1)
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   sriov_cpx_odd = true;
+   }
+
switch (seq_num) {
case 0:
-   return SOC15_IH_CLIENTID_SDMA0;
+   if (sriov_cpx_odd)
+   return SOC15_IH_CLIENTID_SDMA2;
+   else
+   return SOC15_IH_CLIENTID_SDMA0;
case 1:
-   return SOC15_IH_CLIENTID_SDMA1;
+   if (sriov_cpx_odd)
+   return SOC15_IH_CLIENTID_SDMA3;
+   else
+   return SOC15_IH_CLIENTID_SDMA1;
case 2:
return SOC15_IH_CLIENTID_SDMA2;
case 3:
@@ -82,7 +97,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
}
 }
 
-static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned 
client_id)
 {
switch (client_id) {
case SOC15_IH_CLIENTID_SDMA0:
@@ -90,9 +105,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
case SOC15_IH_CLIENTID_SDMA1:
return 1;
case SOC15_IH_CLIENTID_SDMA2:
-   return 2;
+   if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+   return 0;
+   else
+   return 2;
case SOC15_IH_CLIENTID_SDMA3:
-   return 3;
+   if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+   return 1;
+   else
+   return 3;
default:
return -EINVAL;
}
@@ -1300,13 +1321,15 @@ static int sdma_v4_4_2_late_init(void *handle)
 static int sdma_v4_4_2_sw_init(void *handle)
 {
struct amdgpu_ring *ring;
-   int r, i;
+   int r, i, num_irq_inst;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 aid_id;
 
+   num_irq_inst = min(adev->sdma.num_instances, 
adev->sdma.num_inst_per_aid);
+
/* SDMA trap event */
-   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
-   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+   for (i = 0; i < num_irq_inst; i++) {
+   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
  SDMA0_4_0__SRCID__SDMA_TRAP,
  >sdma.trap_irq);
if (r)
@@ -1314,8 +1337,8 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
 
/* SDMA SRAM ECC event */
-   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
-   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+   for (i = 0; i < num_irq_inst; i++) {
+   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
  SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
  >sdma.ecc_irq);
if (r)
@@ -1323,26 +1346,26 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
 
/* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
-   for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
-   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+   for (i = 0; i < num_irq_inst; i++) {
+   r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(adev, i),
  SDMA0_4_0__SRCID__SDMA_VM_HOLE,
  >sdma.vm_hole_irq);
if (r

[PATCH v2 3/3] drm/amdgpu/jpeg: support for sriov cpx mode

2024-03-15 Thread Samir Dhume
In SRIOV CPX mode, each VF has 4 jpeg engines. The even-
numbered VFs point to JPEG0 block of the AID and the odd-
numbered VFs point to the JPEG1 block.

                    Even-numbered VFs     Odd numbered VFs

VCN doorbell 0      VCN Decode ring       VCN Decode ring
VCN doorbell 1-3    Reserved              Reserved
VCN doorbell 4      JPEG0-0 ring
VCN doorbell 5      JPEG0-1 ring
VCN doorbell 6      JPEG0-2 ring
VCN doorbell 7      JPEG0-3 ring
VCN doorbell 8                            JPEG1-0 ring
VCN doorbell 9                            JPEG1-1 ring
VCN doorbell 10                           JPEG1-2 ring
VCN doorbell 11                           JPEG1-3 ring

Changes involve
1. sriov cpx mode - 4 rings
2. sriov cpx mode for odd numbered VFs - register correct src-ids
(starting with JPEG4). Map src-id to correct instance in interrupt-
handler.

v2:
1. removed mmio access from interrupt handler. Use xcc_mask to detect
cpx mode.
2. remove unnecessary sriov variables

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 60 +---
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 32caeb37cef9..d95ca797412c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -68,6 +68,11 @@ static int jpeg_v4_0_3_early_init(void *handle)
 
adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
 
+   /* check for sriov cpx mode */
+   if (amdgpu_sriov_vf(adev))
+   if (adev->gfx.xcc_mask == 0x1)
+   adev->jpeg.num_jpeg_rings = 4;
+
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
jpeg_v4_0_3_set_ras_funcs(adev);
@@ -87,11 +92,25 @@ static int jpeg_v4_0_3_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
+   bool sriov_cpx_odd = false;
+
+   /* check for sriov cpx mode odd/even numbered vfs */
+   if (amdgpu_sriov_vf(adev)) {
+   if (adev->gfx.xcc_mask == 0x1) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   sriov_cpx_odd = true;
+   }
+   }
 
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
/* JPEG TRAP */
-   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+   if (!sriov_cpx_odd)
+   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
amdgpu_ih_srcid_jpeg[j], >jpeg.inst->irq);
+   else
+   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+   amdgpu_ih_srcid_jpeg[j+4], 
>jpeg.inst->irq);
+
if (r)
return r;
}
@@ -116,10 +135,14 @@ static int jpeg_v4_0_3_sw_init(void *handle)
(adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
1 + j + 9 * jpeg_inst;
} else {
-   if (j < 4)
+   if ((j < 4) && (!sriov_cpx_odd))
ring->doorbell_index =

(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
4 + j + 32 * jpeg_inst;
+   else if (sriov_cpx_odd)
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   12 + j + 32 * jpeg_inst;
else
ring->doorbell_index =

(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
@@ -186,6 +209,7 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
uint32_t size, size_dw, item_offset;
uint32_t init_status;
int i, j, jpeg_inst;
+   bool cpx_odd = false;
 
struct mmsch_v4_0_cmd_direct_write
direct_wt = { {0} };
@@ -197,6 +221,12 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
end.cmd_header.command_type =
MMSCH_COMMAND__END;
 
+   /* check for cpx mode odd/even numbered vf */
+   if (adev->gfx.xcc_mask == 0x1) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   cpx_odd = true;
+   }
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
jpeg_inst = GET_INST(JPEG, i);
 
@@ -220,10 +250,14 @@ static int jpeg_v4_0_3_start_sri

[PATCH v2 1/3] drm/amdgpu: function to read physical xcc_id

2024-03-15 Thread Samir Dhume
For SRIOV CPX mode, the assignment of jpeg doorbells depends on
whether the VF is even/odd numbered. The physical xcc_id indicates
whether the VF is even or odd.

regCP_PSP_XCP_CTL is RO for VF through rlcg.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 04a86dff71e6..451192403c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -297,6 +297,7 @@ struct amdgpu_gfx_funcs {
int (*switch_partition_mode)(struct amdgpu_device *adev,
 int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+   int (*get_xcc_id)(struct amdgpu_device *adev, int inst);
 };
 
 struct sq_work {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index b53c8fd4e8cf..68508c19a9b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -669,6 +669,11 @@ static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device 
*adev, int ih_node)
return xcc - 1;
 }
 
+static int gfx_v9_4_3_get_xcc_id(struct amdgpu_device *adev, int inst)
+{
+   return RREG32_SOC15(GC, GET_INST(GC, inst), regCP_PSP_XCP_CTL);
+}
+
 static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.get_gpu_clock_counter = _v9_4_3_get_gpu_clock_counter,
.select_se_sh = _v9_4_3_xcc_select_se_sh,
@@ -678,6 +683,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = 
{
.select_me_pipe_q = _v9_4_3_select_me_pipe_q,
.switch_partition_mode = _v9_4_3_switch_compute_partition,
.ih_node_to_logical_xcc = _v9_4_3_ih_to_xcc_inst,
+   .get_xcc_id = _v9_4_3_get_xcc_id,
 };
 
 static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle,
-- 
2.34.1



[PATCH 3/3] drm/amdgpu/jpeg: support for sriov cpx mode

2024-03-04 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 80 +---
 1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 32caeb37cef9..4bf087f8ca2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -65,9 +65,15 @@ static int amdgpu_ih_srcid_jpeg[] = {
 static int jpeg_v4_0_3_early_init(void *handle)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
 
adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
 
+   if (amdgpu_sriov_vf(adev))
+   if (adev->xcp_mgr->funcs->query_partition_mode(xcp_mgr) ==
+   AMDGPU_CPX_PARTITION_MODE)
+   adev->jpeg.num_jpeg_rings = 4;
+
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
jpeg_v4_0_3_set_ras_funcs(adev);
@@ -88,10 +94,28 @@ static int jpeg_v4_0_3_sw_init(void *handle)
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
 
+   bool sriov_cpx_odd = false;
+   struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+   int mode;
+
+   if (amdgpu_sriov_vf(adev)) {
+   mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+
+   if (mode == AMDGPU_CPX_PARTITION_MODE) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   sriov_cpx_odd = true;
+   }
+   }
+
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
/* JPEG TRAP */
-   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+   if (!sriov_cpx_odd)
+   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
amdgpu_ih_srcid_jpeg[j], >jpeg.inst->irq);
+   else
+   r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+   amdgpu_ih_srcid_jpeg[j+4], 
>jpeg.inst->irq);
+
if (r)
return r;
}
@@ -116,10 +140,15 @@ static int jpeg_v4_0_3_sw_init(void *handle)
(adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
1 + j + 9 * jpeg_inst;
} else {
-   if (j < 4)
+   if ((j < 4) && (!sriov_cpx_odd))
ring->doorbell_index =

(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
4 + j + 32 * jpeg_inst;
+   else if (sriov_cpx_odd)
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   12 + j + 32 * jpeg_inst;
+
else
ring->doorbell_index =

(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
@@ -186,6 +215,9 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
uint32_t size, size_dw, item_offset;
uint32_t init_status;
int i, j, jpeg_inst;
+   struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+   int mode;
+   bool cpx_odd = false;
 
struct mmsch_v4_0_cmd_direct_write
direct_wt = { {0} };
@@ -197,6 +229,13 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
end.cmd_header.command_type =
MMSCH_COMMAND__END;
 
+   mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+
+   if (mode == AMDGPU_CPX_PARTITION_MODE) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   cpx_odd = true;
+   }
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
jpeg_inst = GET_INST(JPEG, i);
 
@@ -220,10 +259,14 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device 
*adev)
tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JRBC0_UVD_JRBC_RB_SIZE);
MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
 
-   if (j <= 3) {
+   if ((j <= 3) && (!cpx_odd)) {
header.mjpegdec0[j].table_offset = item_offset;
header.mjpegdec0[j].init_status = 0;
header.mjpegdec0[j].table_size = table_size;
+   } else if (cpx_odd) {
+   header.mjpegdec1[j].table_offset = i

[PATCH 2/3] drm/amdgpu: sdma support for sriov cpx mode

2024-03-04 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 34 +++-
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index fec5a3d1c4bc..f666ececbe7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -82,17 +82,37 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
}
 }
 
-static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned 
client_id)
 {
+
+   struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+   bool sriov_cpx_odd = false;
+   int mode;
+
+   if (amdgpu_sriov_vf(adev)) {
+   mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+
+   if (mode == AMDGPU_CPX_PARTITION_MODE) {
+   if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
+   sriov_cpx_odd = true;
+   }
+   }
+
switch (client_id) {
case SOC15_IH_CLIENTID_SDMA0:
return 0;
case SOC15_IH_CLIENTID_SDMA1:
return 1;
case SOC15_IH_CLIENTID_SDMA2:
-   return 2;
+   if (sriov_cpx_odd)
+   return 0;
+   else
+   return 2;
case SOC15_IH_CLIENTID_SDMA3:
-   return 3;
+   if (sriov_cpx_odd)
+   return 1;
+   else
+   return 3;
default:
return -EINVAL;
}
@@ -1541,7 +1561,7 @@ static int sdma_v4_4_2_process_trap_irq(struct 
amdgpu_device *adev,
uint32_t instance, i;
 
DRM_DEBUG("IH: SDMA trap\n");
-   instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
 
/* Client id gives the SDMA instance in AID. To know the exact SDMA
 * instance, interrupt entry gives the node id which corresponds to the 
AID instance.
@@ -1584,7 +1604,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct 
amdgpu_device *adev,
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
goto out;
 
-   instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0)
goto out;
 
@@ -1603,7 +1623,7 @@ static int sdma_v4_4_2_process_illegal_inst_irq(struct 
amdgpu_device *adev,
 
DRM_ERROR("Illegal instruction in SDMA command stream\n");
 
-   instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0)
return 0;
 
@@ -1647,7 +1667,7 @@ static int sdma_v4_4_2_print_iv_entry(struct 
amdgpu_device *adev,
struct amdgpu_task_info task_info;
u64 addr;
 
-   instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+   instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0 || instance >= adev->sdma.num_instances) {
dev_err(adev->dev, "sdma instance invalid %d\n", instance);
return -EINVAL;
-- 
2.34.1



[PATCH 1/3] drm/amdgpu: function to read physical xcc_id

2024-03-04 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8fcf889ddce9..bebda5501cb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -298,6 +298,7 @@ struct amdgpu_gfx_funcs {
int (*switch_partition_mode)(struct amdgpu_device *adev,
 int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+   int (*get_xcc_id)(struct amdgpu_device *adev, int inst);
 };
 
 struct sq_work {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index b53c8fd4e8cf..68508c19a9b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -669,6 +669,11 @@ static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device 
*adev, int ih_node)
return xcc - 1;
 }
 
+static int gfx_v9_4_3_get_xcc_id(struct amdgpu_device *adev, int inst)
+{
+   return RREG32_SOC15(GC, GET_INST(GC, inst), regCP_PSP_XCP_CTL);
+}
+
 static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.get_gpu_clock_counter = _v9_4_3_get_gpu_clock_counter,
.select_se_sh = _v9_4_3_xcc_select_se_sh,
@@ -678,6 +683,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = 
{
.select_me_pipe_q = _v9_4_3_select_me_pipe_q,
.switch_partition_mode = _v9_4_3_switch_compute_partition,
.ih_node_to_logical_xcc = _v9_4_3_ih_to_xcc_inst,
+   .get_xcc_id = _v9_4_3_get_xcc_id,
 };
 
 static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle,
-- 
2.34.1



[PATCH] drm/amdgpu/jpeg - skip change of power-gating state for sriov

2023-08-16 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 15612915bb6c..1de79d660285 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -360,8 +360,10 @@ static int jpeg_v4_0_3_hw_fini(void *handle)
 
cancel_delayed_work_sync(>jpeg.idle_work);
 
-   if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
-   ret = jpeg_v4_0_3_set_powergating_state(adev, 
AMD_PG_STATE_GATE);
+   if (!amdgpu_sriov_vf(adev)) {
+   if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+   ret = jpeg_v4_0_3_set_powergating_state(adev, 
AMD_PG_STATE_GATE);
+   }
 
return ret;
 }
-- 
2.34.1



[PATCH v4 6/7] drm/amdgpu/jpeg: mmsch_v3_0_4 requires doorbell on 32 byte boundary

2023-08-08 Thread Samir Dhume
BASE: VCN0 unified (32 byte boundary)
BASE+4: MJPEG0
BASE+5: MJPEG1
BASE+6: MJPEG2
BASE+7: MJPEG3
BASE+12: MJPEG4
BASE+13: MJPEG5
BASE+14: MJPEG6
BASE+15: MJPEG7

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 33f04ea8549f..f745eeef442f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle)
ring = >jpeg.inst[i].ring_dec[j];
ring->use_doorbell = true;
ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   1 + j + 9 * jpeg_inst;
+   if (!amdgpu_sriov_vf(adev)) {
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   1 + j + 9 * jpeg_inst;
+   } else {
+   if (j < 4)
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   4 + j + 32 * jpeg_inst;
+   else
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   8 + j + 32 * jpeg_inst;
+   }
sprintf(ring->name, "jpeg_dec_%d.%d", 
adev->jpeg.inst[i].aid_id, j);
r = amdgpu_ring_init(adev, ring, 512, 
>jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
-- 
2.34.1



[PATCH v4 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3

2023-08-08 Thread Samir Dhume
For sriov, the doorbell index for vcn0 for AID needs to be on a
32 byte boundary, so we need to move the vcn end doorbell.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index f637574644c0..4a279960cd21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
/* IH: 0x1A0 ~ 0x1AF */
AMDGPU_DOORBELL_LAYOUT1_IH  = 0x1A0,
-   /* VCN: 0x1B0 ~ 0x1D4 */
+   /* VCN: 0x1B0 ~ 0x1E8 */
AMDGPU_DOORBELL_LAYOUT1_VCN_START   = 0x1B0,
-   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,
+   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
 
AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= 
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = 
AMDGPU_DOORBELL_LAYOUT1_VCN_END,
 
-   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1D4,
+   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1E8,
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0x
 } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
 
-- 
2.34.1



[PATCH v4 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3

2023-08-08 Thread Samir Dhume
initialization table handshake with mmsch

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 260 +---
 1 file changed, 236 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 411c1d802823..66eb0c8e6f94 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
 #include "soc15d.h"
 #include "soc15_hw_ip.h"
 #include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
 #define VCN_VID_SOC_ADDRESS_2_00x1fb00
 #define VCN1_VID_SOC_ADDRESS_3_0   0x48300
 
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -130,6 +132,12 @@ static int vcn_v4_0_3_sw_init(void *handle)
amdgpu_vcn_fwlog_init(>vcn.inst[i]);
}
 
+   if (amdgpu_sriov_vf(adev)) {
+   r = amdgpu_virt_alloc_mm_table(adev);
+   if (r)
+   return r;
+   }
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
 
@@ -167,6 +175,9 @@ static int vcn_v4_0_3_sw_fini(void *handle)
drm_dev_exit(idx);
}
 
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -189,33 +200,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, r, vcn_inst;
 
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   vcn_inst = GET_INST(VCN, i);
-   ring = >vcn.inst[i].ring_enc[0];
+   if (amdgpu_sriov_vf(adev)) {
+   r = vcn_v4_0_3_start_sriov(adev);
+   if (r)
+   goto done;
 
-   if (ring->use_doorbell) {
-   adev->nbio.funcs->vcn_doorbell_range(
-   adev, ring->use_doorbell,
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst,
-   adev->vcn.inst[i].aid_id);
-
-   WREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL,
-   ring->doorbell_index
-   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
-   VCN_RB1_DB_CTRL__EN_MASK);
-
-   /* Read DB_CTRL to flush the write DB_CTRL command. */
-   RREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL);
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   ring = >vcn.inst[i].ring_enc[0];
+   ring->wptr = 0;
+   ring->wptr_old = 0;
+   vcn_v4_0_3_unified_ring_set_wptr(ring);
+   ring->sched.ready = true;
}
+   } else {
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   vcn_inst = GET_INST(VCN, i);
+   ring = >vcn.inst[i].ring_enc[0];
+
+   if (ring->use_doorbell) {
+   adev->nbio.funcs->vcn_doorbell_range(
+   adev, ring->use_doorbell,
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   9 * vcn_inst,
+   adev->vcn.inst[i].aid_id);
+
+   WREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL,
+   ring->doorbell_index
+   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+   VCN_RB1_DB_CTRL__EN_MASK);
+
+   /* Read DB_CTRL to flush the write DB_CTRL 
command. */
+   RREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL);
+   }
 
-   r = amdgpu_ring_test_helper(ring);
-   if (r)
-   got

[PATCH v4 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3

2023-08-08 Thread Samir Dhume
Implement the initialization table handshake with the MMSCH so that
JPEG v4_0_3 can be started when running as an SR-IOV VF.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 174 ---
 1 file changed, 153 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ce2b22f7e4e4..33f04ea8549f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -26,6 +26,7 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device 
*adev);
 static int jpeg_v4_0_3_set_powergating_state(void *handle,
enum amd_powergating_state state);
 static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
 
 static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG_DECODE,
@@ -160,6 +162,120 @@ static int jpeg_v4_0_3_sw_fini(void *handle)
return r;
 }
 
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+   struct amdgpu_ring *ring;
+   uint64_t ctx_addr;
+   uint32_t param, resp, expected;
+   uint32_t tmp, timeout;
+
+   struct amdgpu_mm_table *table = >virt.mm_table;
+   uint32_t *table_loc;
+   uint32_t table_size;
+   uint32_t size, size_dw, item_offset;
+   uint32_t init_status;
+   int i, j, jpeg_inst;
+
+   struct mmsch_v4_0_cmd_direct_write
+   direct_wt = { {0} };
+   struct mmsch_v4_0_cmd_end end = { {0} };
+   struct mmsch_v4_0_3_init_header header;
+
+   direct_wt.cmd_header.command_type =
+   MMSCH_COMMAND__DIRECT_REG_WRITE;
+   end.cmd_header.command_type =
+   MMSCH_COMMAND__END;
+
+   for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+   jpeg_inst = GET_INST(JPEG, i);
+
+   memset(, 0, sizeof(struct mmsch_v4_0_3_init_header));
+   header.version = MMSCH_VERSION;
+   header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 
2;
+
+   table_loc = (uint32_t *)table->cpu_addr;
+   table_loc += header.total_size;
+
+   item_offset = header.total_size;
+
+
+   for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+   ring = >jpeg.inst[i].ring_dec[j];
+   table_size = 0;
+
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, 
lower_32_bits(ring->gpu_addr));
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, 
upper_32_bits(ring->gpu_addr));
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+   if (j <= 3) {
+   header.mjpegdec0[j].table_offset = item_offset;
+   header.mjpegdec0[j].init_status = 0;
+   header.mjpegdec0[j].table_size = table_size;
+   } else {
+   header.mjpegdec1[j-4].table_offset = 
item_offset;
+   header.mjpegdec1[j-4].init_status = 0;
+   header.mjpegdec1[j-4].table_size = table_size;
+   }
+   header.total_size += table_size;
+   item_offset+= table_size;
+   }
+
+   MMSCH_V4_0_INSERT_END();
+
+   /* send init table to MMSCH */
+   size = sizeof(struct mmsch_v4_0_3_init_header);
+   table_loc = (uint32_t *)table->cpu_addr;
+   memcpy((void *)table_loc, , size);
+
+   ctx_addr = table->gpu_addr;
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, 
lower_32_bits(ctx_addr));
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, 
upper_32_bits(ctx_addr));
+
+   tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+   tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+   tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+   size = header.total_size;
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+   param = 0x0001;
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+   tmp 

[PATCH v4 4/7] drm/amdgpu/vcn: mmsch_v4_0_3 requires doorbell on 32 byte boundary

2023-08-08 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 66eb0c8e6f94..1e5aad207878 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle)
 
ring = >vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst;
+
+   if (!amdgpu_sriov_vf(adev))
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   9 * vcn_inst;
+   else
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   32 * vcn_inst;
+
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
r = amdgpu_ring_init(adev, ring, 512, >vcn.inst->irq, 0,
-- 
2.34.1



[PATCH v4 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov

2023-08-08 Thread Samir Dhume
The MMSCH v4_0_3 structures are the same as v4_0 except for the
init header, so only the new init header is defined here.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++
 1 file changed, 37 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h

diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h 
b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index ..db7eb5260295
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header {
+   uint32_t version;
+   uint32_t total_size;
+   struct mmsch_v4_0_table_info vcn0;
+   struct mmsch_v4_0_table_info mjpegdec0[4];
+   struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
-- 
2.34.1



[PATCH v4 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov

2023-08-08 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 550ac040b4be..411c1d802823 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
 
+   /* for SRIOV, guest should not control VCN Power-gating
+* MMSCH FW should control Power-gating and clock-gating
+* guest should avoid touching CGC and PG
+*/
+   if (amdgpu_sriov_vf(adev)) {
+   adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+   return 0;
+   }
+
if (state == adev->vcn.cur_state)
return 0;
 
-- 
2.34.1



[PATCH v3 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3

2023-07-28 Thread Samir Dhume
For SR-IOV, the VCN0 doorbell index of each AID needs to be on a
32 byte boundary, so the VCN end doorbell is moved from 0x1D4 to 0x1E8.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index f637574644c0..4a279960cd21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
/* IH: 0x1A0 ~ 0x1AF */
AMDGPU_DOORBELL_LAYOUT1_IH  = 0x1A0,
-   /* VCN: 0x1B0 ~ 0x1D4 */
+   /* VCN: 0x1B0 ~ 0x1E8 */
AMDGPU_DOORBELL_LAYOUT1_VCN_START   = 0x1B0,
-   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,
+   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
 
AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= 
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = 
AMDGPU_DOORBELL_LAYOUT1_VCN_END,
 
-   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1D4,
+   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1E8,
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0x
 } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
 
-- 
2.34.1



[PATCH v3 6/7] drm/amdgpu/jpeg: mmsch_v4_0_3 requires doorbell on 32 byte boundary

2023-07-28 Thread Samir Dhume
BASE: VCN0 unified (32 byte boundary)
BASE+4: MJPEG0
BASE+5: MJPEG1
BASE+6: MJPEG2
BASE+7: MJPEG3
BASE+12: MJPEG4
BASE+13: MJPEG5
BASE+14: MJPEG6
BASE+15: MJPEG7

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 33f04ea8549f..f745eeef442f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle)
ring = >jpeg.inst[i].ring_dec[j];
ring->use_doorbell = true;
ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   1 + j + 9 * jpeg_inst;
+   if (!amdgpu_sriov_vf(adev)) {
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   1 + j + 9 * jpeg_inst;
+   } else {
+   if (j < 4)
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   4 + j + 32 * jpeg_inst;
+   else
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   8 + j + 32 * jpeg_inst;
+   }
sprintf(ring->name, "jpeg_dec_%d.%d", 
adev->jpeg.inst[i].aid_id, j);
r = amdgpu_ring_init(adev, ring, 512, 
>jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
-- 
2.34.1



[PATCH v3 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3

2023-07-28 Thread Samir Dhume
Implement the initialization table handshake with the MMSCH so that
JPEG v4_0_3 can be started when running as an SR-IOV VF.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 174 ---
 1 file changed, 153 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ce2b22f7e4e4..33f04ea8549f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -26,6 +26,7 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device 
*adev);
 static int jpeg_v4_0_3_set_powergating_state(void *handle,
enum amd_powergating_state state);
 static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
 
 static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG_DECODE,
@@ -160,6 +162,120 @@ static int jpeg_v4_0_3_sw_fini(void *handle)
return r;
 }
 
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+   struct amdgpu_ring *ring;
+   uint64_t ctx_addr;
+   uint32_t param, resp, expected;
+   uint32_t tmp, timeout;
+
+   struct amdgpu_mm_table *table = >virt.mm_table;
+   uint32_t *table_loc;
+   uint32_t table_size;
+   uint32_t size, size_dw, item_offset;
+   uint32_t init_status;
+   int i, j, jpeg_inst;
+
+   struct mmsch_v4_0_cmd_direct_write
+   direct_wt = { {0} };
+   struct mmsch_v4_0_cmd_end end = { {0} };
+   struct mmsch_v4_0_3_init_header header;
+
+   direct_wt.cmd_header.command_type =
+   MMSCH_COMMAND__DIRECT_REG_WRITE;
+   end.cmd_header.command_type =
+   MMSCH_COMMAND__END;
+
+   for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+   jpeg_inst = GET_INST(JPEG, i);
+
+   memset(, 0, sizeof(struct mmsch_v4_0_3_init_header));
+   header.version = MMSCH_VERSION;
+   header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 
2;
+
+   table_loc = (uint32_t *)table->cpu_addr;
+   table_loc += header.total_size;
+
+   item_offset = header.total_size;
+
+
+   for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+   ring = >jpeg.inst[i].ring_dec[j];
+   table_size = 0;
+
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, 
lower_32_bits(ring->gpu_addr));
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, 
upper_32_bits(ring->gpu_addr));
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+   if (j <= 3) {
+   header.mjpegdec0[j].table_offset = item_offset;
+   header.mjpegdec0[j].init_status = 0;
+   header.mjpegdec0[j].table_size = table_size;
+   } else {
+   header.mjpegdec1[j-4].table_offset = 
item_offset;
+   header.mjpegdec1[j-4].init_status = 0;
+   header.mjpegdec1[j-4].table_size = table_size;
+   }
+   header.total_size += table_size;
+   item_offset+= table_size;
+   }
+
+   MMSCH_V4_0_INSERT_END();
+
+   /* send init table to MMSCH */
+   size = sizeof(struct mmsch_v4_0_3_init_header);
+   table_loc = (uint32_t *)table->cpu_addr;
+   memcpy((void *)table_loc, , size);
+
+   ctx_addr = table->gpu_addr;
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, 
lower_32_bits(ctx_addr));
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, 
upper_32_bits(ctx_addr));
+
+   tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+   tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+   tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+   size = header.total_size;
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+   param = 0x0001;
+   WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+   tmp 

[PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3

2023-07-28 Thread Samir Dhume
Implement the initialization table handshake with the MMSCH so that
VCN v4_0_3 can be started when running as an SR-IOV VF.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +---
 1 file changed, 233 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 411c1d802823..b978265b2d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
 #include "soc15d.h"
 #include "soc15_hw_ip.h"
 #include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
 #define VCN_VID_SOC_ADDRESS_2_00x1fb00
 #define VCN1_VID_SOC_ADDRESS_3_0   0x48300
 
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
amdgpu_vcn_fwlog_init(>vcn.inst[i]);
}
 
+   r = amdgpu_virt_alloc_mm_table(adev);
+   if (r)
+   return r;
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
 
@@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
drm_dev_exit(idx);
}
 
+   amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -189,33 +197,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, r, vcn_inst;
 
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   vcn_inst = GET_INST(VCN, i);
-   ring = >vcn.inst[i].ring_enc[0];
+   if (amdgpu_sriov_vf(adev)) {
+   r = vcn_v4_0_3_start_sriov(adev);
+   if (r)
+   goto done;
 
-   if (ring->use_doorbell) {
-   adev->nbio.funcs->vcn_doorbell_range(
-   adev, ring->use_doorbell,
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst,
-   adev->vcn.inst[i].aid_id);
-
-   WREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL,
-   ring->doorbell_index
-   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
-   VCN_RB1_DB_CTRL__EN_MASK);
-
-   /* Read DB_CTRL to flush the write DB_CTRL command. */
-   RREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL);
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   ring = >vcn.inst[i].ring_enc[0];
+   ring->wptr = 0;
+   ring->wptr_old = 0;
+   vcn_v4_0_3_unified_ring_set_wptr(ring);
+   ring->sched.ready = true;
}
+   } else {
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   vcn_inst = GET_INST(VCN, i);
+   ring = >vcn.inst[i].ring_enc[0];
+
+   if (ring->use_doorbell) {
+   adev->nbio.funcs->vcn_doorbell_range(
+   adev, ring->use_doorbell,
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   9 * vcn_inst,
+   adev->vcn.inst[i].aid_id);
+
+   WREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL,
+   ring->doorbell_index
+   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+   VCN_RB1_DB_CTRL__EN_MASK);
+
+   /* Read DB_CTRL to flush the write DB_CTRL 
command. */
+   RREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL);
+   }
 
-   r = amdgpu_ring_test_helper(ring);
-   if (r)
-   goto done;
+   r = amdgpu_ring_test_helper(ring);
+   if (r)
+

[PATCH v3 4/7] drm/amdgpu/vcn: mmsch_v4_0_3 requires doorbell on 32 byte boundary

2023-07-28 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index b978265b2d77..7cd5ca204317 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle)
 
ring = >vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst;
+
+   if (!amdgpu_sriov_vf(adev))
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   9 * vcn_inst;
+   else
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   32 * vcn_inst;
+
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
r = amdgpu_ring_init(adev, ring, 512, >vcn.inst->irq, 0,
-- 
2.34.1



[PATCH v3 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov

2023-07-28 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 550ac040b4be..411c1d802823 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
 
+   /* for SRIOV, guest should not control VCN Power-gating
+* MMSCH FW should control Power-gating and clock-gating
+* guest should avoid touching CGC and PG
+*/
+   if (amdgpu_sriov_vf(adev)) {
+   adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+   return 0;
+   }
+
if (state == adev->vcn.cur_state)
return 0;
 
-- 
2.34.1



[PATCH v3 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov

2023-07-28 Thread Samir Dhume
The MMSCH v4_0_3 structures are the same as v4_0 except for the
init header, so only the new init header is defined here.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++
 1 file changed, 37 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h

diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h 
b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index ..db7eb5260295
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header {
+   uint32_t version;
+   uint32_t total_size;
+   struct mmsch_v4_0_table_info vcn0;
+   struct mmsch_v4_0_table_info mjpegdec0[4];
+   struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
-- 
2.34.1



[PATCH 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3

2023-07-20 Thread Samir Dhume
For SR-IOV, the VCN0 doorbell index of each AID needs to be on a
32 byte boundary, so the VCN end doorbell is moved from 0x1D4 to 0x1E8.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index f637574644c0..4a279960cd21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
/* IH: 0x1A0 ~ 0x1AF */
AMDGPU_DOORBELL_LAYOUT1_IH  = 0x1A0,
-   /* VCN: 0x1B0 ~ 0x1D4 */
+   /* VCN: 0x1B0 ~ 0x1E8 */
AMDGPU_DOORBELL_LAYOUT1_VCN_START   = 0x1B0,
-   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,
+   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
 
AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= 
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = 
AMDGPU_DOORBELL_LAYOUT1_VCN_END,
 
-   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1D4,
+   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1E8,
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0x
 } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
 
-- 
2.34.1



[PATCH 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov

2023-07-20 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 550ac040b4be..411c1d802823 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
 
+   /* for SRIOV, guest should not control VCN Power-gating
+* MMSCH FW should control Power-gating and clock-gating
+* guest should avoid touching CGC and PG
+*/
+   if (amdgpu_sriov_vf(adev)) {
+   adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+   return 0;
+   }
+
if (state == adev->vcn.cur_state)
return 0;
 
-- 
2.34.1



[PATCH 6/7] drm/amdgpu/jpeg: mmsch_v4_0_3 requires doorbell on 32 byte boundary

2023-07-20 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 85ee74fdb7e3..896e2f895884 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle)
ring = >jpeg.inst[i].ring_dec[j];
ring->use_doorbell = true;
ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   1 + j + 9 * jpeg_inst;
+   if (!amdgpu_sriov_vf(adev)) {
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   1 + j + 9 * jpeg_inst;
+   } else {
+   if (j < 4)
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   4 + j + 32 * jpeg_inst;
+   else
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   8 + j + 32 * jpeg_inst;
+   }
sprintf(ring->name, "jpeg_dec_%d.%d", 
adev->jpeg.inst[i].aid_id, j);
r = amdgpu_ring_init(adev, ring, 512, 
>jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
-- 
2.34.1



[PATCH 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3

2023-07-20 Thread Samir Dhume
Implement the initialization table handshake with the MMSCH so that
JPEG v4_0_3 can be started when running as an SR-IOV VF.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 171 ---
 1 file changed, 150 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ce2b22f7e4e4..85ee74fdb7e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -26,6 +26,7 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device 
*adev);
 static int jpeg_v4_0_3_set_powergating_state(void *handle,
enum amd_powergating_state state);
 static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
 
 static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG_DECODE,
@@ -160,6 +162,117 @@ static int jpeg_v4_0_3_sw_fini(void *handle)
return r;
 }
 
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+   struct amdgpu_ring *ring;
+   uint64_t ctx_addr;
+   uint32_t param, resp, expected;
+   uint32_t tmp, timeout;
+
+   struct amdgpu_mm_table *table = >virt.mm_table;
+   uint32_t *table_loc;
+   uint32_t table_size;
+   uint32_t size, size_dw, item_offset;
+   uint32_t init_status;
+   int i, j;
+
+   struct mmsch_v4_0_cmd_direct_write
+   direct_wt = { {0} };
+   struct mmsch_v4_0_cmd_end end = { {0} };
+   struct mmsch_v4_0_3_init_header header;
+
+   direct_wt.cmd_header.command_type =
+   MMSCH_COMMAND__DIRECT_REG_WRITE;
+   end.cmd_header.command_type =
+   MMSCH_COMMAND__END;
+
+   for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+   memset(, 0, sizeof(struct mmsch_v4_0_3_init_header));
+   header.version = MMSCH_VERSION;
+   header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 
2;
+
+   table_loc = (uint32_t *)table->cpu_addr;
+   table_loc += header.total_size;
+
+   item_offset = header.total_size;
+
+   for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+   ring = >jpeg.inst[i].ring_dec[j];
+   table_size = 0;
+
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, 
lower_32_bits(ring->gpu_addr));
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, 
upper_32_bits(ring->gpu_addr));
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+   if (j <= 3) {
+   header.mjpegdec0[j].table_offset = item_offset;
+   header.mjpegdec0[j].init_status = 0;
+   header.mjpegdec0[j].table_size = table_size;
+   } else {
+   header.mjpegdec1[j-4].table_offset = 
item_offset;
+   header.mjpegdec1[j-4].init_status = 0;
+   header.mjpegdec1[j-4].table_size = table_size;
+   }
+   header.total_size += table_size;
+   item_offset+= table_size;
+   }
+
+   MMSCH_V4_0_INSERT_END();
+
+   /* send init table to MMSCH */
+   size = sizeof(struct mmsch_v4_0_3_init_header);
+   table_loc = (uint32_t *)table->cpu_addr;
+   memcpy((void *)table_loc, , size);
+
+   ctx_addr = table->gpu_addr;
+   WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_ADDR_LO, 
lower_32_bits(ctx_addr));
+   WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_ADDR_HI, 
upper_32_bits(ctx_addr));
+
+   tmp = RREG32_SOC15(VCN, i, regMMSCH_VF_VMID);
+   tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+   tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+   WREG32_SOC15(VCN, i, regMMSCH_VF_VMID, tmp);
+
+   size = header.total_size;
+   WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_SIZE, size);
+
+   WREG32_SOC15(VCN, i, regMMSCH_VF_MAILBOX_RESP, 0);
+
+   param = 0x0001;
+   WREG32_SOC15(VCN, i, regMMSCH_VF_MAILBOX_HOST, param);
+   tmp = 0;
+   timeout = 1000;
+   resp = 0;
+   expected = MMSCH_VF_MAILBOX_

[PATCH 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3

2023-07-20 Thread Samir Dhume
Implement the initialization table handshake with the MMSCH so that
VCN v4_0_3 can be started when running as an SR-IOV VF.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 261 +---
 1 file changed, 237 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 411c1d802823..8650e3c6288d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
 #include "soc15d.h"
 #include "soc15_hw_ip.h"
 #include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
 #define VCN_VID_SOC_ADDRESS_2_0    0x1fb00
 #define VCN1_VID_SOC_ADDRESS_3_0   0x48300
 
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
 
+   r = amdgpu_virt_alloc_mm_table(adev);
+   if (r)
+   return r;
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
 
@@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
drm_dev_exit(idx);
}
 
+   amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -189,33 +197,50 @@ static int vcn_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, r, vcn_inst;
 
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   vcn_inst = GET_INST(VCN, i);
-   ring = &adev->vcn.inst[i].ring_enc[0];
+   if (amdgpu_sriov_vf(adev)) {
+   r = vcn_v4_0_3_start_sriov(adev);
+   if (r)
+   goto done;
+
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   if (adev->vcn.harvest_config & (1 << i))
+   continue;
 
-   if (ring->use_doorbell) {
-   adev->nbio.funcs->vcn_doorbell_range(
-   adev, ring->use_doorbell,
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst,
-   adev->vcn.inst[i].aid_id);
-
-   WREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL,
-   ring->doorbell_index
-   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
-   VCN_RB1_DB_CTRL__EN_MASK);
-
-   /* Read DB_CTRL to flush the write DB_CTRL command. */
-   RREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL);
+   ring = &adev->vcn.inst[i].ring_enc[0];
+   ring->wptr = 0;
+   ring->wptr_old = 0;
+   vcn_v4_0_3_unified_ring_set_wptr(ring);
+   ring->sched.ready = true;
}
+   } else {
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   vcn_inst = GET_INST(VCN, i);
+   ring = &adev->vcn.inst[i].ring_enc[0];
+
+   if (ring->use_doorbell) {
+   adev->nbio.funcs->vcn_doorbell_range(
+   adev, ring->use_doorbell,
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   9 * vcn_inst,
+   adev->vcn.inst[i].aid_id);
+
+   WREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL,
+   ring->doorbell_index
+   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+   VCN_RB1_DB_CTRL__EN_MASK);
+
+   /* Read DB_CTRL to flush the write DB_CTRL 
command. */
+   RREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL);
+   }
 
-   r = amdgpu_ring_test_helper(ring);
-   if (r)
- 

[PATCH 4/7] drm/amdgpu/vcn: mmsch_v4_0_3 requires doorbell on 32 byte boundary

2023-07-20 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 8650e3c6288d..09b3fa707af6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle)
 
ring = &adev->vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst;
+
+   if (!amdgpu_sriov_vf(adev))
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   9 * vcn_inst;
+   else
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   32 * vcn_inst;
+
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
-- 
2.34.1



[PATCH 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov

2023-07-20 Thread Samir Dhume
The structures are the same as v4_0 except for the
init header

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++
 1 file changed, 37 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h

diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h 
b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index 000000000000..db7eb5260295
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header {
+   uint32_t version;
+   uint32_t total_size;
+   struct mmsch_v4_0_table_info vcn0;
+   struct mmsch_v4_0_table_info mjpegdec0[4];
+   struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
-- 
2.34.1



[PATCH 7/7] drm/amdgpu/vcn: change end doorbell index for vcn_v4_0_3

2023-07-17 Thread Samir Dhume
For SR-IOV, the vcn0 doorbell index of each AID needs to be on a
32 byte boundary, so the VCN end doorbell has to be moved.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index f637574644c0..4a279960cd21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -330,14 +330,14 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
/* IH: 0x1A0 ~ 0x1AF */
AMDGPU_DOORBELL_LAYOUT1_IH  = 0x1A0,
-   /* VCN: 0x1B0 ~ 0x1D4 */
+   /* VCN: 0x1B0 ~ 0x1E8 */
AMDGPU_DOORBELL_LAYOUT1_VCN_START   = 0x1B0,
-   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,
+   AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
 
AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP= 
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = 
AMDGPU_DOORBELL_LAYOUT1_VCN_END,
 
-   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1D4,
+   AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT  = 0x1E8,
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
 } AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1;
 
-- 
2.34.1



[PATCH 5/7] drm/amdgpu/jpeg: sriov support for jpeg_v4_0_3

2023-07-17 Thread Samir Dhume
initialization table handshake with mmsch

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 171 ---
 1 file changed, 150 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index ce2b22f7e4e4..85ee74fdb7e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -26,6 +26,7 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "jpeg_v4_0_3.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device 
*adev);
 static int jpeg_v4_0_3_set_powergating_state(void *handle,
enum amd_powergating_state state);
 static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
 
 static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG_DECODE,
@@ -160,6 +162,117 @@ static int jpeg_v4_0_3_sw_fini(void *handle)
return r;
 }
 
+static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
+{
+   struct amdgpu_ring *ring;
+   uint64_t ctx_addr;
+   uint32_t param, resp, expected;
+   uint32_t tmp, timeout;
+
+   struct amdgpu_mm_table *table = &adev->virt.mm_table;
+   uint32_t *table_loc;
+   uint32_t table_size;
+   uint32_t size, size_dw, item_offset;
+   uint32_t init_status;
+   int i, j;
+
+   struct mmsch_v4_0_cmd_direct_write
+   direct_wt = { {0} };
+   struct mmsch_v4_0_cmd_end end = { {0} };
+   struct mmsch_v4_0_3_init_header header;
+
+   direct_wt.cmd_header.command_type =
+   MMSCH_COMMAND__DIRECT_REG_WRITE;
+   end.cmd_header.command_type =
+   MMSCH_COMMAND__END;
+
+   for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+   memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
+   header.version = MMSCH_VERSION;
+   header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 
2;
+
+   table_loc = (uint32_t *)table->cpu_addr;
+   table_loc += header.total_size;
+
+   item_offset = header.total_size;
+
+   for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+   ring = &adev->jpeg.inst[i].ring_dec[j];
+   table_size = 0;
+
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, 
lower_32_bits(ring->gpu_addr));
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, 
upper_32_bits(ring->gpu_addr));
+   tmp = SOC15_REG_OFFSET(JPEG, 0, 
regUVD_JRBC0_UVD_JRBC_RB_SIZE);
+   MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+   if (j <= 3) {
+   header.mjpegdec0[j].table_offset = item_offset;
+   header.mjpegdec0[j].init_status = 0;
+   header.mjpegdec0[j].table_size = table_size;
+   } else {
+   header.mjpegdec1[j-4].table_offset = 
item_offset;
+   header.mjpegdec1[j-4].init_status = 0;
+   header.mjpegdec1[j-4].table_size = table_size;
+   }
+   header.total_size += table_size;
+   item_offset+= table_size;
+   }
+
+   MMSCH_V4_0_INSERT_END();
+
+   /* send init table to MMSCH */
+   size = sizeof(struct mmsch_v4_0_3_init_header);
+   table_loc = (uint32_t *)table->cpu_addr;
+   memcpy((void *)table_loc, &header, size);
+
+   ctx_addr = table->gpu_addr;
+   WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_ADDR_LO, 
lower_32_bits(ctx_addr));
+   WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_ADDR_HI, 
upper_32_bits(ctx_addr));
+
+   tmp = RREG32_SOC15(VCN, i, regMMSCH_VF_VMID);
+   tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+   tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+   WREG32_SOC15(VCN, i, regMMSCH_VF_VMID, tmp);
+
+   size = header.total_size;
+   WREG32_SOC15(VCN, i, regMMSCH_VF_CTX_SIZE, size);
+
+   WREG32_SOC15(VCN, i, regMMSCH_VF_MAILBOX_RESP, 0);
+
+   param = 0x0001;
+   WREG32_SOC15(VCN, i, regMMSCH_VF_MAILBOX_HOST, param);
+   tmp = 0;
+   timeout = 1000;
+   resp = 0;
+   expected = MMSCH_VF_MAILBOX_

[PATCH 6/7] drm/amdgpu/jpeg: mmsch_v4_0_3 requires doorbell on 32 byte boundary

2023-07-17 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 85ee74fdb7e3..896e2f895884 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -111,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle)
ring = &adev->jpeg.inst[i].ring_dec[j];
ring->use_doorbell = true;
ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   1 + j + 9 * jpeg_inst;
+   if (!amdgpu_sriov_vf(adev)) {
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   1 + j + 9 * jpeg_inst;
+   } else {
+   if (j < 4)
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   4 + j + 32 * jpeg_inst;
+   else
+   ring->doorbell_index =
+   
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   8 + j + 32 * jpeg_inst;
+   }
sprintf(ring->name, "jpeg_dec_%d.%d", 
adev->jpeg.inst[i].aid_id, j);
r = amdgpu_ring_init(adev, ring, 512, 
&adev->jpeg.inst->irq, 0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
-- 
2.34.1



[PATCH 4/7] drm/amdgpu/vcn: mmsch_v4_0_3 requires doorbell on 32 byte boundary

2023-07-17 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index ac405dfcfaf1..fa9abcb08c22 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -113,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle)
 
ring = &adev->vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
-   ring->doorbell_index =
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst;
+
+   if (!amdgpu_sriov_vf(adev))
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   9 * vcn_inst;
+   else
+   ring->doorbell_index =
+   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+   32 * vcn_inst;
+
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
-- 
2.34.1



[PATCH 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3

2023-07-17 Thread Samir Dhume
initialization table handshake with mmsch

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 264 +---
 1 file changed, 240 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 411c1d802823..ac405dfcfaf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
 #include "soc15d.h"
 #include "soc15_hw_ip.h"
 #include "vcn_v2_0.h"
+#include "mmsch_v4_0_3.h"
 
 #include "vcn/vcn_4_0_3_offset.h"
 #include "vcn/vcn_4_0_3_sh_mask.h"
@@ -44,6 +45,7 @@
 #define VCN_VID_SOC_ADDRESS_2_0    0x1fb00
 #define VCN1_VID_SOC_ADDRESS_3_0   0x48300
 
+static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
 static int vcn_v4_0_3_set_powergating_state(void *handle,
@@ -130,6 +132,12 @@ static int vcn_v4_0_3_sw_init(void *handle)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
 
+   if (amdgpu_sriov_vf(adev)) {
+   r = amdgpu_virt_alloc_mm_table(adev);
+   if (r)
+   return r;
+   }
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
 
@@ -167,6 +175,9 @@ static int vcn_v4_0_3_sw_fini(void *handle)
drm_dev_exit(idx);
}
 
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -189,33 +200,50 @@ static int vcn_v4_0_3_hw_init(void *handle)
struct amdgpu_ring *ring;
int i, r, vcn_inst;
 
-   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-   vcn_inst = GET_INST(VCN, i);
-   ring = &adev->vcn.inst[i].ring_enc[0];
+   if (amdgpu_sriov_vf(adev)) {
+   r = vcn_v4_0_3_start_sriov(adev);
+   if (r)
+   goto done;
 
-   if (ring->use_doorbell) {
-   adev->nbio.funcs->vcn_doorbell_range(
-   adev, ring->use_doorbell,
-   (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
-   9 * vcn_inst,
-   adev->vcn.inst[i].aid_id);
-
-   WREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL,
-   ring->doorbell_index
-   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
-   VCN_RB1_DB_CTRL__EN_MASK);
-
-   /* Read DB_CTRL to flush the write DB_CTRL command. */
-   RREG32_SOC15(
-   VCN, GET_INST(VCN, ring->me),
-   regVCN_RB1_DB_CTRL);
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   if (adev->vcn.harvest_config & (1 << i))
+   continue;
+
+   ring = &adev->vcn.inst[i].ring_enc[0];
+   ring->wptr = 0;
+   ring->wptr_old = 0;
+   vcn_v4_0_3_unified_ring_set_wptr(ring);
+   ring->sched.ready = true;
}
+   } else {
+   for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+   vcn_inst = GET_INST(VCN, i);
+   ring = &adev->vcn.inst[i].ring_enc[0];
+
+   if (ring->use_doorbell) {
+   adev->nbio.funcs->vcn_doorbell_range(
+   adev, ring->use_doorbell,
+   (adev->doorbell_index.vcn.vcn_ring0_1 
<< 1) +
+   9 * vcn_inst,
+   adev->vcn.inst[i].aid_id);
+
+   WREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_CTRL,
+   ring->doorbell_index
+   << 
VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+   VCN_RB1_DB_CTRL__EN_MASK);
+
+   /* Read DB_CTRL to flush the write DB_CTRL 
command. */
+   RREG32_SOC15(
+   VCN, GET_INST(VCN, ring->me),
+   regVCN_RB1_DB_

[PATCH 1/7] drm/amdgpu/vcn: Add MMSCH v4_0_3 support for sriov

2023-07-17 Thread Samir Dhume
The structures are the same as v4_0 except for the
init header

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h | 37 +++
 1 file changed, 37 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h

diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h 
b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
new file mode 100644
index 000000000000..db7eb5260295
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V4_0_3_H__
+#define __MMSCH_V4_0_3_H__
+
+#include "amdgpu_vcn.h"
+#include "mmsch_v4_0.h"
+
+struct mmsch_v4_0_3_init_header {
+   uint32_t version;
+   uint32_t total_size;
+   struct mmsch_v4_0_table_info vcn0;
+   struct mmsch_v4_0_table_info mjpegdec0[4];
+   struct mmsch_v4_0_table_info mjpegdec1[4];
+};
+#endif
-- 
2.34.1



[PATCH 2/7] drm/amdgpu/vcn : Skip vcn power-gating change for sriov

2023-07-17 Thread Samir Dhume
Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 550ac040b4be..411c1d802823 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1317,6 +1317,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
 
+   /* for SRIOV, guest should not control VCN Power-gating
+* MMSCH FW should control Power-gating and clock-gating
+* guest should avoid touching CGC and PG
+*/
+   if (amdgpu_sriov_vf(adev)) {
+   adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+   return 0;
+   }
+
if (state == adev->vcn.cur_state)
return 0;
 
-- 
2.34.1



[PATCH] drm/amdgpu: Rearm IRQ in Navi10 SR-IOV if IRQ lost

2020-02-06 Thread Samir Dhume
Ported from Vega10. SDMA stress tests sometimes see IRQ lost.

Signed-off-by: Samir Dhume 
---
 drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 36 ++
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index cf557a428298..e08245a446fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -32,6 +32,7 @@
 #include "soc15_common.h"
 #include "navi10_ih.h"
 
+#define MAX_REARM_RETRY 10
 
 static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
 
@@ -283,6 +284,38 @@ static void navi10_ih_decode_iv(struct amdgpu_device *adev,
ih->rptr += 32;
 }
 
+/**
+ * navi10_ih_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+static void navi10_ih_irq_rearm(struct amdgpu_device *adev,
+  struct amdgpu_ih_ring *ih)
+{
+   uint32_t reg_rptr = 0;
+   uint32_t v = 0;
+   uint32_t i = 0;
+
+   if (ih == &adev->irq.ih)
+   reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR);
+   else if (ih == &adev->irq.ih1)
+   reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1);
+   else if (ih == &adev->irq.ih2)
+   reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2);
+   else
+   return;
+
+   /* Rearm IRQ / re-write doorbell if doorbell write is lost */
+   for (i = 0; i < MAX_REARM_RETRY; i++) {
+   v = RREG32_NO_KIQ(reg_rptr);
+   if ((v < ih->ring_size) && (v != ih->rptr))
+   WDOORBELL32(ih->doorbell_index, ih->rptr);
+   else
+   break;
+   }
+}
+
 /**
  * navi10_ih_set_rptr - set the IH ring buffer rptr
  *
@@ -297,6 +330,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+   if (amdgpu_sriov_vf(adev))
+   navi10_ih_irq_rearm(adev, ih);
} else
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr);
 }
-- 
2.20.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx