[PATCH] drm/amdgpu: disable MCBP by default

2023-11-30 Thread jiadong.zhu
From: Jiadong Zhu 

Disable MCBP (mid command buffer preemption) by default, as old Mesa
hangs with it. We shall not enable a feature that breaks old usermode
drivers.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 21b8a8f2b622..280fcad9ce93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3690,10 +3690,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device 
*adev)
adev->gfx.mcbp = true;
else if (amdgpu_mcbp == 0)
adev->gfx.mcbp = false;
-   else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
-(amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
-adev->gfx.num_gfx_rings)
-   adev->gfx.mcbp = true;
 
if (amdgpu_sriov_vf(adev))
adev->gfx.mcbp = true;
-- 
2.25.1



[PATCH] drm/amdgpu/soc21: add mode2 asic reset for SMU IP v14.0.0

2023-10-26 Thread jiadong.zhu
From: Jiadong Zhu 

Set the default reset method to mode2 for SMU IP v14.0.0

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/soc21.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c 
b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 8c6cab641a1c..ebf99406e634 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -381,6 +381,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
return AMD_RESET_METHOD_MODE1;
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
+   case IP_VERSION(14, 0, 0):
return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
-- 
2.25.1



[PATCH] drm/amdgpu: add tmz support for GC IP v11.5.0

2023-10-18 Thread jiadong.zhu
From: Jiadong Zhu 

Add tmz support for GC 11.5.0.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index a02992bff6af..2dce338b0f1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -786,6 +786,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
/* YELLOW_CARP*/
case IP_VERSION(10, 3, 3):
case IP_VERSION(11, 0, 4):
+   case IP_VERSION(11, 5, 0):
/* Don't enable it by default yet.
 */
if (amdgpu_tmz < 1) {
-- 
2.25.1



[PATCH] drm/amd/pm: drop unneeded dpm features disablement for SMU 14.0.0

2023-10-18 Thread jiadong.zhu
From: Jiadong Zhu 

PMFW will handle the features disablement properly for gpu reset case,
driver involvement may cause some unexpected issues.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 7c3356d6da5e..ace71abbbcf6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1677,13 +1677,14 @@ static int smu_disable_dpms(struct smu_context *smu)
}
 
/*
-* For SMU 13.0.4/11, PMFW will handle the features disablement properly
+* For SMU 13.0.4/11 and 14.0.0, PMFW will handle the features 
disablement properly
 * for gpu reset and S0i3 cases. Driver involvement is unnecessary.
 */
if (amdgpu_in_reset(adev) || adev->in_s0ix) {
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
+   case IP_VERSION(14, 0, 0):
return 0;
default:
break;
-- 
2.25.1



[PATCH] drm/amdgpu: disable mcbp if parameter zero is set

2023-08-08 Thread jiadong.zhu
From: Jiadong Zhu 

The parameter amdgpu_mcbp shall take priority over the default value
calculated from the chip version.
Users can disable mcbp by setting the mcbp parameter to zero.

v2: do not trigger preemption in sw ring muxer when mcbp is disabled.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c   | 9 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7314529553f6..615669dcabc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3699,10 +3699,11 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device 
*adev)
 {
if (amdgpu_mcbp == 1)
adev->gfx.mcbp = true;
-
-   if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
-   (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
-   adev->gfx.num_gfx_rings)
+   else if (amdgpu_mcbp == 0)
+   adev->gfx.mcbp = false;
+   else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
+(adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
+adev->gfx.num_gfx_rings)
adev->gfx.mcbp = true;
 
if (amdgpu_sriov_vf(adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index b779ee4bbaa7..e1ee1c7117fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -397,7 +397,7 @@ void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
struct amdgpu_ring_mux *mux = >gfx.muxer;
 
WARN_ON(!ring->is_sw_ring);
-   if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+   if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
if (amdgpu_mcbp_scan(mux) > 0)
amdgpu_mcbp_trigger_preempt(mux);
return;
-- 
2.25.1



[PATCH] drm/amdgpu: disable mcbp if parameter zero is set

2023-08-07 Thread jiadong.zhu
From: Jiadong Zhu 

The parameter amdgpu_mcbp shall take priority over the default value
calculated from the chip version.
Users can disable mcbp by setting the mcbp parameter to zero.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7314529553f6..615669dcabc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3699,10 +3699,11 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device 
*adev)
 {
if (amdgpu_mcbp == 1)
adev->gfx.mcbp = true;
-
-   if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
-   (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
-   adev->gfx.num_gfx_rings)
+   else if (amdgpu_mcbp == 0)
+   adev->gfx.mcbp = false;
+   else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
+(adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
+adev->gfx.num_gfx_rings)
adev->gfx.mcbp = true;
 
if (amdgpu_sriov_vf(adev))
-- 
2.25.1



[PATCH] drm/amdgpu: set completion status as preempted for the resubmission

2023-07-26 Thread jiadong.zhu
From: Jiadong Zhu 

The driver's CSA buffer is shared by all the IBs. When a high priority IB
is submitted after the preempted IB, CP overwrites the ib_completion_status
in the CSA buffer as completed. When the preempted IB is resubmitted after
that, CP reads the completed status and clears some locals stored for IB
resume, which causes a GPU hang in some cases.

Always set the status to preempted for resubmitted IBs instead of reading
everything from the CSA buffer.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h | 9 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 4 +++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index b22d4fb2a847..d3186b570b82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -56,6 +56,15 @@ enum amdgpu_ring_mux_offset_type {
AMDGPU_MUX_OFFSET_TYPE_CE,
 };
 
+enum ib_complete_status {
+   /* IB not started/reset value, default value. */
+   IB_COMPLETION_STATUS_DEFAULT = 0,
+   /* IB preempted, started but not completed. */
+   IB_COMPLETION_STATUS_PREEMPTED = 1,
+   /* IB completed. */
+   IB_COMPLETION_STATUS_COMPLETED = 2,
+};
+
 struct amdgpu_ring_mux {
struct amdgpu_ring  *real_ring;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fc179e5f8dc1..272f206042bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5169,7 +5169,6 @@ static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring 
*ring,
 unsigned offset)
 {
u32 control = ring->ring[offset];
-
control |= INDIRECT_BUFFER_PRE_RESUME(1);
ring->ring[offset] = control;
 }
@@ -5226,6 +5225,9 @@ static void gfx_v9_0_ring_patch_de_meta(struct 
amdgpu_ring *ring,
de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
}
 
+   ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
+   IB_COMPLETION_STATUS_PREEMPTED;
+
if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
memcpy((void *)>ring[offset], de_payload_cpu_addr, 
payload_size);
} else {
-- 
2.25.1



[PATCH] drm/amdgpu: enable mcbp by default on gfx9 chips

2023-06-15 Thread jiadong.zhu
From: Jiadong Zhu 

Gfx9 is using software rings which would trigger mcbp in some cases.
Thus the parameter amdgpu_mcbp shall be enabled by default.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 65577eca58f1..1b3cfda946f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4527,6 +4527,7 @@ static int gfx_v9_0_early_init(void *handle)
adev->gfx.xcc_mask = 1;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
  AMDGPU_MAX_COMPUTE_RINGS);
+   amdgpu_mcbp = 1;
gfx_v9_0_set_kiq_pm4_funcs(adev);
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
-- 
2.25.1



[PATCH] drm/amdgpu: Skip mark offset for high priority rings

2023-06-15 Thread jiadong.zhu
From: Jiadong Zhu 

Only low priority rings use chunks to save the offset.
Skip the mark-offset calls coming from high priority rings.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 73516abef662..b779ee4bbaa7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -423,6 +423,9 @@ void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring 
*ring, enum amdgpu_ring_mu
struct amdgpu_ring_mux *mux = >gfx.muxer;
unsigned offset;
 
+   if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+   return;
+
offset = ring->wptr & ring->buf_mask;
 
amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
-- 
2.25.1



[PATCH 1/2] drm/amdgpu: Modify indirect buffer packages for resubmission

2023-05-25 Thread jiadong.zhu
From: Jiadong Zhu 

When a preempted IB frame is resubmitted to CP, we need to modify the frame
data, including:
1. set PRE_RESUME to 1 in CONTEXT_CONTROL.
2. use the meta data (DE and CE) read from CSA in WRITE_DATA.

Add functions to save the locations the first time the IBs are emitted, and
callbacks to patch the packages when resubmission happens.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 18 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  9 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 60 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h | 15 +
 4 files changed, 102 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 7429b20257a6..12ba863e69f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -692,3 +692,21 @@ void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_end(ring);
 }
+
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
+{
+   if (ring->is_sw_ring)
+   amdgpu_sw_ring_ib_mark_offset(ring, 
AMDGPU_MUX_OFFSET_TYPE_CONTROL);
+}
+
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
+{
+   if (ring->is_sw_ring)
+   amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
+}
+
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
+{
+   if (ring->is_sw_ring)
+   amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index baa03527bf8b..702ce55b962a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -229,6 +229,9 @@ struct amdgpu_ring_funcs {
int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring);
void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
+   void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
+   void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
+   void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
 };
 
 struct amdgpu_ring {
@@ -323,11 +326,17 @@ struct amdgpu_ring {
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
+#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
+#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
+#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
 
 unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
 void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
 
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib 
*ib);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 62079f0e3ee8..73516abef662 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -105,6 +105,16 @@ static void amdgpu_mux_resubmit_chunks(struct 
amdgpu_ring_mux *mux)
amdgpu_fence_update_start_timestamp(e->ring,

chunk->sync_seq,

ktime_get());
+   if (chunk->sync_seq ==
+   
le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
+   if (chunk->cntl_offset <= 
e->ring->buf_mask)
+   amdgpu_ring_patch_cntl(e->ring,
+  
chunk->cntl_offset);
+   if (chunk->ce_offset <= 
e->ring->buf_mask)
+   amdgpu_ring_patch_ce(e->ring, 
chunk->ce_offset);
+   if (chunk->de_offset <= 
e->ring->buf_mask)
+   amdgpu_ring_patch_de(e->ring, 
chunk->de_offset);
+   }
amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, 
e->ring,
  
chunk->start,
  
chunk->end);
@@ -407,6 +417,17 @@ void amdgpu_sw_ring_ib_end(struct 

[PATCH 2/2] drm/amdgpu: Implement gfx9 patch functions for resubmission

2023-05-25 Thread jiadong.zhu
From: Jiadong Zhu 

Patch the packages including CONTEXT_CONTROL and WRITE_DATA for gfx9
during the resubmission scenario.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 80 +++
 1 file changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index cbcf6126cce5..4fbeb9b5752c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5172,9 +5172,83 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
 #endif
lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+   amdgpu_ring_ib_on_emit_cntl(ring);
amdgpu_ring_write(ring, control);
 }
 
+static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
+unsigned offset)
+{
+   u32 control = ring->ring[offset];
+
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+   ring->ring[offset] = control;
+}
+
+static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
+   unsigned offset)
+{
+   struct amdgpu_device *adev = ring->adev;
+   void *ce_payload_cpu_addr;
+   uint64_t payload_offset, payload_size;
+
+   payload_size = sizeof(struct v9_ce_ib_state);
+
+   if (ring->is_mes_queue) {
+   payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+   offsetof(struct v9_gfx_meta_data, ce_payload);
+   ce_payload_cpu_addr =
+   amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
+   } else {
+   payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+   ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+   }
+
+   if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+   memcpy((void *)>ring[offset], ce_payload_cpu_addr, 
payload_size);
+   } else {
+   memcpy((void *)>ring[offset], ce_payload_cpu_addr,
+  (ring->buf_mask + 1 - offset) << 2);
+   payload_size -= (ring->buf_mask + 1 - offset) << 2;
+   memcpy((void *)>ring[0],
+  ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 
2),
+  payload_size);
+   }
+}
+
+static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
+   unsigned offset)
+{
+   struct amdgpu_device *adev = ring->adev;
+   void *de_payload_cpu_addr;
+   uint64_t payload_offset, payload_size;
+
+   payload_size = sizeof(struct v9_de_ib_state);
+
+   if (ring->is_mes_queue) {
+   payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+   offsetof(struct v9_gfx_meta_data, de_payload);
+   de_payload_cpu_addr =
+   amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
+   } else {
+   payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
+   de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+   }
+
+   if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+   memcpy((void *)>ring[offset], de_payload_cpu_addr, 
payload_size);
+   } else {
+   memcpy((void *)>ring[offset], de_payload_cpu_addr,
+  (ring->buf_mask + 1 - offset) << 2);
+   payload_size -= (ring->buf_mask + 1 - offset) << 2;
+   memcpy((void *)>ring[0],
+  de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 
2),
+  payload_size);
+   }
+}
+
 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
  struct amdgpu_job *job,
  struct amdgpu_ib *ib,
@@ -5370,6 +5444,8 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring 
*ring, bool resume)
amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
 
+   amdgpu_ring_ib_on_emit_ce(ring);
+
if (resume)
amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
   sizeof(ce_payload) >> 2);
@@ -5481,6 +5557,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring 
*ring, bool resume, bo
amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
 
+   amdgpu_ring_ib_on_emit_de(ring);
if (resume)
amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
   sizeof(de_payload) >> 2);
@@ -6891,6 +6968,9 @@ static const struct amdgpu_ring_funcs 

[PATCH] drm/amdgpu: Program gds backup address as zero if no gds allocated

2023-05-24 Thread jiadong.zhu
From: Jiadong Zhu 

It is a firmware requirement to set both gds_backup_addrlo and gds_backup_addrhi
of the DE meta to zero if no gds partition is allocated for the frame.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index cbdd9918b3e7..cbcf6126cce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -765,7 +765,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, 
bool usegds);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -5160,7 +5160,8 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
gfx_v9_0_ring_emit_de_meta(ring,
   
(!amdgpu_sriov_vf(ring->adev) &&
   flags & AMDGPU_IB_PREEMPTED) 
?
-  true : false);
+  true : false,
+  job->gds_size > 0 && 
job->gds_base != 0);
}
 
amdgpu_ring_write(ring, header);
@@ -5435,7 +5436,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring 
*ring)
return r;
 }
 
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, 
bool usegds)
 {
struct amdgpu_device *adev = ring->adev;
struct v9_de_ib_state de_payload = {0};
@@ -5466,8 +5467,10 @@ static void gfx_v9_0_ring_emit_de_meta(struct 
amdgpu_ring *ring, bool resume)
 PAGE_SIZE);
}
 
-   de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
-   de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+   if (usegds) {
+   de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+   de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+   }
 
cnt = (sizeof(de_payload) >> 2) + 4 - 2;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
-- 
2.25.1



[PATCH] drm/amdgpu: Reset CP_VMID_PREEMPT after trailing fence signaled

2023-05-23 Thread jiadong.zhu
From: Jiadong Zhu 

When MEC executes unmap_queue for mid command buffer preemption, it will
kick the write pointer of the gfx ring, set CP_VMID_PREEMPT to trigger the
preemption and wait for CP_VMID_PREEMPT to become zero after the preemption
is done. There is a race condition in which PFP may execute the resetting
command before MEC sets CP_VMID_PREEMPT. As a result, a hang happens as
CP_VMID_PREEMPT is always 0xffff.

To avoid this, we send the command resetting CP_VMID_PREEMPT after the
trailing fence is signaled and update the gfx write pointer explicitly.

Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 8bf95a6b0767..cbdd9918b3e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5402,10 +5402,6 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring 
*ring)
amdgpu_ring_alloc(ring, 13);
gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | 
AMDGPU_FENCE_FLAG_INT);
-   /*reset the CP_VMID_PREEMPT after trailing fence*/
-   amdgpu_ring_emit_wreg(ring,
- SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
- 0x0);
 
/* assert IB preemption, emit the trailing fence */
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
@@ -5428,6 +5424,10 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring 
*ring)
DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
}
 
+   /*reset the CP_VMID_PREEMPT after trailing fence*/
+   amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+ 0x0);
amdgpu_ring_commit(ring);
 
/* deassert preemption condition */
-- 
2.25.1



[PATCH] drm/amdgpu: Make amdgpu_ring_mux functions as static

2022-12-04 Thread jiadong.zhu
From: Jiadong Zhu 

lkp robot reported missing-prototypes and unused-but-set-variable warnings on
some functions of amdgpu_ring_mux.c. Make them static and remove the unused
variable.

Reported-by: kernel test robot 
Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 3348337379da..62079f0e3ee8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -350,7 +350,7 @@ unsigned int amdgpu_sw_ring_priority(int idx)
 }
 
 /*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/
-int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
 {
struct amdgpu_ring *ring;
int i, need_preempt;
@@ -370,7 +370,7 @@ int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
 }
 
 /* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to 
resubmit. */
-int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
 {
int r;
 
@@ -434,7 +434,7 @@ void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, 
struct amdgpu_ring *r
 
 static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct 
amdgpu_ring *ring)
 {
-   uint32_t last_seq, size = 0;
+   uint32_t last_seq = 0;
struct amdgpu_mux_entry *e;
struct amdgpu_mux_chunk *chunk, *tmp;
 
@@ -450,8 +450,6 @@ static void scan_and_remove_signaled_chunk(struct 
amdgpu_ring_mux *mux, struct a
if (chunk->sync_seq <= last_seq) {
list_del(>entry);
kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
-   } else {
-   size++;
}
}
 }
-- 
2.25.1



[PATCH 4/4] drm/amdgpu: MCBP based on DRM scheduler (v9)

2022-12-01 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packages in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.
v6: Refactor functions for resubmission, calling fence_process in irq handler.
v7: Solve conflict for removing amdgpu_sw_ring.c.
v8: Add time threshold to judge if preemption request is needed.
v9: Correct comment spelling. Set fence emit timestamp before rcu assignment.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Luben Tuikov 
Acked-by: Huang Rui 
Acked-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c|  54 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 351 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  29 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|   7 +-
 8 files changed, 422 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d0d99ed607dd..00444203220d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -55,6 +55,7 @@ struct amdgpu_fence {
 
/* RB, DMA, etc. */
struct amdgpu_ring  *ring;
+   ktime_t start_timestamp;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
}
}
 
+   to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
@@ -406,6 +409,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring 
*ring)
return lower_32_bits(emitted);
 }
 
+/**
+ * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence unsignaled until now, calculate the time delta
+ * between the time fence emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+   struct amdgpu_fence_driver *drv = >fence_drv;
+   struct dma_fence *fence;
+   uint32_t last_seq, sync_seq;
+
+   last_seq = atomic_read(>fence_drv.last_seq);
+   sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+   if (last_seq == sync_seq)
+   return 0;
+
+   ++last_seq;
+   last_seq &= drv->num_fences_mask;
+   fence = drv->fences[last_seq];
+   if (!fence)
+   return 0;
+
+   return ktime_us_delta(ktime_get(),
+   to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * The function called at the time the fence and related ib is about to
+ * resubmit to gpu in MCBP scenario. Thus we do not consider race condition
+ * with amdgpu_fence_process to modify the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t 
seq, ktime_t timestamp)
+{
+   struct amdgpu_fence_driver *drv = >fence_drv;
+   struct dma_fence *fence;
+
+   seq &= drv->num_fences_mask;
+   fence = drv->fences[seq];
+   if (!fence)
+   return;
+
+   to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
 /**
  * amdgpu_fence_driver_start_ring - make the fence driver
  * ready for use on the requested ring.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_G

[PATCH] Add vkpreemption for gfx9 mcbp

2022-11-29 Thread jiadong.zhu
From: jiadozhu 

This is a standalone test case used for software mcbp on gfx9.
Build and open two consoles to run:
build/bin/vkpreemption s gfx=draws:100,priority:high,delay:0
build/bin/vkpreemption c gfx=draws:100,priority:low,delay:0

The result is printed on the console of the server side.

Signed-off-by: jiadozhu 
---
 vkpreemption/CMakeLists.txt   |  17 +
 vkpreemption/VulkanInitializers.hpp   | 591 
 vkpreemption/VulkanTools.cpp  | 361 
 vkpreemption/VulkanTools.h| 118 
 vkpreemption/base.hpp | 269 +
 vkpreemption/build_lnx.sh |  11 +
 vkpreemption/computework.hpp  | 429 ++
 vkpreemption/graphicwork.hpp  | 777 ++
 vkpreemption/headless.comp|  34 ++
 vkpreemption/headless.comp.inc|  33 ++
 vkpreemption/main.cpp | 385 +
 vkpreemption/triangle.frag|  10 +
 vkpreemption/triangle.frag.glsl   |  10 +
 vkpreemption/triangle.frag.inc|  17 +
 vkpreemption/triangle.vert|  20 +
 vkpreemption/triangle.vert.glsl   |  20 +
 vkpreemption/triangle.vert.inc|  34 ++
 vkpreemption/vk_amd_dispatch_tunnel.h |  34 ++
 vkpreemption/vk_internal_ext_helper.h |  33 ++
 19 files changed, 3203 insertions(+)
 create mode 100644 vkpreemption/CMakeLists.txt
 create mode 100644 vkpreemption/VulkanInitializers.hpp
 create mode 100644 vkpreemption/VulkanTools.cpp
 create mode 100644 vkpreemption/VulkanTools.h
 create mode 100644 vkpreemption/base.hpp
 create mode 100644 vkpreemption/build_lnx.sh
 create mode 100644 vkpreemption/computework.hpp
 create mode 100644 vkpreemption/graphicwork.hpp
 create mode 100644 vkpreemption/headless.comp
 create mode 100644 vkpreemption/headless.comp.inc
 create mode 100644 vkpreemption/main.cpp
 create mode 100644 vkpreemption/triangle.frag
 create mode 100644 vkpreemption/triangle.frag.glsl
 create mode 100644 vkpreemption/triangle.frag.inc
 create mode 100644 vkpreemption/triangle.vert
 create mode 100644 vkpreemption/triangle.vert.glsl
 create mode 100644 vkpreemption/triangle.vert.inc
 create mode 100644 vkpreemption/vk_amd_dispatch_tunnel.h
 create mode 100644 vkpreemption/vk_internal_ext_helper.h

diff --git a/vkpreemption/CMakeLists.txt b/vkpreemption/CMakeLists.txt
new file mode 100644
index ..0c54ddab
--- /dev/null
+++ b/vkpreemption/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
+cmake_policy(VERSION 2.8)
+project(vkpreemption)
+
+message("CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
+
+include_directories(glm)
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/")
+
+file(GLOB EXAMPLE_SRC "*.cpp" "*.hpp")
+add_executable(vkpreemption ${EXAMPLE_SRC})
+
+target_link_libraries(
+vkpreemption
+libvulkan.so
+)
diff --git a/vkpreemption/VulkanInitializers.hpp 
b/vkpreemption/VulkanInitializers.hpp
new file mode 100644
index 00000000..806ab513
--- /dev/null
+++ b/vkpreemption/VulkanInitializers.hpp
@@ -0,0 +1,591 @@
+/*
+* Initializers for Vulkan structures and objects used by the examples
+* Saves lot of VK_STRUCTURE_TYPE assignments
+* Some initializers are parameterized for convenience
+*
+* Copyright (C) 2016 by Sascha Willems - www.saschawillems.de
+*
+* This code is licensed under the MIT license (MIT) 
(http://opensource.org/licenses/MIT)
+*/
+
+#pragma once
+
+#include <vector>
+#include "vulkan/vulkan.h"
+
+namespace vks
+{
+   namespace initializers
+   {
+
+   inline VkMemoryAllocateInfo memoryAllocateInfo()
+   {
+   VkMemoryAllocateInfo memAllocInfo {};
+   memAllocInfo.sType = 
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+   return memAllocInfo;
+   }
+
+   inline VkMappedMemoryRange mappedMemoryRange()
+   {
+   VkMappedMemoryRange mappedMemoryRange {};
+   mappedMemoryRange.sType = 
VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+   return mappedMemoryRange;
+   }
+
+   inline VkCommandBufferAllocateInfo commandBufferAllocateInfo(
+   VkCommandPool commandPool,
+   VkCommandBufferLevel level,
+   uint32_t bufferCount)
+   {
+   VkCommandBufferAllocateInfo commandBufferAllocateInfo 
{};
+   commandBufferAllocateInfo.sType = 
VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+   commandBufferAllocateInfo.commandPool = commandPool;
+   commandBufferAllocateInfo.level = level;
+   commandBufferAllocateInfo.commandBufferCount = 
bufferCount;
+   return commandBufferAllocateInfo;
+   }
+
+   inline VkCommandPoolCreateInfo commandPoolCreateInfo()
+   {
+  

[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9 (v8)

2022-11-28 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.
v8: Stop creating software rings if no gfx ring exists.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Cc: Likun Gao 
Signed-off-by: Jiadong.Zhu 
Acked-by: Luben Tuikov 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c |  20 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 113 ++-
 5 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 7c2692f29311..5d0c45659e97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -365,6 +365,7 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 8be51ebfedd5..a744aa9bac95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
 #define AMDGPU_MAX_GFX_RINGS   2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 6fbf71451e29..a52c4e7e8c39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -29,6 +29,14 @@
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
 
+static const struct ring_info {
+   unsigned int hw_pio;
+   const char *ring_name;
+} sw_ring_info[] = {
+   { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+   { AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
 unsigned int entry_size)
 {
@@ -219,3 +227,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count)
 {
WARN_ON(!ring->is_sw_ring);
 }
+
+const char *amdgpu_sw_ring_name(int idx)
+{
+   return idx < ARRAY_SIZE(sw_ring_info) ?
+   sw_ring_info[idx].ring_name : NULL;
+}
+
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+   return idx < ARRAY_SIZE(sw_ring_info) ?
+   sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index f58672a8e0ad..f67970dc3dbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count);
 void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
 void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
 
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0d26910a782b..ec80047b1b41 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_ring_mux.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
 #include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2105,6 +2107,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   unsigned int hw_prio;
 
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@@ -2195,6 +2198,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = tr

[PATCH 4/4] drm/amdgpu: MCBP based on DRM scheduler (v9)

2022-11-28 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packets in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.
v6: Refactor functions for resubmission, calling fence_process in irq handler.
v7: Solve conflict for removing amdgpu_sw_ring.c.
v8: Add time threshold to judge if preemption request is needed.
v9: Correct comment spelling. Set fence emit timestamp before rsu assignment.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Luben Tuikov 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c|  54 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 351 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  29 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|   7 +-
 8 files changed, 422 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 2f8784396e89..ca6a47792dcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -55,6 +55,7 @@ struct amdgpu_fence {
 
/* RB, DMA, etc. */
struct amdgpu_ring  *ring;
+   ktime_t start_timestamp;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
}
}
 
+   to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
@@ -415,6 +418,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring 
*ring)
return lower_32_bits(emitted);
 }
 
+/**
+ * amdgpu_fence_last_unsignaled_time_us - time elapsed since the fence was emitted
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence still unsignaled, and calculate the time delta
+ * between the moment that fence was emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+   struct amdgpu_fence_driver *drv = &ring->fence_drv;
+   struct dma_fence *fence;
+   uint32_t last_seq, sync_seq;
+
+   last_seq = atomic_read(&ring->fence_drv.last_seq);
+   sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+   if (last_seq == sync_seq)
+   return 0;
+
+   ++last_seq;
+   last_seq &= drv->num_fences_mask;
+   fence = drv->fences[last_seq];
+   if (!fence)
+   return 0;
+
+   return ktime_us_delta(ktime_get(),
+   to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * This function is called when the fence and its related IB are about to be
+ * resubmitted to the GPU in the MCBP scenario. Thus we do not need to consider
+ * a race with amdgpu_fence_process modifying the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t 
seq, ktime_t timestamp)
+{
+   struct amdgpu_fence_driver *drv = &ring->fence_drv;
+   struct dma_fence *fence;
+
+   seq &= drv->num_fences_mask;
+   fence = drv->fences[seq];
+   if (!fence)
+   return;
+
+   to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
 /**
  * amdgpu_fence_driver_start_ring - make the fence driver
  * ready for use on the requested ring.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 9dc1487c6fb2..bc70d1e3efd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -219,6 +219,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -293,6 +294,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE

[PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9 (v6)

2022-11-28 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Modify the unmap_queue packet on gfx9. Add a trailing fence to track
   preemption completion.
2. Modify the emit_ce_meta and emit_de_meta functions for the resumed IBs.

v2: Restyle code not to use ternary operator.
v3: Modify code format.
v4: Enable Mid-Command Buffer Preemption for gfx9 by default.
v5: Optimize the flag bit set for emit_fence.
v6: Modify log message for preemption timeout.

Cc: Christian Koenig 
Cc: Michel Dänzer 
Cc: Luben Tuikov 
Signed-off-by: Jiadong.Zhu 
Acked-by: Christian König 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 182 +++
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 156 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index a744aa9bac95..073e767c057f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ec80047b1b41..ec528b1d82e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -755,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -828,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5326,11 +5327,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+  
(!amdgpu_sriov_vf(ring->adev) &&
+  flags & AMDGPU_IB_PREEMPTED) 
?
+  true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5385,17 +5392,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+   uint32_t dw2 = 0;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-   amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-  EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
-  EOP_TC_ACTION_EN |
-  EOP_TC_WB_ACTION_EN |
-  EOP_TC_MD_ACTION_EN)) |
-EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-EVENT_INDEX(5)));
+
+   if (w

[PATCH 1/4] drm/amdgpu: Introduce gfx software ring (v9)

2022-11-28 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority context while there is only
one hardware queue for gfx.

Every software ring has its fence driver and could be used as an ordinary ring
for the GPU scheduler.
Multiple software rings are bound to a real ring with the ring muxer. The
packages committed on the software ring are copied to the real ring.

v2: Use array to store software ring entry.
v3: Remove unnecessary prints.
v4: Remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.
v5: Allocate ring entry dynamically in the muxer.
v6: Update comments for the ring muxer.
v7: Modify for function naming.
v8: Combine software ring functions into amdgpu_ring_mux.c
v9: Use kernel-doc comment on the get_rptr function.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky  
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Huang Rui 
Acked-by: Luben Tuikov 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 221 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  76 +++
 5 files changed, 306 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 74f109a56d90..f58aa5d2e83e 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -62,7 +62,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 1e6e35ff3f13..7c2692f29311 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -363,6 +364,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 82c178a9033a..8be51ebfedd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -279,6 +279,10 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+   unsigned intentry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..6fbf71451e29
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/slab.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "a

[PATCH 4/4] drm/amdgpu: MCBP based on DRM scheduler (v9)

2022-11-16 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packets in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.
v6: Refactor functions for resubmission, calling fence_process in irq handler.
v7: Solve conflict for removing amdgpu_sw_ring.c.
v8: Add time threshold to judge if preemption request is needed.
v9: Correct comment spelling. Set fence emit timestamp before rsu assignment.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Luben Tuikov 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c|  54 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 353 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  29 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|   7 +-
 8 files changed, 423 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 2f8784396e89..ca6a47792dcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -55,6 +55,7 @@ struct amdgpu_fence {
 
/* RB, DMA, etc. */
struct amdgpu_ring  *ring;
+   ktime_t start_timestamp;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
}
}
 
+   to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
@@ -415,6 +418,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring 
*ring)
return lower_32_bits(emitted);
 }
 
+/**
+ * amdgpu_fence_last_unsignaled_time_us - time elapsed since the fence was emitted
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence still unsignaled, and calculate the time delta
+ * between the moment that fence was emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+   struct amdgpu_fence_driver *drv = &ring->fence_drv;
+   struct dma_fence *fence;
+   uint32_t last_seq, sync_seq;
+
+   last_seq = atomic_read(&ring->fence_drv.last_seq);
+   sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+   if (last_seq == sync_seq)
+   return 0;
+
+   ++last_seq;
+   last_seq &= drv->num_fences_mask;
+   fence = drv->fences[last_seq];
+   if (!fence)
+   return 0;
+
+   return ktime_us_delta(ktime_get(),
+   to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * This function is called when the fence and its related IB are about to be
+ * resubmitted to the GPU in the MCBP scenario. Thus we do not need to consider
+ * a race with amdgpu_fence_process modifying the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t 
seq, ktime_t timestamp)
+{
+   struct amdgpu_fence_driver *drv = &ring->fence_drv;
+   struct dma_fence *fence;
+
+   seq &= drv->num_fences_mask;
+   fence = drv->fences[seq];
+   if (!fence)
+   return;
+
+   to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
 /**
  * amdgpu_fence_driver_start_ring - make the fence driver
  * ready for use on the requested ring.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 9dc1487c6fb2..bc70d1e3efd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -219,6 +219,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -293,6 +294,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE

[PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9 (v4)

2022-11-16 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Modify the unmap_queue packet on gfx9. Add a trailing fence to track
   preemption completion.
2. Modify the emit_ce_meta and emit_de_meta functions for the resumed IBs.

v2: Restyle code not to use ternary operator.
v3: Modify code format.
v4: Enable Mid-Command Buffer Preemption for gfx9 by default.

Cc: Christian Koenig 
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Christian König 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 181 +++
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 155 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index a744aa9bac95..073e767c057f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ec80047b1b41..6595c58ec26f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -755,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -828,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5326,11 +5327,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+  
(!amdgpu_sriov_vf(ring->adev) &&
+  flags & AMDGPU_IB_PREEMPTED) 
?
+  true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5385,17 +5392,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+   uint32_t dw2 = 0;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-   amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-  EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
-  EOP_TC_ACTION_EN |
-  EOP_TC_WB_ACTION_EN |
-  EOP_TC_MD_ACTION_EN)) |
-EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-EVENT_INDEX(5)));
+
+   if (writeback) {
+   dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
+   } else {
+   

[PATCH 1/4] drm/amdgpu: Introduce gfx software ring (v8)

2022-11-16 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority context while there is only
one hardware queue for gfx.

Every software ring has its fence driver and could be used as an ordinary ring
for the GPU scheduler.
Multiple software rings are bound to a real ring with the ring muxer. The
packages committed on the software ring are copied to the real ring.

v2: Use array to store software ring entry.
v3: Remove unnecessary prints.
v4: Remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.
v5: Allocate ring entry dynamically in the muxer.
v6: Update comments for the ring muxer.
v7: Modify for function naming.
v8: Combine software ring functions into amdgpu_ring_mux.c

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky  
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 217 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  76 +++
 5 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 74f109a56d90..f58aa5d2e83e 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -62,7 +62,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 1e6e35ff3f13..7c2692f29311 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -363,6 +364,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 82c178a9033a..8be51ebfedd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -279,6 +279,10 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+   unsigned intentry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..43cab8a37754
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include 
+#include 
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RES

[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9 (v8)

2022-11-16 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.
v8: Stop creating software rings if no gfx ring existed.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Cc: Likun Gao 
Signed-off-by: Jiadong.Zhu 
Acked-by: Luben Tuikov 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c |  20 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 113 ++-
 5 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 7c2692f29311..5d0c45659e97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -365,6 +365,7 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 8be51ebfedd5..a744aa9bac95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
 #define AMDGPU_MAX_GFX_RINGS   2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 43cab8a37754..2e64ffccc030 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -29,6 +29,14 @@
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
 
+/*
+ * Static description of the software gfx rings: each entry pairs a ring
+ * priority level with the name of that ring. amdgpu_sw_ring_name() and
+ * amdgpu_sw_ring_priority() look entries up by array index.
+ */
+static const struct ring_info {
+	unsigned int hw_pio;	/* priority level of the ring */
+	const char *ring_name;	/* name of the ring */
+} sw_ring_info[] = {
+	{ .hw_pio = AMDGPU_RING_PRIO_DEFAULT, .ring_name = "gfx_low" },
+	{ .hw_pio = AMDGPU_RING_PRIO_2, .ring_name = "gfx_high" },
+};
+
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
 unsigned int entry_size)
 {
@@ -215,3 +223,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count)
 {
WARN_ON(!ring->is_sw_ring);
 }
+
+/**
+ * amdgpu_sw_ring_name - look up the name of a software ring
+ * @idx: index into the sw_ring_info table
+ *
+ * Returns the ring name stored at @idx, or NULL when @idx does not refer to
+ * a table entry.
+ */
+const char *amdgpu_sw_ring_name(int idx)
+{
+	if (idx < 0 || idx >= ARRAY_SIZE(sw_ring_info))
+		return NULL;
+
+	return sw_ring_info[idx].ring_name;
+}
+
+/**
+ * amdgpu_sw_ring_priority - look up the priority level of a software ring
+ * @idx: index into the sw_ring_info table
+ *
+ * Returns the priority stored at @idx, or AMDGPU_RING_PRIO_DEFAULT when
+ * @idx does not refer to a table entry.
+ */
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+	if (idx < 0 || idx >= ARRAY_SIZE(sw_ring_info))
+		return AMDGPU_RING_PRIO_DEFAULT;
+
+	return sw_ring_info[idx].hw_pio;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index d91629589577..28399f4b0e5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count);
 void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
 void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
 
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0d26910a782b..ec80047b1b41 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_ring_mux.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
 #include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2105,6 +2107,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   unsigned int hw_prio;
 
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@@ -2195,6 +2198,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = tr

[PATCH 5/5] drm/amdgpu: Improve the software ring priority scheduler

2022-10-18 Thread jiadong.zhu
From: Jiadong Zhu 

Using the drm scheduler, the software rings' scheduling threads with different
priorities have the same opportunity to get the spinlock and copy their packages
into the real ring. Though preemption may happen for the low priority ring, it
will catch up with the high priority ring by copying more data (the resubmit
package and the current ibs) on the next call to set_wptr. As a result, the
priority is not very effective.

This patch makes the priority of software rings more effective underneath the
drm scheduler by slowing down the low priority thread, using the following
strategy.
1. If all the high priority fences are signaled which indicates gpu is idle
   while there are low priority packages to submit, no delay happens.
2. When there are unsignaled fences on high priority rings, we account for the
   portion of the ibs sent from the low priority ring. If the portion exceeds
   a certain threshold (e.g. 30%), a timeout wait happens on low priority
   threads until more high priority ibs are submitted.
3. The mechanism starts the first time mcbp is triggered and ends when all
   the high priority fences are signaled.

Cc: Christian Koenig 
Cc: Michel Dänzer 
Signed-off-by: Jiadong Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 93 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  3 +
 2 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 41b057b9358e..eac89094f1d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -27,7 +27,13 @@
 #include "amdgpu_ring.h"
 #include "amdgpu.h"
 
+/* Delay, in jiffies, before the fallback resubmission happens */
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
+
+/* Maximum waiting jiffies on low priority ring thread */
+#define AMDGPU_MUX_DELAY_JIFFIES_TIMEOUT (HZ / 10)
+
+/* The time threshold of an unsignaled fence that triggers mcbp */
 #define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
 
 static const struct ring_info {
@@ -47,6 +53,69 @@ static inline struct amdgpu_mux_entry 
*amdgpu_ring_mux_sw_entry(struct amdgpu_ri
>ring_entry[ring->entry_index] : NULL;
 }
 
+/*
+ * Map a ring priority level to a percentage: 30 for
+ * AMDGPU_RING_PRIO_DEFAULT, 100 for AMDGPU_RING_PRIO_2 and for any other
+ * level.
+ */
+static uint32_t ring_priority_to_ratio_pct(unsigned int hw_prio)
+{
+	switch (hw_prio) {
+	case AMDGPU_RING_PRIO_DEFAULT:
+		return 30;
+	case AMDGPU_RING_PRIO_2:
+	default:
+		return 100;
+	}
+}
+
+/* Zero the w_cnt counter of every ring entry held by the muxer. */
+static void reset_wcnt_on_all_rings(struct amdgpu_ring_mux *mux)
+{
+	int idx;
+
+	for (idx = 0; idx < mux->num_ring_entries; idx++) {
+		struct amdgpu_mux_entry *entry = &mux->ring_entry[idx];
+
+		entry->w_cnt = 0;
+	}
+}
+
+/**
+ * proceed_on_ring - decide whether a ring may copy its packages right away
+ * @mux: the ring muxer holding the per-ring write counters
+ * @ring: the software ring that wants to submit
+ *
+ * Decide if the low priority ring task should be delayed when there are high
+ * priority ibs ongoing. If all the high priority fences are signaled at that
+ * time, the gpu is idle and there is no need to wait. Otherwise the share of
+ * the write counts that belongs to @ring's priority is compared with the
+ * threshold for that priority.
+ *
+ * Returns true if @ring may proceed, false if it should keep waiting.
+ */
+static bool proceed_on_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_ring *high_prio_ring = NULL;
+	u64 current_cnt = 0, total_cnt = 0;
+	int i;
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		if (mux->ring_entry[i].ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+			high_prio_ring = mux->ring_entry[i].ring;
+			break;
+		}
+	}
+
+	/*
+	 * With no high priority ring in the muxer there is nothing to wait
+	 * for. Otherwise, all high priority fences being signaled indicates
+	 * the gpu is idle.
+	 */
+	if (!high_prio_ring || amdgpu_fence_count_emitted(high_prio_ring) == 0) {
+		reset_wcnt_on_all_rings(mux);
+		return true;
+	}
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		if (mux->ring_entry[i].ring->hw_prio == ring->hw_prio)
+			current_cnt = mux->ring_entry[i].w_cnt;
+		total_cnt += mux->ring_entry[i].w_cnt;
+	}
+
+	if (total_cnt == 0)
+		return true;
+
+	/*
+	 * Same result as "ratio > current_cnt * 100 / total_cnt", written as a
+	 * multiplication so no 64-bit division is needed on 32-bit builds.
+	 */
+	return ring_priority_to_ratio_pct(ring->hw_prio) * total_cnt >
+	       current_cnt * 100;
+}
+
 /* copy packages on sw ring range[begin, end) */
 static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
  struct amdgpu_ring *ring,
@@ -73,6 +142,13 @@ static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct 
amdgpu_ring_mux *mux,
}
 }
 
+/*
+ * Delay the calling low priority task: sleep on mux->wait until
+ * proceed_on_ring() returns true or AMDGPU_MUX_DELAY_JIFFIES_TIMEOUT
+ * elapses. The wait is interruptible and its result is not checked.
+ */
+static void wait_on_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+   wait_event_interruptible_timeout(mux->wait, proceed_on_ring(mux, ring),
+AMDGPU_MUX_DELAY_JIFFIES_TIMEOUT);
+}
+
 static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)

[PATCH 4/5] drm/amdgpu: MCBP based on DRM scheduler (v8)

2022-10-18 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packages in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.
v6: Refactor functions for resubmission, calling fence_process in irq handler.
v7: Solve conflict for removing amdgpu_sw_ring.c.
v8: Add time threshold to judge if preemption request is needed.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Luben Tuikov 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c|  53 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 353 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  29 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|   7 +-
 8 files changed, 420 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 790f7bfdc654..470448bc1ebb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -54,6 +54,7 @@ struct amdgpu_fence {
 
/* RB, DMA, etc. */
struct amdgpu_ring  *ring;
+   ktime_t start_timestamp;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -199,6 +200,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
*f = fence;
+   to_amdgpu_fence(fence)->start_timestamp = ktime_get();
 
return 0;
 }
@@ -400,6 +402,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring 
*ring)
return lower_32_bits(emitted);
 }
 
+/**
+ * amdgpu_fence_last_unsignaled_time_us - time since the earliest unsignaled fence was emitted
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence that is still unsignaled and calculate the time
+ * delta between the moment that fence was emitted and now.
+ *
+ * Returns the delta in microseconds, or 0 when every emitted fence has
+ * signaled or the fence slot is empty.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
+	struct dma_fence *fence;
+	uint32_t last_seq, sync_seq;
+
+	last_seq = atomic_read(&ring->fence_drv.last_seq);
+	sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+	if (last_seq == sync_seq)
+		return 0;
+
+	/* the fence right after the last signaled one is the earliest pending */
+	++last_seq;
+	last_seq &= drv->num_fences_mask;
+	fence = drv->fences[last_seq];
+	if (!fence)
+		return 0;
+
+	return ktime_us_delta(ktime_get(),
+		to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * This function is called when the fence and its related ib are about to be
+ * resubmitted to the gpu in the MCBP scenario. Thus we do not consider a race
+ * condition with amdgpu_fence_process modifying the same fence.
+ *
+ * Does nothing when the fence slot selected by @seq is empty.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
+					 ktime_t timestamp)
+{
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
+	struct dma_fence *fence;
+
+	seq &= drv->num_fences_mask;
+	fence = drv->fences[seq];
+	if (!fence)
+		return;
+
+	to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
 /**
  * amdgpu_fence_driver_start_ring - make the fence driver
  * ready for use on the requested ring.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
 
+   amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
 }
diff --gi

[PATCH 3/5] drm/amdgpu: Modify unmap_queue format for gfx9 (v4)

2022-10-18 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Modify the unmap_queue package on gfx9. Add a trailing fence to track
   when the preemption is done.
2. Modify the emit_ce_meta and emit_de_meta functions for the resumed ibs.

v2: Restyle code not to use ternary operator.
v3: Modify code format.
v4: Enable Mid-Command Buffer Preemption for gfx9 by default.

Cc: Christian Koenig 
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Christian König 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 181 +++
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 155 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f08ee1ac281c..e90d327a589e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 8d4fbc9e3fc0..01ec0551d26a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -753,7 +753,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -826,9 +826,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5369,11 +5370,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+  
(!amdgpu_sriov_vf(ring->adev) &&
+  flags & AMDGPU_IB_PREEMPTED) 
?
+  true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5428,17 +5435,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+   uint32_t dw2 = 0;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-   amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-  EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
-  EOP_TC_ACTION_EN |
-  EOP_TC_WB_ACTION_EN |
-  EOP_TC_MD_ACTION_EN)) |
-EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-EVENT_INDEX(5)));
+
+   if (writeback) {
+   dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
+   } else {
+   

[PATCH 2/5] drm/amdgpu: Add software ring callbacks for gfx9 (v8)

2022-10-18 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.
v8: Stop creating software rings if no gfx ring existed.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Cc: Likun Gao 
Signed-off-by: Jiadong.Zhu 
Acked-by: Luben Tuikov 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c |  20 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 113 ++-
 5 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 9996dadb39f7..4fdfc3ec134a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,7 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 40b1277b4f0c..f08ee1ac281c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
 #define AMDGPU_MAX_GFX_RINGS   2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 43cab8a37754..2e64ffccc030 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -29,6 +29,14 @@
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
 
+/*
+ * Static description of the software gfx rings: each entry pairs a ring
+ * priority level with the name of that ring. amdgpu_sw_ring_name() and
+ * amdgpu_sw_ring_priority() look entries up by array index.
+ */
+static const struct ring_info {
+	unsigned int hw_pio;	/* priority level of the ring */
+	const char *ring_name;	/* name of the ring */
+} sw_ring_info[] = {
+	{ .hw_pio = AMDGPU_RING_PRIO_DEFAULT, .ring_name = "gfx_low" },
+	{ .hw_pio = AMDGPU_RING_PRIO_2, .ring_name = "gfx_high" },
+};
+
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
 unsigned int entry_size)
 {
@@ -215,3 +223,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count)
 {
WARN_ON(!ring->is_sw_ring);
 }
+
+/**
+ * amdgpu_sw_ring_name - look up the name of a software ring
+ * @idx: index into the sw_ring_info table
+ *
+ * Returns the ring name stored at @idx, or NULL when @idx does not refer to
+ * a table entry.
+ */
+const char *amdgpu_sw_ring_name(int idx)
+{
+	if (idx < 0 || idx >= ARRAY_SIZE(sw_ring_info))
+		return NULL;
+
+	return sw_ring_info[idx].ring_name;
+}
+
+/**
+ * amdgpu_sw_ring_priority - look up the priority level of a software ring
+ * @idx: index into the sw_ring_info table
+ *
+ * Returns the priority stored at @idx, or AMDGPU_RING_PRIO_DEFAULT when
+ * @idx does not refer to a table entry.
+ */
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+	if (idx < 0 || idx >= ARRAY_SIZE(sw_ring_info))
+		return AMDGPU_RING_PRIO_DEFAULT;
+
+	return sw_ring_info[idx].hw_pio;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index d91629589577..28399f4b0e5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count);
 void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
 void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
 
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6b609f33261f..8d4fbc9e3fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_ring_mux.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
 #include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2273,6 +2275,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   unsigned int hw_prio;
 
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@@ -2356,6 +2359,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = tr

[PATCH 1/5] drm/amdgpu: Introduce gfx software ring (v8)

2022-10-18 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority context while there is only
one hardware queue for gfx.

Every software ring has its fence driver and could be used as an ordinary ring
for the GPU scheduler.
Multiple software rings are bound to a real ring with the ring muxer. The
packages committed on the software ring are copied to the real ring.

v2: Use array to store software ring entry.
v3: Remove unnecessary prints.
v4: Remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.
v5: Allocate ring entry dynamically in the muxer.
v6: Update comments for the ring muxer.
v7: Modify for function naming.
v8: Combine software ring functions into amdgpu_ring_mux.c

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky  
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
Acked-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 217 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  76 +++
 5 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..add7da53950c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..9996dadb39f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..40b1277b4f0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,10 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+   unsigned intentry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..43cab8a37754
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include 
+#include 
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RES

[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9 (v8)

2022-10-10 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.
v8: Stop creating software rings if no gfx ring existed.

Acked-by: Luben Tuikov 
Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Cc: Likun Gao 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c |  20 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 113 ++-
 5 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 9996dadb39f7..4fdfc3ec134a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,7 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 40b1277b4f0c..f08ee1ac281c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
 #define AMDGPU_MAX_GFX_RINGS   2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 43cab8a37754..2e64ffccc030 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -29,6 +29,14 @@
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
 
+/*
+ * Static description of the software gfx rings: each entry pairs a ring
+ * priority level with the name of that ring. amdgpu_sw_ring_name() and
+ * amdgpu_sw_ring_priority() look entries up by array index.
+ */
+static const struct ring_info {
+	unsigned int hw_pio;	/* priority level of the ring */
+	const char *ring_name;	/* name of the ring */
+} sw_ring_info[] = {
+	{ .hw_pio = AMDGPU_RING_PRIO_DEFAULT, .ring_name = "gfx_low" },
+	{ .hw_pio = AMDGPU_RING_PRIO_2, .ring_name = "gfx_high" },
+};
+
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
 unsigned int entry_size)
 {
@@ -215,3 +223,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count)
 {
WARN_ON(!ring->is_sw_ring);
 }
+
+/**
+ * amdgpu_sw_ring_name - look up the name of a software ring
+ * @idx: index into the sw_ring_info table
+ *
+ * Returns the ring name stored at @idx, or NULL when @idx does not refer to
+ * a table entry.
+ */
+const char *amdgpu_sw_ring_name(int idx)
+{
+	if (idx < 0 || idx >= ARRAY_SIZE(sw_ring_info))
+		return NULL;
+
+	return sw_ring_info[idx].ring_name;
+}
+
+/**
+ * amdgpu_sw_ring_priority - look up the priority level of a software ring
+ * @idx: index into the sw_ring_info table
+ *
+ * Returns the priority stored at @idx, or AMDGPU_RING_PRIO_DEFAULT when
+ * @idx does not refer to a table entry.
+ */
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+	if (idx < 0 || idx >= ARRAY_SIZE(sw_ring_info))
+		return AMDGPU_RING_PRIO_DEFAULT;
+
+	return sw_ring_info[idx].hw_pio;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index d91629589577..28399f4b0e5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count);
 void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
 void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
 
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6b609f33261f..8d4fbc9e3fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_ring_mux.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
 #include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2273,6 +2275,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   unsigned int hw_prio;
 
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@@ -2356,6 +2359,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
 

[PATCH 4/4] drm/amdgpu: MCBP based on DRM scheduler (v8)

2022-09-29 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packets in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.
v6: Refactor functions for resubmission, calling fence_process in irq handler.
v7: Solve conflict for removing amdgpu_sw_ring.c.
v8: Add time threshold to judge if preemption request is needed.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Acked-by: Luben Tuikov 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c|  53 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 353 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  29 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  10 +-
 8 files changed, 422 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 790f7bfdc654..470448bc1ebb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -54,6 +54,7 @@ struct amdgpu_fence {
 
/* RB, DMA, etc. */
struct amdgpu_ring  *ring;
+   ktime_t start_timestamp;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -199,6 +200,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
*f = fence;
+   to_amdgpu_fence(fence)->start_timestamp = ktime_get();
 
return 0;
 }
@@ -400,6 +402,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring 
*ring)
return lower_32_bits(emitted);
 }
 
+/**
+ * amdgpu_fence_last_unsignaled_time_us - time since the earliest unsignaled fence was emitted
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence that is still unsignaled and calculate the time
+ * delta, in microseconds, between the moment it was emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+   struct amdgpu_fence_driver *drv = &ring->fence_drv;
+   struct dma_fence *fence;
+   uint32_t last_seq, sync_seq;
+
+   last_seq = atomic_read(&ring->fence_drv.last_seq);
+   sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+   if (last_seq == sync_seq)
+   return 0;
+
+   ++last_seq;
+   last_seq &= drv->num_fences_mask;
+   fence = drv->fences[last_seq];
+   if (!fence)
+   return 0;
+
+   return ktime_us_delta(ktime_get(),
+   to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * This function is called when the fence and its related IB are about to be
+ * resubmitted to the GPU in the MCBP scenario. Thus we do not consider a race
+ * with amdgpu_fence_process modifying the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t 
seq, ktime_t timestamp)
+{
+   struct amdgpu_fence_driver *drv = &ring->fence_drv;
+   struct dma_fence *fence;
+
+   seq &= drv->num_fences_mask;
+   fence = drv->fences[seq];
+   if (!fence)
+   return;
+
+   to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
 /**
  * amdgpu_fence_driver_start_ring - make the fence driver
  * ready for use on the requested ring.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
 
+   amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
 }
diff --git a/drivers/gpu/drm/

[PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9 (v4)

2022-09-29 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Modify the unmap_queue package on gfx9. Add trailing fence to track the
   preemption done.
2. Modify emit_ce_meta emit_de_meta functions for the resumed ibs.

v2: Restyle code not to use ternary operator.
v3: Modify code format.
v4: Enable Mid-Command Buffer Preemption for gfx9 by default.

Cc: Christian Koenig 
Cc: Michel Dänzer 
Acked-by: Christian König 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 181 +++
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 155 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f08ee1ac281c..e90d327a589e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 3b607c09d267..0864801241f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -753,7 +753,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -826,9 +826,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5364,11 +5365,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+  
(!amdgpu_sriov_vf(ring->adev) &&
+  flags & AMDGPU_IB_PREEMPTED) 
?
+  true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5423,17 +5430,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+   uint32_t dw2 = 0;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-   amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-  EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
-  EOP_TC_ACTION_EN |
-  EOP_TC_WB_ACTION_EN |
-  EOP_TC_MD_ACTION_EN)) |
-EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-EVENT_INDEX(5)));
+
+   if (writeback) {
+   dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
+   } else {
+   dw2 = EOP_TCL1_ACTION_EN 

[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9 (v7)

2022-09-29 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.

Acked-by: Luben Tuikov 
Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c |  20 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 104 ++-
 5 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 9996dadb39f7..4fdfc3ec134a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,7 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 40b1277b4f0c..f08ee1ac281c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
 #define AMDGPU_MAX_GFX_RINGS   2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 43cab8a37754..2e64ffccc030 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -29,6 +29,14 @@
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
 
+static const struct ring_info {
+   unsigned int hw_pio;
+   const char *ring_name;
+} sw_ring_info[] = {
+   { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+   { AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
 unsigned int entry_size)
 {
@@ -215,3 +223,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count)
 {
WARN_ON(!ring->is_sw_ring);
 }
+
+const char *amdgpu_sw_ring_name(int idx)
+{
+   return idx < ARRAY_SIZE(sw_ring_info) ?
+   sw_ring_info[idx].ring_name : NULL;
+}
+
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+   return idx < ARRAY_SIZE(sw_ring_info) ?
+   sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index d91629589577..28399f4b0e5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count);
 void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
 void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
 
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6b609f33261f..3b607c09d267 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_ring_mux.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
 #include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2273,6 +2275,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   unsigned int hw_prio;
 
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@@ -2356,6 +2359,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+
+ 

[PATCH 1/4] drm/amdgpu: Introduce gfx software ring (v8)

2022-09-29 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority context while there is only
one hardware queue for gfx.

Every software ring has its fence driver and could be used as an ordinary ring
for the GPU scheduler.
Multiple software rings are bound to a real ring with the ring muxer. The
packages committed on the software ring are copied to the real ring.

v2: Use array to store software ring entry.
v3: Remove unnecessary prints.
v4: Remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.
v5: Allocate ring entry dynamically in the muxer.
v6: Update comments for the ring muxer.
v7: Modify for function naming.
v8: Combine software ring functions into amdgpu_ring_mux.c

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky  
Cc: Michel Dänzer 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 217 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  76 +++
 5 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..add7da53950c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..9996dadb39f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..40b1277b4f0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,10 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+   unsigned intentry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..43cab8a37754
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/slab.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RES

[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9 (v7)

2022-09-27 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.

Acked-by: Luben Tuikov 
Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c |  20 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 104 ++-
 5 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 9996dadb39f7..4fdfc3ec134a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,7 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 40b1277b4f0c..f08ee1ac281c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
 #define AMDGPU_MAX_GFX_RINGS   2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 43cab8a37754..2e64ffccc030 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -29,6 +29,14 @@
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
 
+static const struct ring_info {
+   unsigned int hw_pio;
+   const char *ring_name;
+} sw_ring_info[] = {
+   { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+   { AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
 int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
 unsigned int entry_size)
 {
@@ -215,3 +223,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count)
 {
WARN_ON(!ring->is_sw_ring);
 }
+
+const char *amdgpu_sw_ring_name(int idx)
+{
+   return idx < ARRAY_SIZE(sw_ring_info) ?
+   sw_ring_info[idx].ring_name : NULL;
+}
+
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+   return idx < ARRAY_SIZE(sw_ring_info) ?
+   sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index d91629589577..28399f4b0e5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count);
 void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
 void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
 
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6b609f33261f..3b607c09d267 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_ring_mux.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
 #include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2273,6 +2275,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   unsigned int hw_prio;
 
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@@ -2356,6 +2359,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+
+   /* disa

[PATCH 4/4] drm/amdgpu: MCBP based on DRM scheduler (v7)

2022-09-27 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packets in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.
v6: Refactor functions for resubmission, calling fence_process in irq handler.
v7: Solve conflict for removing amdgpu_sw_ring.c.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Acked-by: Luben Tuikov 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 348 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  29 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  10 +-
 7 files changed, 361 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
 
+   amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..1f15f9242e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
 
return mqd_mgr->init_mqd(adev, ring->mqd_ptr, );
 }
+
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+   if (ring->is_sw_ring)
+   amdgpu_sw_ring_ib_begin(ring);
+}
+
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+   if (ring->is_sw_ring)
+   amdgpu_sw_ring_ib_end(ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index e90d327a589e..6fbc1627dab7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -312,6 +312,9 @@ struct amdgpu_ring {
 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
 
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib 
*ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 2e64ffccc030..ebb37df2c897 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -37,23 +37,142 @@ static const struct ring_info {
{ AMDGPU_RING_PRIO_2, "gfx_high"},
 };
 
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
+static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct 
amdgpu_ring_mux *mux,
+   struct 
amdgpu_ring *ring)
+{
+   return ring->entry_index < mux->ring_entry_size ?
+   &mux->ring_entry[ring->entry_index] : NULL;
+}
+
+/* copy packages on sw ring range[begin, end) */
+static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring,
+ u64 s_start, u64 s_end)
+{
+   u64 start, end;
+   struct amdgpu_ring *real_ring = mux->real_ring;
+
+   start = s_start & ring->buf_mask;
+   end = s_end & ring->buf_mask;
+
+   if (start == end) {
+   DRM_ERROR("no more data copied from sw ring\n");
+   return;
+   }
+   if (start > end) {
+   amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - 
start);
+   amdgp

[PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9 (v4)

2022-09-27 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Modify the unmap_queue package on gfx9. Add trailing fence to track the
   preemption done.
2. Modify emit_ce_meta emit_de_meta functions for the resumed ibs.

v2: Restyle code not to use ternary operator.
v3: Modify code format.
v4: Enable Mid-Command Buffer Preemption for gfx9 by default.

Cc: Christian Koenig 
Acked-by: Christian König 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 181 +++
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 155 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f08ee1ac281c..e90d327a589e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 3b607c09d267..0864801241f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -753,7 +753,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -826,9 +826,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5364,11 +5365,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+  
(!amdgpu_sriov_vf(ring->adev) &&
+  flags & AMDGPU_IB_PREEMPTED) 
?
+  true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5423,17 +5430,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+   uint32_t dw2 = 0;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-   amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-  EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
-  EOP_TC_ACTION_EN |
-  EOP_TC_WB_ACTION_EN |
-  EOP_TC_MD_ACTION_EN)) |
-EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-EVENT_INDEX(5)));
+
+   if (writeback) {
+   dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
+   } else {
+   dw2 = EOP_TCL1_ACTION_EN 

[PATCH 1/4] drm/amdgpu: Introduce gfx software ring (v8)

2022-09-27 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority context while there is only
one hardware queue for gfx.

Every software ring has its fence driver and could be used as an ordinary ring
for the GPU scheduler.
Multiple software rings are bound to a real ring with the ring muxer. The
packages committed on the software ring are copied to the real ring.

v2: Use array to store software ring entry.
v3: Remove unnecessary prints.
v4: Remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.
v5: Allocate ring entry dynamically in the muxer.
v6: Update comments for the ring muxer.
v7: Modify for function naming.
v8: Combine software ring functions into amdgpu_ring_mux.c

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky  
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 217 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  76 +++
 5 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..add7da53950c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..9996dadb39f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0xL
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..40b1277b4f0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,10 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+   unsigned intentry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index ..43cab8a37754
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include 
+#include 
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
+
+int a

[PATCH] drm/amdgpu: Remove fence_process in count_emitted

2022-09-23 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The function amdgpu_fence_count_emitted, which is used in a work handler,
should not call amdgpu_fence_process, which must only be used in the irq
handler.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d16c8c1f72db..790f7bfdc654 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -394,7 +394,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring 
*ring)
/* We are not protected by ring lock when reading the last sequence
 * but it's ok to report slightly wrong fence count here.
 */
-   amdgpu_fence_process(ring);
emitted = 0x100000000ull;
emitted -= atomic_read(&ring->fence_drv.last_seq);
emitted += READ_ONCE(ring->fence_drv.sync_seq);
-- 
2.25.1



[PATCH 4/4] drm/amdgpu: MCBP based on DRM scheduler (v6)

2022-09-23 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packages in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.
v6: Refactor functions for resubmission, calling fence_process in irq handler.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Acked-by: Luben Tuikov 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 323 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  30 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  26 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c|  10 +-
 8 files changed, 368 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
 
+   amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..84b0b3c7d40f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
 
 #include 
 #include "amdgpu.h"
+#include "amdgpu_sw_ring.h"
 #include "atom.h"
 
 /*
@@ -569,3 +570,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
 
return mqd_mgr->init_mqd(adev, ring->mqd_ptr, );
 }
+
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+   if (ring->is_sw_ring)
+   amdgpu_sw_ring_ib_begin(ring);
+}
+
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+   if (ring->is_sw_ring)
+   amdgpu_sw_ring_ib_end(ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index e90d327a589e..6fbc1627dab7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -312,6 +312,9 @@ struct amdgpu_ring {
 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
 
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib 
*ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 662aadebf111..788567e3b743 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -28,23 +28,146 @@
 
 #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
 
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
+static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct 
amdgpu_ring_mux *mux,
+   struct 
amdgpu_ring *ring)
+{
+   return ring->entry_index < mux->ring_entry_size ?
+   >ring_entry[ring->entry_index] : NULL;
+}
+
+/* copy packages on sw ring range[begin, end) */
+static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring,
+ u64 s_start, u64 s_end)
+{
+   u64 start, end;
+   struct amdgpu_ring *real_ring = mux->real_ring;
+
+   start = s_start & ring->buf_mask;
+   end = s_end & ring->buf_mask;
+
+   if (start == end) {
+   DRM_ERROR("no more data copied from sw ring\n");
+   return;
+   }
+   if (start > end) {
+

[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9 (v6)

2022-09-23 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.

Acked-by: Luben Tuikov 
Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 108 ++-
 3 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 9996dadb39f7..4fdfc3ec134a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,7 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 40b1277b4f0c..f08ee1ac281c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
 #define AMDGPU_MAX_GFX_RINGS   2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5349ca4d19e3..e688665cd1e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_sw_ring.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
 #include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0xL
@@ -2273,6 +2275,7 @@ static int gfx_v9_0_sw_init(void *handle)
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+   unsigned int hw_prio;
 
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 0, 1):
@@ -2356,6 +2359,9 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+
+   /* disable scheduler on the real ring */
+   ring->no_scheduler = true;
r = amdgpu_ring_init(adev, ring, 1024, >gfx.eop_irq,
 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
 AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -2363,6 +2369,42 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
}
 
+   /* set up the software rings */
+   for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+   ring = >gfx.sw_gfx_ring[i];
+   ring->ring_obj = NULL;
+   if (!i)
+   sprintf(ring->name, "gfx_sw");
+   else
+   sprintf(ring->name, "gfx_sw_%d", i);
+   ring->use_doorbell = true;
+   ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+   ring->is_sw_ring = true;
+   hw_prio = (i == 1) ? AMDGPU_RING_PRIO_2 :
+   AMDGPU_RING_PRIO_DEFAULT;
+   r = amdgpu_ring_init(adev, ring, 1024, >gfx.eop_irq,
+AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
+NULL);
+   if (r)
+   return r;
+   ring->wptr = 0;
+   }
+
+   /* init the muxer and add software rings */
+   r = amdgpu_ring_mux_init(>gfx.muxer, >gfx.gfx_ring[0],
+GFX9_NUM_SW_GFX_RINGS);
+   if (r) {
+   DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
+   return r;
+   }
+   for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+   r = amdgpu_ring_mux_add_sw_ring(>gfx.muxer, 
>gfx.sw_gfx_ring[i]);
+   if (r) {
+   DRM_ERROR("amdgpu_ring_

[PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9 (v4)

2022-09-23 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Modify the unmap_queue package on gfx9. Add a trailing fence to track
   preemption completion.
2. Modify the emit_ce_meta and emit_de_meta functions for the resumed ibs.

v2: Restyle code not to use ternary operator.
v3: Modify code format.
v4: Enable Mid-Command Buffer Preemption for gfx9 by default.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 181 +++
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 155 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f08ee1ac281c..e90d327a589e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e688665cd1e0..669532f658da 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -753,7 +753,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -826,9 +826,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5368,11 +5369,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+  
(!amdgpu_sriov_vf(ring->adev) &&
+  flags & AMDGPU_IB_PREEMPTED) 
?
+  true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5427,17 +5434,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+   uint32_t dw2 = 0;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-   amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-  EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
-  EOP_TC_ACTION_EN |
-  EOP_TC_WB_ACTION_EN |
-  EOP_TC_MD_ACTION_EN)) |
-EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-EVENT_INDEX(5)));
+
+   if (writeback) {
+   dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
+   } else {
+   dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+   EOP_TC_WB_AC

[PATCH 1/4] drm/amdgpu: Introduce gfx software ring (v7)

2022-09-23 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority context while there is only
one hardware queue for gfx.

Every software ring has its fence driver and could be used as an ordinary ring
for the GPU scheduler.
Multiple software rings are bound to a real ring with the ring muxer. The
packages committed on the software ring are copied to the real ring.

v2: Use array to store software ring entry.
v3: Remove unnecessary prints.
v4: Remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.
v5: Allocate ring entry dynamically in the muxer.
v6: Update comments for the ring muxer.
v7: Modify for function naming.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky  
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 183 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  68 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  61 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  38 
 7 files changed, 359 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..85224bc81ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_sw_ring.o amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..9996dadb39f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0xL
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..40b1277b4f0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,10 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+   unsigned intentry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index ..662aadebf111
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+

[PATCH 5/5] drm/amdgpu: Correct the position in patch_cond_exec for gfx9

2022-09-21 Thread jiadong.zhu
From: "Jiadong.Zhu" 

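The current position calculated in gfx_v9_0_ring_emit_patch_cond_exec
underflows when the wptr is divisible by ring->buf_mask + 1, because the
masked value is then 0 before 1 is subtracted from it. Subtract 1 from the
wptr first and apply the mask afterwards.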

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c568a4f5b81e..65f8c8d4f4ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5754,7 +5754,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct 
amdgpu_ring *ring, unsigne
BUG_ON(offset > ring->buf_mask);
BUG_ON(ring->ring[offset] != 0x55aa55aa);
 
-   cur = (ring->wptr & ring->buf_mask) - 1;
+   cur = (ring->wptr - 1) & ring->buf_mask;
if (likely(cur > offset))
ring->ring[offset] = cur - offset;
else
-- 
2.25.1



[PATCH 4/5] drm/amdgpu: Implement OS triggered MCBP (v5)

2022-09-21 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packages in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.
v5: Fix corner cases for resubmission cases.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Acked-by: Luben Tuikov 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c |  91 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h |  29 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 186 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  24 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  27 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |   2 +
 10 files changed, 372 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 85224bc81ce5..24c5aa19bbf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_sw_ring.o amdgpu_ring_mux.o
+   amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
 
+   amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
new file mode 100644
index ..121b1a4e0f04
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "amdgpu.h"
+#include "amdgpu_mcbp.h"
+#include "amdgpu_ring.h"
+
+/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to 
resubmit. */
+int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+   struct amdgpu_mux_entry *e;
+   struct amdgpu_ring *ring = NULL;
+   int i;
+
+   spin_lock(>lock);
+
+   amdgpu_ring_preempt_ib(mux->real_ring);
+
+   for (i = 0; i < mux->num_ring_entries; i++) {
+   e = >ring_entry[i];
+   if (e->ring->hw_p

[PATCH 2/5] drm/amdgpu: Add software ring callbacks for gfx9 (v5)

2022-09-21 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.

Acked-by: Luben Tuikov 
Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 117 +--
 5 files changed, 120 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 96d058c4cd4b..525df0b4d55f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold;
 extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
 extern int amdgpu_async_gfx_ring;
 extern int amdgpu_mcbp;
+extern int amdgpu_sw_ring;
 extern int amdgpu_discovery;
 extern int amdgpu_mes;
 extern int amdgpu_mes_kiq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 9996dadb39f7..93b25d9a87f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,8 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   /* software ring */
+   unsignednum_sw_gfx_rings;
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..4eaf3bd332f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
 
 #include 
 #include "amdgpu.h"
+#include "amdgpu_sw_ring.h"
 #include "atom.h"
 
 /*
@@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 {
uint32_t count;
 
+   if (ring->is_sw_ring) {
+   amdgpu_sw_ring_commit(ring);
+   return;
+   }
+
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
@@ -343,7 +349,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
  */
 void amdgpu_ring_fini(struct amdgpu_ring *ring)
 {
-
/* Not to finish a ring which is not initialized */
if (!(ring->adev) ||
(!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 40b1277b4f0c..275b885363c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -38,7 +38,8 @@ struct amdgpu_vm;
 /* max number of rings */
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
-#define AMDGPU_MAX_GFX_RINGS   2
+/*2 software ring and 1 real ring*/
+#define AMDGPU_MAX_GFX_RINGS   3
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5349ca4d19e3..4a8be9595459 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_sw_ring.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -55,7 +56,8 @@
 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
 #include "asic_reg/gc/gc_9_0_default.h"
 
-#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_GFX_RINGS 3
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0xL
@@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct 
amdgpu_device *adev, int ring_id,
 static int gfx_v9_0_sw_init(void *handle)
 {
int i, j, k, r, ring_id;
+   unsigned int hw_prio;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2356,13 +2359,41 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+   ring->is_sw_ring = (adev->gfx.num_sw_gfx_rings > 1) && (i >

[PATCH 3/5] drm/amdgpu: Modify unmap_queue format for gfx9 (v3)

2022-09-21 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Modify the unmap_queue package on gfx9. Add trailing fence to track the
   preemption done.
2. Modify emit_ce_meta emit_de_meta functions for the resumed ibs.

v2: Restyle code not to use ternary operator.
v3: Modify code format.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 182 +++
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 156 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 275b885363c3..aeb48cc3666c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 4a8be9595459..c568a4f5b81e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -753,7 +753,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -826,9 +826,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5357,11 +5358,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+  
(!amdgpu_sriov_vf(ring->adev) &&
+  flags & AMDGPU_IB_PREEMPTED) 
?
+  true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5416,17 +5423,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+   uint32_t dw2 = 0;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-   amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-  EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
-  EOP_TC_ACTION_EN |
-  EOP_TC_WB_ACTION_EN |
-  EOP_TC_MD_ACTION_EN)) |
-EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-EVENT_INDEX(5)));
+
+   if (writeback) {
+   dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
+   } else {
+   dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+   E

[PATCH 1/5] drm/amdgpu: Introduce gfx software ring (v6)

2022-09-21 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority context while there is only
one hardware queue for gfx.

Every software ring has its fence driver and could be used as an ordinary ring
for the GPU scheduler.
Multiple software rings are bound to a real ring with the ring muxer. The
packages committed on the software ring are copied to the real ring.

v2: Use array to store software ring entry.
v3: Remove unnecessary prints.
v4: Remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.
v5: Allocate ring entry dynamically in the muxer.
v6: Update comments for the ring muxer.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky  
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 185 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  66 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  60 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  43 +
 7 files changed, 363 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..85224bc81ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_sw_ring.o amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..9996dadb39f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0xL
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..40b1277b4f0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,10 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+   unsigned intentry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index ..d6b30db27104
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#includ

[PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v4)

2022-09-15 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger Mid-Command Buffer Preemption according to the priority of the software
rings and the hw fence signalling condition.

The muxer saves the locations of the indirect buffer frames from the software
ring together with the fence sequence number in its fifo queue, and pops out
those records when the fences are signalled. The locations are used to resubmit
packages in preemption scenarios by copying the chunks from the software ring.

v2: Update comment style.
v3: Fix conflict caused by previous modifications.
v4: Remove unnecessary prints.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c |  91 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h |  29 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 153 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  22 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  27 
 9 files changed, 340 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 85224bc81ce5..24c5aa19bbf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_sw_ring.o amdgpu_ring_mux.o
+   amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
 
+   amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
new file mode 100644
index ..4b0aae1a7ad6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "amdgpu.h"
+#include "amdgpu_mcbp.h"
+#include "amdgpu_ring.h"
+
+/* trigger mcbp and find if we need resubmit */
+int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+   struct amdgpu_mux_entry *e;
+   struct amdgpu_ring *ring = NULL;
+   int i;
+
+   spin_lock(>lock);
+
+   amdgpu_ring_preempt_ib(mux->real_ring);
+
+   ring = NULL;
+   for (i = 0; i < mux->num_ring_entries; i++) {
+   e = >ring_entry[i];
+   if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+   ring = e->ring;
+   break;
+   }
+   }
+
+  

[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v5)

2022-09-15 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky 
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 117 +--
 5 files changed, 120 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 96d058c4cd4b..525df0b4d55f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold;
 extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
 extern int amdgpu_async_gfx_ring;
 extern int amdgpu_mcbp;
+extern int amdgpu_sw_ring;
 extern int amdgpu_discovery;
 extern int amdgpu_mes;
 extern int amdgpu_mes_kiq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 9996dadb39f7..93b25d9a87f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,8 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   /* software ring */
+   unsignednum_sw_gfx_rings;
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..4eaf3bd332f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
 
 #include 
 #include "amdgpu.h"
+#include "amdgpu_sw_ring.h"
 #include "atom.h"
 
 /*
@@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 {
uint32_t count;
 
+   if (ring->is_sw_ring) {
+   amdgpu_sw_ring_commit(ring);
+   return;
+   }
+
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
@@ -343,7 +349,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
  */
 void amdgpu_ring_fini(struct amdgpu_ring *ring)
 {
-
/* Not to finish a ring which is not initialized */
if (!(ring->adev) ||
(!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 40b1277b4f0c..275b885363c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -38,7 +38,8 @@ struct amdgpu_vm;
 /* max number of rings */
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
-#define AMDGPU_MAX_GFX_RINGS   2
+/*2 software ring and 1 real ring*/
+#define AMDGPU_MAX_GFX_RINGS   3
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5349ca4d19e3..e85565b0e52a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_sw_ring.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -55,7 +56,8 @@
 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
 #include "asic_reg/gc/gc_9_0_default.h"
 
-#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_GFX_RINGS 3
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0xL
@@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct 
amdgpu_device *adev, int ring_id,
 static int gfx_v9_0_sw_init(void *handle)
 {
int i, j, k, r, ring_id;
+   unsigned int hw_prio;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2356,13 +2359,41 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+   ring->is_sw_ring = (adev->gfx.num_sw_gfx_rings > 1) && (i > 0);
+
+   if

[PATCH] drm/amdgpu: Introduce gfx software ring(v5)

2022-09-15 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority context while there is only
one hardware queue for gfx.

Every software ring has its fence driver and could be used as an ordinary ring
for the GPU scheduler.
Multiple software rings are bound to a real ring with the ring muxer. The
packages committed on the software ring are copied to the real ring.

v2: Use array to store software ring entry.
v3: Remove unnecessary prints.
v4: Remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.
v5: Allocate ring entry dynamically in the muxer.

Cc: Christian Koenig 
Cc: Luben Tuikov 
Cc: Andrey Grodzovsky  
Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 176 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  66 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  60 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  43 +
 7 files changed, 354 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..85224bc81ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_sw_ring.o amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..9996dadb39f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0xL
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..40b1277b4f0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,10 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+   unsigned intentry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index ..5e9c178f358b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include 
+#include 
+
+#include "amdgpu_ring_mux.h"
+#

[PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v4)

2022-09-13 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority
context while there is only one hardware queue
for gfx.

Every software ring has its fence driver and could
be used as an ordinary ring for the gpu_scheduler.
Multiple software rings are bound to a real ring
with the ring muxer. The packages committed on the
software ring are copied to the real ring.

v2: use array to store software ring entry.
v3: remove unnecessary prints.
v4: remove amdgpu_ring_sw_init/fini functions,
using gtt for sw ring buffer for later dma copy
optimization.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  60 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  43 +
 7 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..85224bc81ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_sw_ring.o amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..0de8e3cd0f1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0xL
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..fe33a683bfba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,9 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index ..652a6d3e0ec3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include 
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
+
+static int copy_pkt_from_sw_ring(struct amdgpu_ring_mu

[PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2)

2022-09-08 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Trigger MCBP according to the priority of the
software rings and the hw fence signaling
condition.

The muxer records the latest locations from the
software ring, which are used to resubmit packages
in preemption scenarios.

v2: update comment style

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c | 101 
 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h |  29 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 163 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  16 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  |  26 +++
 9 files changed, 351 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 85224bc81ce5..24c5aa19bbf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-   amdgpu_sw_ring.o amdgpu_ring_mux.o
+   amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..af86d87e2f3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
}
 
+   amdgpu_ring_ib_begin(ring);
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
 
+   amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
new file mode 100644
index ..2a12101a7699
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "amdgpu.h"
+#include "amdgpu_mcbp.h"
+#include "amdgpu_ring.h"
+
+/* trigger mcbp and find if we need resubmit */
+int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+   struct amdgpu_mux_entry *e;
+   struct amdgpu_ring *ring = NULL;
+   int i;
+
+   DRM_INFO("%s in\n", __func__);
+
+   spin_lock(&mux->lock);
+
+   amdgpu_ring_preempt_ib(mux->real_ring);
+
+   ring = NULL;
+   for (i = 0; i < mux->num_ring_entries; i++) {
+   e = &mux->ring_entries[i];
+   if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+   ring = e->ring;
+   break;
+   }
+   }
+
+   if (!ring) {
+   DRM_ERROR("cannot find low priority ring\n");
+   return -ENOENT;
+   }
+
+   amdgpu_fence_process(ring);
+
+   DRM_INFO("after preempted ring_prio(%d) last_seq(%x) sync_seq(%x)\n",
+   ring->hw_prio, atomi

[PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9(v2)

2022-09-08 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Modify the unmap_queues packet on gfx9.
   Add a trailing fence to track when the preemption is done.
2. Modify the emit_ce_meta and emit_de_meta functions
   for the resumed IBs.

v2: restyle code not to use ternary operator.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 181 +++
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 155 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index ba6d8c753f7e..d3155dc86c07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -60,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 774e44e1074a..89a5c45b1006 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -753,7 +753,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -826,9 +826,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5356,11 +5357,16 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+(!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ?
+   true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5415,17 +5421,23 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+   uint32_t dw2 = 0;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-   amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-  EOP_TC_NC_ACTION_EN) :
- (EOP_TCL1_ACTION_EN |
-  EOP_TC_ACTION_EN |
-  EOP_TC_WB_ACTION_EN |
-  EOP_TC_MD_ACTION_EN)) |
-EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-EVENT_INDEX(5)));
+
+   if (writeback) {
+   dw2 = EOP_TC_WB_ACTION_EN | EOP_TC_NC_ACTION_EN;
+   } else {
+   dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+   EOP_TC_WB_ACTION_EN | EOP_TC_MD_ACTION_EN;
+   }
+   dw2 |= EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT

[PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3)

2022-09-08 Thread jiadong.zhu
From: "Jiadong.Zhu" 

The software ring is created to support priority
context while there is only one hardware queue
for gfx.

Every software ring has its own fence driver and can
be used as an ordinary ring by the gpu_scheduler.
Multiple software rings are bound to a real ring
by the ring muxer. The packets committed on a
software ring are copied to the real ring.

v2: use array to store software ring entry.
v3: remove unnecessary prints.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 182 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h |  67 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c  | 204 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h  |  48 +
 7 files changed, 509 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..85224bc81ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+   amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+   amdgpu_sw_ring.o amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..0de8e3cd0f1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -346,6 +347,8 @@ struct amdgpu_gfx {
struct amdgpu_gfx_ras   *ras;
 
boolis_poweron;
+
+   struct amdgpu_ring_mux  muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..fe33a683bfba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -278,6 +278,9 @@ struct amdgpu_ring {
boolis_mes_queue;
uint32_thw_queue_id;
struct amdgpu_mes_ctx_data *mes_ctx;
+
+   boolis_sw_ring;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), 
(ib)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..ea4a3c66119a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include 
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2)
+
+static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct 
amdgpu_ring *ring,
+   u64 s_begin, u64 s_end);
+
+int amdgpu_ring_mux_init(s

[PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3)

2022-09-08 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set ring functions with software ring callbacks
on gfx9.

The software ring can be tested with the debugfs_test_ib
case.

v2: set sw_ring 2 to enable software ring by default.
v3: remove the parameter for software ring enablement.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h  |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  16 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 116 +--
 5 files changed, 128 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 96d058c4cd4b..525df0b4d55f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold;
 extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
 extern int amdgpu_async_gfx_ring;
 extern int amdgpu_mcbp;
+extern int amdgpu_sw_ring;
 extern int amdgpu_discovery;
 extern int amdgpu_mes;
 extern int amdgpu_mes_kiq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 0de8e3cd0f1c..5eec82014f0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,8 @@ struct amdgpu_gfx {
 
boolis_poweron;
 
+   /*software ring*/
+   unsigned
num_sw_gfx_rings;
struct amdgpu_ring_mux  muxer;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..5b70a2c36d81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
+#include "amdgpu_sw_ring.h"
 #include "atom.h"
 
 /*
@@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 {
uint32_t count;
 
+   if (ring->is_sw_ring) {
+   amdgpu_sw_ring_commit(ring);
+   return;
+   }
+
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
@@ -183,6 +189,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
u32 *num_sched;
u32 hw_ip;
 
+   if (adev->gfx.num_sw_gfx_rings > 0 && ring->is_sw_ring) {
+   return amdgpu_sw_ring_init(adev, ring, max_dw, irq_src, 
irq_type,
+   hw_prio, sched_score);
+   }
+
/* Set the hw submission limit higher for KIQ because
 * it's used for a number of gfx/compute tasks by both
 * KFD and KGD which may have outstanding fences and
@@ -343,7 +354,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
  */
 void amdgpu_ring_fini(struct amdgpu_ring *ring)
 {
-
+   if (ring->is_sw_ring) {
+   amdgpu_sw_ring_fini(ring);
+   return;
+   }
/* Not to finish a ring which is not initialized */
if (!(ring->adev) ||
(!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index fe33a683bfba..ba6d8c753f7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -38,7 +38,8 @@ struct amdgpu_vm;
 /* max number of rings */
 #define AMDGPU_MAX_RINGS   28
 #define AMDGPU_MAX_HWIP_RINGS  8
-#define AMDGPU_MAX_GFX_RINGS   2
+/*2 software ring and 1 real ring*/
+#define AMDGPU_MAX_GFX_RINGS   3
 #define AMDGPU_MAX_COMPUTE_RINGS   8
 #define AMDGPU_MAX_VCE_RINGS   3
 #define AMDGPU_MAX_UVD_ENC_RINGS   2
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5349ca4d19e3..774e44e1074a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_sw_ring.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -55,7 +56,8 @@
 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
 #include "asic_reg/gc/gc_9_0_default.h"
 
-#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_GFX_RINGS 3
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct 
amdgpu_device *adev, int ring_id,
 static int gfx_v9_0_sw_init(void *handle)
 {
int i, j, k, r, ring_id;
+   unsigned int hw_

[PATCH] drm/amdgpu: modify mcbp implement for gfx9(v3)

2022-08-10 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Use the unmap_queues packet to trigger preemption on gfx9.
   Add a trailing fence to track when the preemption is done.
2. Modify the emit_ce_meta and emit_de_meta functions
   for the resumed IBs.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 161 ---
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 143 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 82c178a9033a..ca626f0ad7b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -59,6 +59,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5332899642dc..887021fd56aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -751,7 +751,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -824,9 +824,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5446,11 +5447,16 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+(!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ?
+   true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5505,6 +5511,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
@@ -5515,6 +5522,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
   EOP_TC_WB_ACTION_EN |
   EOP_TC_MD_ACTION_EN)) |
 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+(exec ? EOP_EXEC : 0x0) |
 EVENT_INDEX(5)));
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel 
? 2 : 0));
 
@@ -5620,33 +5628,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring 
*ring)
amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+   struct amdgpu_device *adev = ring->adev;
struct v9_ce_ib_state ce_payload = {0};
-   uint64_t csa_addr;
+ 

[PATCH 1/2] drm/amdgpu: modify mcbp implement for gfx9(v2)

2022-08-09 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Use the unmap_queues packet to trigger preemption on gfx9.
   Add a trailing fence to track when the preemption is done.
2. Modify the emit_ce_meta and emit_de_meta functions
   for the resumed IBs.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 159 ---
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 141 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 82c178a9033a..ca626f0ad7b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -59,6 +59,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5332899642dc..0b7cb4cf13c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -751,7 +751,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -824,9 +824,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5446,11 +5447,15 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+(!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ? true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5505,6 +5510,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
@@ -5515,6 +5521,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
   EOP_TC_WB_ACTION_EN |
   EOP_TC_MD_ACTION_EN)) |
 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+(exec ? EOP_EXEC : 0x0) |
 EVENT_INDEX(5)));
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel 
? 2 : 0));
 
@@ -5620,33 +5627,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring 
*ring)
amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+   struct amdgpu_device *adev = ring->adev;
struct v9_ce_ib_state ce_payload = {0};
-   uint64_t csa_addr;
+   uint64_t offset, ce_payload_gpu_addr;
+   void *ce_payload_

[PATCH 2/2] drm/amdgpu: add mcbp support for sdma v4.0

2022-08-09 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set the MIDCMD_PREEMPT_ENABLE field of SDMA0_CNTL to enable MCBP when
amdgpu_mcbp is set.
Add an SDMA preempt_ib function, used by the debugfs test.
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 53 ++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d35f18536da2..bc69af4b4ada 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1502,6 +1502,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, mmSDMA0_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+   if (amdgpu_mcbp){
+   /* enable MCBP */
+   temp = REG_SET_FIELD(temp, SDMA0_CNTL, 
MIDCMD_PREEMPT_ENABLE, 1);
+   }
WREG32_SDMA(i, mmSDMA0_CNTL, temp);
 
if (!amdgpu_sriov_vf(adev)) {
@@ -2102,6 +2107,53 @@ static int sdma_v4_0_soft_reset(void *handle)
return 0;
 }
 
+static int sdma_v4_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+   int i, r = 0;
+   struct amdgpu_device *adev = ring->adev;
+   u32 index = 0;
+   u64 sdma_gfx_preempt;
+
+   amdgpu_sdma_get_index_from_ring(ring, &index);
+   if (index == 0)
+   sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT;
+   else
+   sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT;
+
+   /* assert preemption condition */
+   amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+   /* emit the trailing fence */
+   ring->trail_seq += 1;
+   amdgpu_ring_alloc(ring, 10);
+   sdma_v4_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+   amdgpu_ring_commit(ring);
+
+   /* assert IB preemption */
+   WREG32(sdma_gfx_preempt, 1);
+
+   /* poll the trailing fence */
+   for (i = 0; i < adev->usec_timeout; i++) {
+   if (ring->trail_seq ==
+   le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+   break;
+   udelay(1);
+   }
+
+   if (i >= adev->usec_timeout) {
+   r = -EINVAL;
+   DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+   }
+
+   /* deassert IB preemption */
+   WREG32(sdma_gfx_preempt, 0);
+
+   /* deassert the preemption condition */
+   amdgpu_ring_set_preempt_cond_exec(ring, true);
+   return r;
+}
+
 static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -2435,6 +2487,7 @@ static const struct amdgpu_ring_funcs 
sdma_v4_0_ring_funcs = {
.emit_wreg = sdma_v4_0_ring_emit_wreg,
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+   .preempt_ib = sdma_v4_0_ring_preempt_ib,
 };
 
 /*
-- 
2.25.1



[PATCH 2/2] drm/amdgpu: add mcbp support for sdma v4.0

2022-07-18 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set the MIDCMD_PREEMPT_ENABLE field of SDMA0_CNTL to enable MCBP when
amdgpu_mcbp is set.
Add an SDMA preempt_ib function, used by the debugfs test.
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 53 ++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d35f18536da2..bc69af4b4ada 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1502,6 +1502,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, mmSDMA0_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+   if (amdgpu_mcbp){
+   /* enable MCBP */
+   temp = REG_SET_FIELD(temp, SDMA0_CNTL, 
MIDCMD_PREEMPT_ENABLE, 1);
+   }
WREG32_SDMA(i, mmSDMA0_CNTL, temp);
 
if (!amdgpu_sriov_vf(adev)) {
@@ -2102,6 +2107,53 @@ static int sdma_v4_0_soft_reset(void *handle)
return 0;
 }
 
+static int sdma_v4_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+   int i, r = 0;
+   struct amdgpu_device *adev = ring->adev;
+   u32 index = 0;
+   u64 sdma_gfx_preempt;
+
+   amdgpu_sdma_get_index_from_ring(ring, &index);
+   if (index == 0)
+   sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT;
+   else
+   sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT;
+
+   /* assert preemption condition */
+   amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+   /* emit the trailing fence */
+   ring->trail_seq += 1;
+   amdgpu_ring_alloc(ring, 10);
+   sdma_v4_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+   amdgpu_ring_commit(ring);
+
+   /* assert IB preemption */
+   WREG32(sdma_gfx_preempt, 1);
+
+   /* poll the trailing fence */
+   for (i = 0; i < adev->usec_timeout; i++) {
+   if (ring->trail_seq ==
+   le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+   break;
+   udelay(1);
+   }
+
+   if (i >= adev->usec_timeout) {
+   r = -EINVAL;
+   DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+   }
+
+   /* deassert IB preemption */
+   WREG32(sdma_gfx_preempt, 0);
+
+   /* deassert the preemption condition */
+   amdgpu_ring_set_preempt_cond_exec(ring, true);
+   return r;
+}
+
 static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -2435,6 +2487,7 @@ static const struct amdgpu_ring_funcs 
sdma_v4_0_ring_funcs = {
.emit_wreg = sdma_v4_0_ring_emit_wreg,
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+   .preempt_ib = sdma_v4_0_ring_preempt_ib,
 };
 
 /*
-- 
2.25.1



[PATCH 1/2] drm/amdgpu: modify mcbp implement for gfx9

2022-07-18 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Use the unmap_queues packet to trigger preemption on gfx9.
   Add a trailing fence to track when the preemption is done.
2. Modify the emit_ce_meta and emit_de_meta functions
   for the resumed IBs.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 156 ---
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 138 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 82c178a9033a..ca626f0ad7b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -59,6 +59,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5332899642dc..e2c614441691 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -751,7 +751,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -824,9 +824,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5446,11 +5447,15 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+(!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ? true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5505,6 +5510,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
@@ -5515,6 +5521,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
   EOP_TC_WB_ACTION_EN |
   EOP_TC_MD_ACTION_EN)) |
 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+(exec ? EOP_EXEC : 0x0) |
 EVENT_INDEX(5)));
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel 
? 2 : 0));
 
@@ -5620,33 +5627,132 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring 
*ring)
amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+   struct amdgpu_device *adev = ring->adev;
struct v9_ce_ib_state ce_payload = {0};
-   uint64_t csa_addr;
+   uint64_t offset, ce_payload_gpu_addr;
+   void *ce_payload_

[PATCH 2/3] drm/amdgpu: add mcbp support for sdma v4.0

2022-07-15 Thread jiadong.zhu
From: "Jiadong.Zhu" 

Set the MIDCMD_PREEMPT_ENABLE field in SDMA0_CNTL to enable MCBP when
amdgpu_mcbp is set. Add an SDMA preempt_ib function used by the debugfs test.
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 53 ++
 1 file changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d35f18536da2..bc69af4b4ada 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1502,6 +1502,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, mmSDMA0_CNTL);
temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+   if (amdgpu_mcbp){
+   /* enable MCBP */
+   temp = REG_SET_FIELD(temp, SDMA0_CNTL, 
MIDCMD_PREEMPT_ENABLE, 1);
+   }
WREG32_SDMA(i, mmSDMA0_CNTL, temp);
 
if (!amdgpu_sriov_vf(adev)) {
@@ -2102,6 +2107,53 @@ static int sdma_v4_0_soft_reset(void *handle)
return 0;
 }
 
+static int sdma_v4_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+   int i, r = 0;
+   struct amdgpu_device *adev = ring->adev;
+   u32 index = 0;
+   u64 sdma_gfx_preempt;
+
+   amdgpu_sdma_get_index_from_ring(ring, &index);
+   if (index == 0)
+   sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT;
+   else
+   sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT;
+
+   /* assert preemption condition */
+   amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+   /* emit the trailing fence */
+   ring->trail_seq += 1;
+   amdgpu_ring_alloc(ring, 10);
+   sdma_v4_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+   amdgpu_ring_commit(ring);
+
+   /* assert IB preemption */
+   WREG32(sdma_gfx_preempt, 1);
+
+   /* poll the trailing fence */
+   for (i = 0; i < adev->usec_timeout; i++) {
+   if (ring->trail_seq ==
+   le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+   break;
+   udelay(1);
+   }
+
+   if (i >= adev->usec_timeout) {
+   r = -EINVAL;
+   DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+   }
+
+   /* deassert IB preemption */
+   WREG32(sdma_gfx_preempt, 0);
+
+   /* deassert the preemption condition */
+   amdgpu_ring_set_preempt_cond_exec(ring, true);
+   return r;
+}
+
 static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -2435,6 +2487,7 @@ static const struct amdgpu_ring_funcs 
sdma_v4_0_ring_funcs = {
.emit_wreg = sdma_v4_0_ring_emit_wreg,
.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+   .preempt_ib = sdma_v4_0_ring_preempt_ib,
 };
 
 /*
-- 
2.25.1



[PATCH 3/3] drm/amdgpu: skip put fence if signal fails

2022-07-15 Thread jiadong.zhu
From: "Jiadong.Zhu" 

A non-zero return from dma_fence_signal() indicates that the fence
has already been signaled and put somewhere else.
Skip dma_fence_put() in that case to keep the fence refcount correct.

Signed-off-by: Jiadong.Zhu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f4ed0785d523..93c1a5e83835 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1500,8 +1500,8 @@ static void amdgpu_ib_preempt_signal_fences(struct 
dma_fence **fences,
fence = fences[i];
if (!fence)
continue;
-   dma_fence_signal(fence);
-   dma_fence_put(fence);
+   if (!dma_fence_signal(fence))
+   dma_fence_put(fence);
}
 }
 
-- 
2.25.1



[PATCH 1/3] drm/amdgpu: modify mcbp implement for gfx9

2022-07-15 Thread jiadong.zhu
From: "Jiadong.Zhu" 

1. Use the unmap_queues packet to trigger preemption on gfx9.
   Add a trailing fence to track when the preemption is done.
2. Modify the emit_ce_meta and emit_de_meta functions
   to handle resumed IBs.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c| 156 ---
 drivers/gpu/drm/amd/amdgpu/soc15d.h  |   2 +
 3 files changed, 138 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..2b402a8bc4fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -59,6 +59,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT   (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY(1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC  (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 603bfa52e6e8..d6106d480d0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -751,7 +751,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device 
*adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
  void *ras_error_status);
@@ -824,9 +824,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,

PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
if (action == PREEMPT_QUEUES_NO_UNMAP) {
-   amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-   amdgpu_ring_write(kiq_ring, seq);
+   amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & 
ring->buf_mask));
+   amdgpu_ring_write(kiq_ring, 0);
+   amdgpu_ring_write(kiq_ring, 0);
+
} else {
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
@@ -5463,11 +5464,15 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
 
control |= ib->length_dw | (vmid << 24);
 
-   if (amdgpu_sriov_vf(ring->adev) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
+   if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & 
AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+   if (flags & AMDGPU_IB_PREEMPTED)
+   control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-   gfx_v9_0_ring_emit_de_meta(ring);
+   gfx_v9_0_ring_emit_de_meta(ring,
+(!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ? true : false);
}
 
amdgpu_ring_write(ring, header);
@@ -5522,6 +5527,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+   bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
 
/* RELEASE_MEM - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
@@ -5532,6 +5538,7 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
   EOP_TC_WB_ACTION_EN |
   EOP_TC_MD_ACTION_EN)) |
 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+(exec ? EOP_EXEC : 0x0) |
 EVENT_INDEX(5)));
amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel 
? 2 : 0));
 
@@ -5637,33 +5644,132 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring 
*ring)
amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+   struct amdgpu_device *adev = ring->adev;
struct v9_ce_ib_state ce_payload = {0};
-   uint64_t csa_addr;
+   uint64_t offset, ce_payload_gpu_addr;
+   void *ce_payload_