for GFX 8, originally we use double switch_buffer to
prevents CE go ahead of DE, thus it can avoid VM fault
in case of VM_flush not finish. but double SWITCH_BUFFER
drops performance, and world switch preemption requires
that only one SWITCH_BUFFER is needed at the end of
DMAframe.

to Pevent CE vm fault without double switch_buffer,
we need to insert 128 dw NOP after vm flush because
CE and DE can have at most 128 DW gap from hw perspective.

Even no context switch, SWITCH_BUFFER is encouraged to use
to get better CE performance.

Change-Id: Ifdabf23f97e74156c1f66dddd0918ea7ffcddb20
Signed-off-by: Monk Liu <monk....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +++++++++++++---------------
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cb0098a..a935831 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
        void (*end_use)(struct amdgpu_ring *ring);
        void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t 
val);
        void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
+       void (*emit_switch_buffer) (struct amdgpu_ring *ring);
 };
 
 /*
@@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
 #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a31d7ef..4e5b2f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
                amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
        ring->current_ctx = ctx;
+
+       /* Insert one SB at the end of DMAframe,
+        * Now only GFX8 ip need handling like this, CI won't
+        * insert one SB at this place, instead it insert double
+        * switch buffer after VM FLUSH and PIPELINE sync.
+        */
+       if (ring->funcs->emit_switch_buffer)
+               amdgpu_ring_emit_switch_buffer(ring);
        amdgpu_ring_commit(ring);
        return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index dfa2288..41a2ef1 100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
 {
        u32 header, control = 0;
 
-       /* insert SWITCH_BUFFER packet before first IB in the ring frame */
-       if (ctx_switch) {
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
-       }
-
        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
@@ -6013,11 +6007,9 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct 
amdgpu_ring *ring)
        amdgpu_ring_write(ring, 4); /* poll interval */
 
        if (usepfp) {
-               /* synce CE with ME to prevent CE fetch CEIB before context 
switch done */
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
+               /* sync PFP to ME, otherwise we might get invalid PFP reads */
+               amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+               amdgpu_ring_write(ring, 0x0);
        }
 }
 
@@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct 
amdgpu_ring *ring,
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
        }
+
+       /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish 
*/
+       amdgpu_ring_insert_nop(ring, 128);
 }
 
 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
@@ -6170,6 +6161,12 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct 
amdgpu_ring *ring, u32 idx, u32 v
        amdgpu_ring_write(ring, val);
 }
 
+static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
+{
+       amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+       amdgpu_ring_write(ring, 0);
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state 
state)
 {
@@ -6477,6 +6474,7 @@ static const struct amdgpu_ring_funcs 
gfx_v8_0_ring_funcs_gfx = {
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
+       .emit_switch_buffer = gfx_v8_ring_emit_sb,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to