-----Original Message-----
From: Monk Liu [mailto:[email protected]] 
Sent: Wednesday, August 24, 2016 12:00 PM
To: [email protected]
Cc: Liu, Monk <[email protected]>
Subject: [PATCH] drm/amdgpu:fix DMAframe for GFX8

1,drop inserting double SWITCH_BUFFERS scheme, which impacts performance, 
because double SWITCH_BUFFER actaully doesn't switch buffer, so the CE's 
ping-pong buffer is not used at all. Now only insert one SWITCH_BUFFERS at the 
bottom of each GFX dmaframe. And only inserting one SWITCH_BUFFER is a must for 
Virtualization World Switch!

2,without double SWITCH_BUFFRES, CE will go very ahead of DE which will trigger 
VM fault (CE IB run before DE finished VM flush), so we need a way to prevent 
that happen.
according to GFX8.0 spec, CE will go ahead of DE at most by 128dw, so we can 
insert 128 NOP before CE ib, so that CE won't run const IB before DE finished 
VM flush.

3,for virtualization, CP requires that each DMAframe need two const IB and one 
IB, so we cannot remove preamble CE IB even no context switch take place.

4,each DMAframe won't occupy more than 256dw, so allocate 256dw is enough

TODO:
use DMAframe entry design to implement ib_schedule(), make sure each DMAframe 
is aligned and fixed at their offset, which is needed for preemption and easy 
for ring buffer debug & dump.

Change-Id: I1b98d6352b7ead91b661094921bfd43cfeaae190
Signed-off-by: Monk Liu <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 15 ++++++++++-----  
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 27 ++++++++++++---------------
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 316bd2a..11bb05f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -337,6 +337,7 @@ struct amdgpu_ring_funcs {
        void (*end_use)(struct amdgpu_ring *ring);
        void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t 
val);
        void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
+       void (*emit_sb) (struct amdgpu_ring *ring, int cnt);
 };
 
 /*
@@ -2370,6 +2371,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)  
#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))  
#define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
+#define amdgpu_ring_emit_sb(r, c) (r)->funcs->emit_sb((r), (c))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))  #define 
amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))  #define 
amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a31d7ef..d99af07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
                return -EINVAL;
        }
 
-       r = amdgpu_ring_alloc(ring, 256 * num_ibs);
+       r = amdgpu_ring_alloc(ring, 256);
        if (r) {
                dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
                return r;
@@ -174,15 +174,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
        /* always set cond_exec_polling to CONTINUE */
        *ring->cond_exe_cpu_addr = 1;
 
+
+       /* emit 128 dw nop before IBc */
+       if (ring->type == AMDGPU_RING_TYPE_GFX)
+               amdgpu_ring_insert_nop(ring, 128);
+
        skip_preamble = ring->current_ctx == ctx;
        need_ctx_switch = ring->current_ctx != ctx;
        for (i = 0; i < num_ibs; ++i) {
                ib = &ibs[i];
 
-               /* drop preamble IBs if we don't have a context switch */
-               if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
-                       continue;
-
                amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
                                    need_ctx_switch);
                need_ctx_switch = false;
@@ -210,6 +211,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
                amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
        ring->current_ctx = ctx;
+
+       /* Insert one SB at the bottom of each DMA frame */
+       if (ring->funcs->emit_sb)
+               amdgpu_ring_emit_sb(ring, 1);
        amdgpu_ring_commit(ring);
        return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index dfa2288..37dc64b 100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,  {
        u32 header, control = 0;
 
-       /* insert SWITCH_BUFFER packet before first IB in the ring frame */
-       if (ctx_switch) {
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
-       }
-
        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
@@ -6013,11 +6007,9 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct 
amdgpu_ring *ring)
        amdgpu_ring_write(ring, 4); /* poll interval */
 
        if (usepfp) {
-               /* synce CE with ME to prevent CE fetch CEIB before context 
switch done */
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
+               /* sync PFP to ME, otherwise we might get invalid PFP reads */
+               amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+               amdgpu_ring_write(ring, 0x0);
        }
 }
 
@@ -6065,10 +6057,6 @@ static void gfx_v8_0_ring_emit_vm_flush(struct 
amdgpu_ring *ring,
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-               amdgpu_ring_write(ring, 0);
        }
 }
 
@@ -6170,6 +6158,14 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct 
amdgpu_ring *ring, u32 idx, u32 v
        amdgpu_ring_write(ring, val);
 }
 
+static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring, int cnt) {
+       while (cnt--) {
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
+       }
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state 
state)  { @@ -6477,6 +6473,7 @@ static const struct amdgpu_ring_funcs 
gfx_v8_0_ring_funcs_gfx = {
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
+       .emit_sb = gfx_v8_ring_emit_sb,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
--
1.9.1

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to