Previously we always inserted 128 NOPs after vm_flush, which could push
the DMA frame size above 256 DWs and cause it to be automatically
aligned up to 512 DWs.

Now we count how many DWs have already been emitted after vm_flush and
pad only the remaining NOPs needed to reach 128 DWs before emit_ib.

That way each submit takes only 256 DWs.
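
To illustrate the approach (the code lines below are lifted from the
hunks that follow; only the placement comments are mine):

    /* e.g. in gfx_v8_0_ring_emit_hdp_flush(), after its 7 DWs are written: */
    ring->dws_between_vm_ib += 7;

    /* and at the end of gfx_v8_ring_emit_cntxcntl(), right before emit_ib,
     * the vm_flush..emit_ib span is topped up to exactly 128 DWs: */
    amdgpu_ring_insert_nop(ring, 128 - ring->dws_between_vm_ib);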

v2:
drop the 128-NOP insertion in gfx_v8_0_ring_emit_vm_flush, and subtract
the DWs emitted between vm_flush and emit_ib from the estimated frame
size, since we already consider that vm_flush takes 128 + 19 DWs.
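
To make the frame-size math explicit (numbers taken from the hunks
below): the packets now counted between vm_flush and emit_ib add up to
20 + 7 + 6 + 3 + 4 + 29 = 69 DWs, and the 128 - dws_between_vm_ib NOP
padding tops that span up to exactly the 128 DWs already reserved by
the "128 + 19" vm_flush entry, which is why those 69 DWs are subtracted
from the cntxcntl entry in emit_frame_size.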

Change-Id: Iac198e16f35b071476ba7bd48ab338223f6fe650
Signed-off-by: Monk Liu <monk....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 25 ++++++++++++++++++++-----
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 9129b8c..e91f227 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -165,6 +165,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                patch_offset = amdgpu_ring_init_cond_exec(ring);
 
        need_ctx_switch = ring->current_ctx != fence_ctx;
+       ring->dws_between_vm_ib = 0; /* clear before recalculate */
        if (vm) {
                r = amdgpu_vm_flush(ring, job);
                if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index c813cbe..1dbe600 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -173,6 +173,7 @@ struct amdgpu_ring {
 #if defined(CONFIG_DEBUG_FS)
        struct dentry *ent;
 #endif
+       u32 dws_between_vm_ib;
 };
 
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5f37313..5e8e4eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5670,6 +5670,8 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
+
+       ring->dws_between_vm_ib += 20;
 }
 
 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
@@ -6489,6 +6491,8 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
        amdgpu_ring_write(ring, ref_and_mask);
        amdgpu_ring_write(ring, ref_and_mask);
        amdgpu_ring_write(ring, 0x20); /* poll interval */
+
+       ring->dws_between_vm_ib += 7;
 }
 
 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
@@ -6500,6 +6504,8 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
+
+       ring->dws_between_vm_ib += 4;
 }
 
 
@@ -6573,6 +6579,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
 
+       ring->dws_between_vm_ib += 6;
 }
 
 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -6636,8 +6643,6 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
-               /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
-               amdgpu_ring_insert_nop(ring, 128);
        }
 }
 
@@ -6711,9 +6716,11 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 {
        uint32_t dw2 = 0;
 
-       if (amdgpu_sriov_vf(ring->adev))
+       if (amdgpu_sriov_vf(ring->adev)) {
                gfx_v8_0_ring_emit_ce_meta_init(ring,
                        (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
+               ring->dws_between_vm_ib += 8;
+       }
 
        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -6739,10 +6746,17 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
+       ring->dws_between_vm_ib += 3;
 
-       if (amdgpu_sriov_vf(ring->adev))
+       if (amdgpu_sriov_vf(ring->adev)) {
                gfx_v8_0_ring_emit_de_meta_init(ring,
                        (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
+               ring->dws_between_vm_ib += 21;
+       }
+
+       /* Pad NOPs before emit_ib to prevent the CE from running ahead of
+        * vm_flush, which could trigger a VM fault. */
+       amdgpu_ring_insert_nop(ring, 128 - ring->dws_between_vm_ib);
 }
 
 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
@@ -7018,7 +7032,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
                2 + /* gfx_v8_ring_emit_sb */
-               3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
+               3 + 4 + 29 - /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
+               20 - 7 - 6 - 3 - 4 - 29, /* no need to count gds/hdp_flush/vm_flush fence/cntx_cntl/vgt_flush/meta-data anymore */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
-- 
2.7.4
