Optimize the ring_insert_nop function to write n dwords in one
step rather than calling amdgpu_ring_write for each
nop packet. This avoids a function call for each nop
packet, and wptr is also updated only once.

Signed-off-by: Sunil Khatri <sunil.kha...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 03bce2fa866a..910293664902 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -108,10 +108,26 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
  */
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-       int i;
+       uint32_t occupied, chunk1, chunk2;
+       uint32_t *dst;
 
-       for (i = 0; i < count; i++)
-               amdgpu_ring_write(ring, ring->funcs->nop);
+       occupied = ring->wptr & ring->buf_mask;
+       dst = &ring->ring[occupied];
+       chunk1 = ring->buf_mask + 1 - occupied;
+       chunk1 = (chunk1 >= count) ? count : chunk1;
+       chunk2 = count - chunk1;
+
+       if (chunk1)
+               memset32(dst, ring->funcs->nop, chunk1);
+
+       if (chunk2) {
+               dst = ring->ring;
+               memset32(dst, ring->funcs->nop, chunk2);
+       }
+
+       ring->wptr += count;
+       ring->wptr &= ring->ptr_mask;
+       ring->count_dw -= count;
 }
 
 /**
-- 
2.34.1

Reply via email to