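Avoid repeatedly reloading ib->ptr and ib->length_dw while emitting IBs by writing through a local pointer and setting ib->length_dw only once in each helper, after the message has been emitted and before the remainder of the IB is zero-padded.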
Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@igalia.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 90 +++++++++++++------------ 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ce318f5de047..4bde057e56d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -444,6 +444,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct amdgpu_ib ib_msg; struct dma_fence *f = NULL; uint64_t addr; + u32 *ptr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, @@ -462,45 +463,47 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, goto err; ib = &job->ibs[0]; + ptr = ib->ptr; /* let addr point to page boundary */ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr); /* stitch together an VCE create msg */ - ib->length_dw = 0; - ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ - ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ - ib->ptr[ib->length_dw++] = handle; + *ptr++ = 0x0000000c; /* len */ + *ptr++ = 0x00000001; /* session cmd */ + *ptr++ = handle; if ((ring->adev->vce.fw_version >> 24) >= 52) - ib->ptr[ib->length_dw++] = 0x00000040; /* len */ + *ptr++ = 0x00000040; /* len */ else - ib->ptr[ib->length_dw++] = 0x00000030; /* len */ - ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */ - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000042; - ib->ptr[ib->length_dw++] = 0x0000000a; - ib->ptr[ib->length_dw++] = 0x00000001; - ib->ptr[ib->length_dw++] = 0x00000080; - ib->ptr[ib->length_dw++] = 0x00000060; - ib->ptr[ib->length_dw++] = 0x00000100; - ib->ptr[ib->length_dw++] = 0x00000100; - ib->ptr[ib->length_dw++] = 0x0000000c; - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000030; /* len */ + *ptr++ = 0x01000001; /* create cmd */ + *ptr++ = 0x00000000; + *ptr++ = 0x00000042; + *ptr++ = 0x0000000a; + 
*ptr++ = 0x00000001; + *ptr++ = 0x00000080; + *ptr++ = 0x00000060; + *ptr++ = 0x00000100; + *ptr++ = 0x00000100; + *ptr++ = 0x0000000c; + *ptr++ = 0x00000000; if ((ring->adev->vce.fw_version >> 24) >= 52) { - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; } - ib->ptr[ib->length_dw++] = 0x00000014; /* len */ - ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ - ib->ptr[ib->length_dw++] = upper_32_bits(addr); - ib->ptr[ib->length_dw++] = addr; - ib->ptr[ib->length_dw++] = 0x00000001; + *ptr++ = 0x00000014; /* len */ + *ptr++ = 0x05000005; /* feedback buffer */ + *ptr++ = upper_32_bits(addr); + *ptr++ = addr; + *ptr++ = 0x00000001; + + ib->length_dw = ptr - ib->ptr; for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; + *ptr++ = 0x0; r = amdgpu_job_submit_direct(job, ring, &f); amdgpu_ib_free(&ib_msg, f); @@ -534,6 +537,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; + u32 *ptr; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, @@ -546,27 +550,29 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, return r; ib = &job->ibs[0]; + ptr = ib->ptr; /* stitch together an VCE destroy msg */ - ib->length_dw = 0; - ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ - ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ - ib->ptr[ib->length_dw++] = handle; + *ptr++ = 0x0000000c; /* len */ + *ptr++ = 0x00000001; /* session cmd */ + *ptr++ = handle; - ib->ptr[ib->length_dw++] = 0x00000020; /* len */ - ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ - ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */ - ib->ptr[ib->length_dw++] = 0x00000001; /* 
destroy session */ - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000000; - ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */ - ib->ptr[ib->length_dw++] = 0x00000000; + *ptr++ = 0x00000020; /* len */ + *ptr++ = 0x00000002; /* task info */ + *ptr++ = 0xffffffff; /* next task info, set to 0xffffffff if no */ + *ptr++ = 0x00000001; /* destroy session */ + *ptr++ = 0x00000000; + *ptr++ = 0x00000000; + *ptr++ = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */ + *ptr++ = 0x00000000; - ib->ptr[ib->length_dw++] = 0x00000008; /* len */ - ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */ + *ptr++ = 0x00000008; /* len */ + *ptr++ = 0x02000001; /* destroy cmd */ + + ib->length_dw = ptr - ib->ptr; for (i = ib->length_dw; i < ib_size_dw; ++i) - ib->ptr[i] = 0x0; + *ptr++ = 0x0; if (direct) r = amdgpu_job_submit_direct(job, ring, &f); -- 2.48.0