From: Leo Liu <leo....@amd.com>

Swizzle mode requires the luma and chroma pitches of both the
reference and input pictures to be aligned to 256.

Signed-off-by: Leo Liu <leo....@amd.com>
Acked-by: Alex Deucher <alexander.deuc...@amd.com>
---
 tests/amdgpu/vce_tests.c | 54 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c
index de63aa1..b03807b 100644
--- a/tests/amdgpu/vce_tests.c
+++ b/tests/amdgpu/vce_tests.c
@@ -227,36 +227,39 @@ static void free_resource(struct amdgpu_vce_bo *vce_bo)
        r = amdgpu_va_range_free(vce_bo->va_handle);
        CU_ASSERT_EQUAL(r, 0);
 
        r = amdgpu_bo_free(vce_bo->handle);
        CU_ASSERT_EQUAL(r, 0);
        memset(vce_bo, 0, sizeof(*vce_bo));
 }
 
 static void amdgpu_cs_vce_create(void)
 {
+       unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
        int len, r;
 
        enc.width = vce_create[6];
        enc.height = vce_create[7];
 
        num_resources  = 0;
        alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
        resources[num_resources++] = enc.fb[0].handle;
        resources[num_resources++] = ib_handle;
 
        len = 0;
        memcpy(ib_cpu, vce_session, sizeof(vce_session));
        len += sizeof(vce_session) / 4;
        memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
        len += sizeof(vce_taskinfo) / 4;
        memcpy((ib_cpu + len), vce_create, sizeof(vce_create));
+       ib_cpu[len + 8] = ALIGN(enc.width, align);
+       ib_cpu[len + 9] = ALIGN(enc.width, align);
        len += sizeof(vce_create) / 4;
        memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
        ib_cpu[len + 2] = enc.fb[0].addr >> 32;
        ib_cpu[len + 3] = enc.fb[0].addr;
        len += sizeof(vce_feedback) / 4;
 
        r = submit(len, AMDGPU_HW_IP_VCE);
        CU_ASSERT_EQUAL(r, 0);
 
        free_resource(&enc.fb[0]);
@@ -284,102 +287,118 @@ static void amdgpu_cs_vce_config(void)
        len += sizeof(vce_pic_ctrl) / 4;
 
        r = submit(len, AMDGPU_HW_IP_VCE);
        CU_ASSERT_EQUAL(r, 0);
 }
 
 static  void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc)
 {
 
        uint64_t luma_offset, chroma_offset;
-       int len = 0, r;
+       unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+       unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
+       int len = 0, i, r;
 
        luma_offset = enc->vbuf.addr;
-       chroma_offset = luma_offset + enc->width * enc->height;
+       chroma_offset = luma_offset + luma_size;
 
        memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
        len += sizeof(vce_session) / 4;
        memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
        len += sizeof(vce_taskinfo) / 4;
        memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer));
        ib_cpu[len + 2] = enc->bs[0].addr >> 32;
        ib_cpu[len + 3] = enc->bs[0].addr;
        len += sizeof(vce_bs_buffer) / 4;
        memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer));
        ib_cpu[len + 2] = enc->cpb.addr >> 32;
        ib_cpu[len + 3] = enc->cpb.addr;
        len += sizeof(vce_context_buffer) / 4;
        memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
+       for (i = 0; i <  8; ++i)
+               ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
+       for (i = 0; i <  8; ++i)
+               ib_cpu[len + 10 + i] = luma_size * 1.5;
        len += sizeof(vce_aux_buffer) / 4;
        memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
        ib_cpu[len + 2] = enc->fb[0].addr >> 32;
        ib_cpu[len + 3] = enc->fb[0].addr;
        len += sizeof(vce_feedback) / 4;
        memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode));
        ib_cpu[len + 9] = luma_offset >> 32;
        ib_cpu[len + 10] = luma_offset;
        ib_cpu[len + 11] = chroma_offset >> 32;
        ib_cpu[len + 12] = chroma_offset;
-       ib_cpu[len + 73] = 0x7800;
-       ib_cpu[len + 74] = 0x7800 + 0x5000;
+       ib_cpu[len + 14] = ALIGN(enc->width, align);
+       ib_cpu[len + 15] = ALIGN(enc->width, align);
+       ib_cpu[len + 73] = luma_size * 1.5;
+       ib_cpu[len + 74] = luma_size * 2.5;
        len += sizeof(vce_encode) / 4;
        enc->ib_len = len;
        if (!enc->two_instance) {
                r = submit(len, AMDGPU_HW_IP_VCE);
                CU_ASSERT_EQUAL(r, 0);
        }
 }
 
 static void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc)
 {
        uint64_t luma_offset, chroma_offset;
-       int len, r;
+       int len, i, r;
+       unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+       unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16);
 
        len = (enc->two_instance) ? enc->ib_len : 0;
        luma_offset = enc->vbuf.addr;
-       chroma_offset = luma_offset + enc->width * enc->height;
+       chroma_offset = luma_offset + luma_size;
 
        if (!enc->two_instance) {
                memcpy((ib_cpu + len), vce_session, sizeof(vce_session));
                len += sizeof(vce_session) / 4;
        }
        memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo));
        len += sizeof(vce_taskinfo) / 4;
        memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer));
        ib_cpu[len + 2] = enc->bs[1].addr >> 32;
        ib_cpu[len + 3] = enc->bs[1].addr;
        len += sizeof(vce_bs_buffer) / 4;
        memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer));
        ib_cpu[len + 2] = enc->cpb.addr >> 32;
        ib_cpu[len + 3] = enc->cpb.addr;
        len += sizeof(vce_context_buffer) / 4;
        memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer));
+       for (i = 0; i <  8; ++i)
+               ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2);
+       for (i = 0; i <  8; ++i)
+               ib_cpu[len + 10 + i] = luma_size * 1.5;
        len += sizeof(vce_aux_buffer) / 4;
        memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback));
        ib_cpu[len + 2] = enc->fb[1].addr >> 32;
        ib_cpu[len + 3] = enc->fb[1].addr;
        len += sizeof(vce_feedback) / 4;
        memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode));
        ib_cpu[len + 2] = 0;
        ib_cpu[len + 9] = luma_offset >> 32;
        ib_cpu[len + 10] = luma_offset;
        ib_cpu[len + 11] = chroma_offset >> 32;
        ib_cpu[len + 12] = chroma_offset;
+       ib_cpu[len + 14] = ALIGN(enc->width, align);
+       ib_cpu[len + 15] = ALIGN(enc->width, align);
        ib_cpu[len + 18] = 0;
        ib_cpu[len + 19] = 0;
        ib_cpu[len + 56] = 3;
        ib_cpu[len + 57] = 0;
        ib_cpu[len + 58] = 0;
-       ib_cpu[len + 59] = 0x7800;
-       ib_cpu[len + 60] = 0x7800 + 0x5000;
+       ib_cpu[len + 59] = luma_size * 1.5;
+       ib_cpu[len + 60] = luma_size * 2.5;
        ib_cpu[len + 73] = 0;
-       ib_cpu[len + 74] = 0x5000;
+       ib_cpu[len + 74] = luma_size;
        ib_cpu[len + 81] = 1;
        ib_cpu[len + 82] = 1;
        len += sizeof(vce_encode) / 4;
 
        r = submit(len, AMDGPU_HW_IP_VCE);
        CU_ASSERT_EQUAL(r, 0);
 }
 
 static void check_result(struct amdgpu_vce_encode *enc)
 {
@@ -401,42 +420,53 @@ static void check_result(struct amdgpu_vce_encode *enc)
                        sum += enc->bs[i].ptr[j];
                CU_ASSERT_EQUAL(sum, s[i]);
                r = amdgpu_bo_cpu_unmap(enc->bs[i].handle);
                CU_ASSERT_EQUAL(r, 0);
        }
 }
 
 static void amdgpu_cs_vce_encode(void)
 {
        uint32_t vbuf_size, bs_size = 0x154000, cpb_size;
-       int r;
+       unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16;
+       int i, r;
 
-       vbuf_size = enc.width * enc.height * 1.5;
+       vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5;
        cpb_size = vbuf_size * 10;
        num_resources = 0;
        alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT);
        resources[num_resources++] = enc.fb[0].handle;
        alloc_resource(&enc.fb[1], 4096, AMDGPU_GEM_DOMAIN_GTT);
        resources[num_resources++] = enc.fb[1].handle;
        alloc_resource(&enc.bs[0], bs_size, AMDGPU_GEM_DOMAIN_GTT);
        resources[num_resources++] = enc.bs[0].handle;
        alloc_resource(&enc.bs[1], bs_size, AMDGPU_GEM_DOMAIN_GTT);
        resources[num_resources++] = enc.bs[1].handle;
        alloc_resource(&enc.vbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM);
        resources[num_resources++] = enc.vbuf.handle;
        alloc_resource(&enc.cpb, cpb_size, AMDGPU_GEM_DOMAIN_VRAM);
        resources[num_resources++] = enc.cpb.handle;
        resources[num_resources++] = ib_handle;
 
        r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr);
        CU_ASSERT_EQUAL(r, 0);
-       memcpy(enc.vbuf.ptr, frame, sizeof(frame));
+
+       memset(enc.vbuf.ptr, 0, vbuf_size);
+       for (i = 0; i < enc.height; ++i) {
+               memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width);
+               enc.vbuf.ptr += ALIGN(enc.width, align);
+       }
+       for (i = 0; i < enc.height / 2; ++i) {
+               memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width);
+               enc.vbuf.ptr += ALIGN(enc.width, align);
+       }
+
        r = amdgpu_bo_cpu_unmap(enc.vbuf.handle);
        CU_ASSERT_EQUAL(r, 0);
 
        amdgpu_cs_vce_config();
 
        if (family_id >= AMDGPU_FAMILY_VI) {
                vce_taskinfo[3] = 3;
                amdgpu_cs_vce_encode_idr(&enc);
                amdgpu_cs_vce_encode_p(&enc);
                check_result(&enc);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to