Enable unit tests for 64x64 variants of calcresidual, transpose, cpy1Dto2D, cpy2Dto1D and SSD primitives. Enabling these unit tests exposed bugs in cpy1Dto2D_shl_64x64_sve and cpy2Dto1D_shl_64x64_sve. This patch also fixes these issues. --- source/common/aarch64/blockcopy8-sve.S | 4 +- source/test/pixelharness.cpp | 135 ++++++++++++------------- 2 files changed, 68 insertions(+), 71 deletions(-)
diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S index 1d742a64c..793723f68 100644 --- a/source/common/aarch64/blockcopy8-sve.S +++ b/source/common/aarch64/blockcopy8-sve.S @@ -965,7 +965,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve) whilelt p0.h, x9, x8 b.first .L_cpy2Dto1D_shl_64x64 add x1, x1, x2, lsl #1 - addvl x0, x0, #1 + add x0, x0, #128 cbnz w12, .L_init_cpy2Dto1D_shl_64x64 ret endfunc @@ -1204,7 +1204,7 @@ function PFX(cpy1Dto2D_shl_64x64_sve) inch x9 whilelt p0.h, x9, x8 b.first .L_cpy1Dto2D_shl_64x64 - addvl x1, x1, #1 + add x1, x1, #128 add x0, x0, x2, lsl #1 cbnz w12, .L_init_cpy1Dto2D_shl_64x64 ret diff --git a/source/test/pixelharness.cpp b/source/test/pixelharness.cpp index 311985d83..b730c15c0 100644 --- a/source/test/pixelharness.cpp +++ b/source/test/pixelharness.cpp @@ -2686,102 +2686,99 @@ bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPr } } - if (i < BLOCK_64x64) - { - /* TU only primitives */ + /* TU only primitives */ - if (opt.cu[i].calcresidual[NONALIGNED]) + if (opt.cu[i].calcresidual[NONALIGNED]) + { + if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED])) { - if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED])) - { - printf("calcresidual width: %d failed!\n", 4 << i); - return false; - } + printf("calcresidual width: %d failed!\n", 4 << i); + return false; } + } - if (opt.cu[i].calcresidual[ALIGNED]) + if (opt.cu[i].calcresidual[ALIGNED]) + { + if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED])) { - if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED])) - { - printf("calcresidual_aligned width: %d failed!\n", 4 << i); - return false; - } + printf("calcresidual_aligned width: %d failed!\n", 4 << i); + return false; } + } - if (opt.cu[i].transpose) + if (opt.cu[i].transpose) + { + if 
(!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose)) { - if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose)) - { - printf("transpose[%dx%d] failed\n", 4 << i, 4 << i); - return false; - } + printf("transpose[%dx%d] failed\n", 4 << i, 4 << i); + return false; } - if (opt.cu[i].ssd_s[NONALIGNED]) + } + if (opt.cu[i].ssd_s[NONALIGNED]) + { + if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED])) { - if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED])) - { - printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i); - return false; - } + printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i); + return false; } - if (opt.cu[i].ssd_s[ALIGNED]) + } + if (opt.cu[i].ssd_s[ALIGNED]) + { + if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED])) { - if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED])) - { - printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i); - return false; - } + printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i); + return false; } - if (opt.cu[i].copy_cnt) + } + if (opt.cu[i].copy_cnt) + { + if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt)) { - if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt)) - { - printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } + } - if (opt.cu[i].cpy2Dto1D_shl) + if (opt.cu[i].cpy2Dto1D_shl) + { + if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl)) { - if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl)) - { - printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } + } - if (opt.cu[i].cpy2Dto1D_shr) + if (opt.cu[i].cpy2Dto1D_shr) + { + if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr)) { - if 
(!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr)) - { - printf("cpy2Dto1D_shr failed!\n"); - return false; - } + printf("cpy2Dto1D_shr failed!\n"); + return false; } - if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED]) + } + if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED]) + { + if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED])) { - if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED])) - { - printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } - if (opt.cu[i].cpy1Dto2D_shl[ALIGNED]) + } + if (opt.cu[i].cpy1Dto2D_shl[ALIGNED]) + { + if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED])) { - if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED])) - { - printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } + } - if (opt.cu[i].cpy1Dto2D_shr) + if (opt.cu[i].cpy1Dto2D_shr) + { + if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr)) { - if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr)) - { - printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } } } -- 2.39.5 (Apple Git-154)
From 3189175ca54becfbb5a6444fcd31cf7558f256e5 Mon Sep 17 00:00:00 2001 Message-Id: <3189175ca54becfbb5a6444fcd31cf7558f256e5.1740155166.git.gerdazsejke.m...@arm.com> In-Reply-To: <cover.1740155166.git.gerdazsejke.m...@arm.com> References: <cover.1740155166.git.gerdazsejke.m...@arm.com> From: Arpad Panyik <arpad.pan...@arm.com> Date: Thu, 30 Jan 2025 15:54:43 +0100 Subject: [PATCH v2 1/2] pixelharness.cpp: Enable tests for 64x64 TU primitives Enable unit tests for 64x64 variants of calcresidual, transpose, cpy1Dto2D, cpy2Dto1D and SSD primitives. Enabling these unit tests exposed bugs in cpy1Dto2D_shl_64x64_sve and cpy2Dto1D_shl_64x64_sve. This patch also fixes these issues. --- source/common/aarch64/blockcopy8-sve.S | 4 +- source/test/pixelharness.cpp | 135 ++++++++++++------------- 2 files changed, 68 insertions(+), 71 deletions(-) diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S index 1d742a64c..793723f68 100644 --- a/source/common/aarch64/blockcopy8-sve.S +++ b/source/common/aarch64/blockcopy8-sve.S @@ -965,7 +965,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve) whilelt p0.h, x9, x8 b.first .L_cpy2Dto1D_shl_64x64 add x1, x1, x2, lsl #1 - addvl x0, x0, #1 + add x0, x0, #128 cbnz w12, .L_init_cpy2Dto1D_shl_64x64 ret endfunc @@ -1204,7 +1204,7 @@ function PFX(cpy1Dto2D_shl_64x64_sve) inch x9 whilelt p0.h, x9, x8 b.first .L_cpy1Dto2D_shl_64x64 - addvl x1, x1, #1 + add x1, x1, #128 add x0, x0, x2, lsl #1 cbnz w12, .L_init_cpy1Dto2D_shl_64x64 ret diff --git a/source/test/pixelharness.cpp b/source/test/pixelharness.cpp index 311985d83..b730c15c0 100644 --- a/source/test/pixelharness.cpp +++ b/source/test/pixelharness.cpp @@ -2686,102 +2686,99 @@ bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPr } } - if (i < BLOCK_64x64) - { - /* TU only primitives */ + /* TU only primitives */ - if (opt.cu[i].calcresidual[NONALIGNED]) + if (opt.cu[i].calcresidual[NONALIGNED]) + { + if 
(!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED])) { - if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED])) - { - printf("calcresidual width: %d failed!\n", 4 << i); - return false; - } + printf("calcresidual width: %d failed!\n", 4 << i); + return false; } + } - if (opt.cu[i].calcresidual[ALIGNED]) + if (opt.cu[i].calcresidual[ALIGNED]) + { + if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED])) { - if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED])) - { - printf("calcresidual_aligned width: %d failed!\n", 4 << i); - return false; - } + printf("calcresidual_aligned width: %d failed!\n", 4 << i); + return false; } + } - if (opt.cu[i].transpose) + if (opt.cu[i].transpose) + { + if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose)) { - if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose)) - { - printf("transpose[%dx%d] failed\n", 4 << i, 4 << i); - return false; - } + printf("transpose[%dx%d] failed\n", 4 << i, 4 << i); + return false; } - if (opt.cu[i].ssd_s[NONALIGNED]) + } + if (opt.cu[i].ssd_s[NONALIGNED]) + { + if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED])) { - if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED])) - { - printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i); - return false; - } + printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i); + return false; } - if (opt.cu[i].ssd_s[ALIGNED]) + } + if (opt.cu[i].ssd_s[ALIGNED]) + { + if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED])) { - if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED])) - { - printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i); - return false; - } + printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i); + return false; } - if (opt.cu[i].copy_cnt) + } + if (opt.cu[i].copy_cnt) + { + if 
(!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt)) { - if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt)) - { - printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } + } - if (opt.cu[i].cpy2Dto1D_shl) + if (opt.cu[i].cpy2Dto1D_shl) + { + if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl)) { - if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl)) - { - printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } + } - if (opt.cu[i].cpy2Dto1D_shr) + if (opt.cu[i].cpy2Dto1D_shr) + { + if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr)) { - if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr)) - { - printf("cpy2Dto1D_shr failed!\n"); - return false; - } + printf("cpy2Dto1D_shr failed!\n"); + return false; } - if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED]) + } + if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED]) + { + if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED])) { - if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED])) - { - printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } - if (opt.cu[i].cpy1Dto2D_shl[ALIGNED]) + } + if (opt.cu[i].cpy1Dto2D_shl[ALIGNED]) + { + if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED])) { - if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED])) - { - printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } + } - if 
(opt.cu[i].cpy1Dto2D_shr) + if (opt.cu[i].cpy1Dto2D_shr) + { + if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr)) { - if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr)) - { - printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i); - return false; - } + printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i); + return false; } } } -- 2.39.5 (Apple Git-154)
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel