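Enable unit tests for the 64x64 variants of the calcresidual, transpose,
copy_cnt, cpy1Dto2D, cpy2Dto1D and SSD primitives. Enabling these unit
tests exposed bugs in cpy1Dto2D_shl_64x64_sve and
cpy2Dto1D_shl_64x64_sve: after each row, both functions advanced their
1D buffer pointer with `addvl ..., #1` (one SVE vector length). This
patch also fixes these bugs by advancing the pointer by a fixed
128 bytes instead.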
---
 source/common/aarch64/blockcopy8-sve.S |   4 +-
 source/test/pixelharness.cpp           | 135 ++++++++++++-------------
 2 files changed, 68 insertions(+), 71 deletions(-)

diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index 1d742a64c..793723f68 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -965,7 +965,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve)
     whilelt         p0.h, x9, x8
     b.first         .L_cpy2Dto1D_shl_64x64
     add             x1, x1, x2, lsl #1
-    addvl           x0, x0, #1
+    add             x0, x0, #128
     cbnz            w12, .L_init_cpy2Dto1D_shl_64x64
     ret
 endfunc
@@ -1204,7 +1204,7 @@ function PFX(cpy1Dto2D_shl_64x64_sve)
     inch            x9
     whilelt         p0.h, x9, x8
     b.first         .L_cpy1Dto2D_shl_64x64
-    addvl           x1, x1, #1
+    add             x1, x1, #128
     add             x0, x0, x2, lsl #1
     cbnz            w12, .L_init_cpy1Dto2D_shl_64x64
     ret
diff --git a/source/test/pixelharness.cpp b/source/test/pixelharness.cpp
index 311985d83..b730c15c0 100644
--- a/source/test/pixelharness.cpp
+++ b/source/test/pixelharness.cpp
@@ -2686,102 +2686,99 @@ bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPr
             }
         }
 
-        if (i < BLOCK_64x64)
-        {
-            /* TU only primitives */
+        /* TU only primitives */
 
-            if (opt.cu[i].calcresidual[NONALIGNED])
+        if (opt.cu[i].calcresidual[NONALIGNED])
+        {
+            if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], 
opt.cu[i].calcresidual[NONALIGNED]))
             {
-                if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], 
opt.cu[i].calcresidual[NONALIGNED]))
-                {
-                    printf("calcresidual width: %d failed!\n", 4 << i);
-                    return false;
-                }
+                printf("calcresidual width: %d failed!\n", 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].calcresidual[ALIGNED])
+        if (opt.cu[i].calcresidual[ALIGNED])
+        {
+            if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], 
opt.cu[i].calcresidual[ALIGNED]))
             {
-                if 
(!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], 
opt.cu[i].calcresidual[ALIGNED]))
-                {
-                    printf("calcresidual_aligned width: %d failed!\n", 4 << i);
-                    return false;
-                }
+                printf("calcresidual_aligned width: %d failed!\n", 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].transpose)
+        if (opt.cu[i].transpose)
+        {
+            if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
             {
-                if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
-                {
-                    printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].ssd_s[NONALIGNED])
+        }
+        if (opt.cu[i].ssd_s[NONALIGNED])
+        {
+            if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], 
opt.cu[i].ssd_s[NONALIGNED]))
             {
-                if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], 
opt.cu[i].ssd_s[NONALIGNED]))
-                {
-                    printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].ssd_s[ALIGNED])
+        }
+        if (opt.cu[i].ssd_s[ALIGNED])
+        {
+            if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], 
opt.cu[i].ssd_s[ALIGNED]))
             {
-                if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], 
opt.cu[i].ssd_s[ALIGNED]))
-                {
-                    printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].copy_cnt)
+        }
+        if (opt.cu[i].copy_cnt)
+        {
+            if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
             {
-                if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
-                {
-                    printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy2Dto1D_shl)
+        if (opt.cu[i].cpy2Dto1D_shl)
+        {
+            if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, 
opt.cu[i].cpy2Dto1D_shl))
             {
-                if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, 
opt.cu[i].cpy2Dto1D_shl))
-                {
-                    printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy2Dto1D_shr)
+        if (opt.cu[i].cpy2Dto1D_shr)
+        {
+            if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, 
opt.cu[i].cpy2Dto1D_shr))
             {
-                if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, 
opt.cu[i].cpy2Dto1D_shr))
-                {
-                    printf("cpy2Dto1D_shr failed!\n");
-                    return false;
-                }
+                printf("cpy2Dto1D_shr failed!\n");
+                return false;
             }
-            if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+        }
+        if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+        {
+            if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], 
opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
             {
-                if 
(!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], 
opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
-                {
-                    printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+        }
+        if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+        {
+            if 
(!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], 
opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
             {
-                if 
(!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], 
opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
-                {
-                    printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 
<< i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << 
i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy1Dto2D_shr)
+        if (opt.cu[i].cpy1Dto2D_shr)
+        {
+            if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, 
opt.cu[i].cpy1Dto2D_shr))
             {
-                if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, 
opt.cu[i].cpy1Dto2D_shr))
-                {
-                    printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
         }
     }
-- 
2.39.5 (Apple Git-154)

From 3189175ca54becfbb5a6444fcd31cf7558f256e5 Mon Sep 17 00:00:00 2001
Message-Id: <3189175ca54becfbb5a6444fcd31cf7558f256e5.1740155166.git.gerdazsejke.m...@arm.com>
In-Reply-To: <cover.1740155166.git.gerdazsejke.m...@arm.com>
References: <cover.1740155166.git.gerdazsejke.m...@arm.com>
From: Arpad Panyik <arpad.pan...@arm.com>
Date: Thu, 30 Jan 2025 15:54:43 +0100
Subject: [PATCH v2 1/2] pixelharness.cpp: Enable tests for 64x64 TU primitives

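Enable unit tests for the 64x64 variants of the calcresidual, transpose,
copy_cnt, cpy1Dto2D, cpy2Dto1D and SSD primitives. Enabling these unit
tests exposed bugs in cpy1Dto2D_shl_64x64_sve and
cpy2Dto1D_shl_64x64_sve: after each row, both functions advanced their
1D buffer pointer with `addvl ..., #1` (one SVE vector length). This
patch also fixes these bugs by advancing the pointer by a fixed
128 bytes instead.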
---
 source/common/aarch64/blockcopy8-sve.S |   4 +-
 source/test/pixelharness.cpp           | 135 ++++++++++++-------------
 2 files changed, 68 insertions(+), 71 deletions(-)

diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index 1d742a64c..793723f68 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -965,7 +965,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve)
     whilelt         p0.h, x9, x8
     b.first         .L_cpy2Dto1D_shl_64x64
     add             x1, x1, x2, lsl #1
-    addvl           x0, x0, #1
+    add             x0, x0, #128
     cbnz            w12, .L_init_cpy2Dto1D_shl_64x64
     ret
 endfunc
@@ -1204,7 +1204,7 @@ function PFX(cpy1Dto2D_shl_64x64_sve)
     inch            x9
     whilelt         p0.h, x9, x8
     b.first         .L_cpy1Dto2D_shl_64x64
-    addvl           x1, x1, #1
+    add             x1, x1, #128
     add             x0, x0, x2, lsl #1
     cbnz            w12, .L_init_cpy1Dto2D_shl_64x64
     ret
diff --git a/source/test/pixelharness.cpp b/source/test/pixelharness.cpp
index 311985d83..b730c15c0 100644
--- a/source/test/pixelharness.cpp
+++ b/source/test/pixelharness.cpp
@@ -2686,102 +2686,99 @@ bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPr
             }
         }
 
-        if (i < BLOCK_64x64)
-        {
-            /* TU only primitives */
+        /* TU only primitives */
 
-            if (opt.cu[i].calcresidual[NONALIGNED])
+        if (opt.cu[i].calcresidual[NONALIGNED])
+        {
+            if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], 
opt.cu[i].calcresidual[NONALIGNED]))
             {
-                if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], 
opt.cu[i].calcresidual[NONALIGNED]))
-                {
-                    printf("calcresidual width: %d failed!\n", 4 << i);
-                    return false;
-                }
+                printf("calcresidual width: %d failed!\n", 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].calcresidual[ALIGNED])
+        if (opt.cu[i].calcresidual[ALIGNED])
+        {
+            if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], 
opt.cu[i].calcresidual[ALIGNED]))
             {
-                if 
(!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], 
opt.cu[i].calcresidual[ALIGNED]))
-                {
-                    printf("calcresidual_aligned width: %d failed!\n", 4 << i);
-                    return false;
-                }
+                printf("calcresidual_aligned width: %d failed!\n", 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].transpose)
+        if (opt.cu[i].transpose)
+        {
+            if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
             {
-                if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
-                {
-                    printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].ssd_s[NONALIGNED])
+        }
+        if (opt.cu[i].ssd_s[NONALIGNED])
+        {
+            if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], 
opt.cu[i].ssd_s[NONALIGNED]))
             {
-                if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], 
opt.cu[i].ssd_s[NONALIGNED]))
-                {
-                    printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].ssd_s[ALIGNED])
+        }
+        if (opt.cu[i].ssd_s[ALIGNED])
+        {
+            if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], 
opt.cu[i].ssd_s[ALIGNED]))
             {
-                if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], 
opt.cu[i].ssd_s[ALIGNED]))
-                {
-                    printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].copy_cnt)
+        }
+        if (opt.cu[i].copy_cnt)
+        {
+            if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
             {
-                if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
-                {
-                    printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy2Dto1D_shl)
+        if (opt.cu[i].cpy2Dto1D_shl)
+        {
+            if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, 
opt.cu[i].cpy2Dto1D_shl))
             {
-                if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, 
opt.cu[i].cpy2Dto1D_shl))
-                {
-                    printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy2Dto1D_shr)
+        if (opt.cu[i].cpy2Dto1D_shr)
+        {
+            if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, 
opt.cu[i].cpy2Dto1D_shr))
             {
-                if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, 
opt.cu[i].cpy2Dto1D_shr))
-                {
-                    printf("cpy2Dto1D_shr failed!\n");
-                    return false;
-                }
+                printf("cpy2Dto1D_shr failed!\n");
+                return false;
             }
-            if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+        }
+        if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+        {
+            if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], 
opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
             {
-                if 
(!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], 
opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
-                {
-                    printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+        }
+        if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+        {
+            if 
(!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], 
opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
             {
-                if 
(!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], 
opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
-                {
-                    printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 
<< i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << 
i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy1Dto2D_shr)
+        if (opt.cu[i].cpy1Dto2D_shr)
+        {
+            if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, 
opt.cu[i].cpy1Dto2D_shr))
             {
-                if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, 
opt.cu[i].cpy1Dto2D_shr))
-                {
-                    printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
         }
     }
-- 
2.39.5 (Apple Git-154)

_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to