The Neon intrinsics implementation of SSD_S is not used anymore given that a faster asm implementation exists. Delete the pixel_ssd_s_neon function.
Change-Id: I419bc8609f6f3d8bfb07d06d77b817947ce1ccc1 --- source/common/aarch64/pixel-prim.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp index 63b30604c..1ceec869d 100644 --- a/source/common/aarch64/pixel-prim.cpp +++ b/source/common/aarch64/pixel-prim.cpp @@ -1625,34 +1625,6 @@ void transpose_neon<64>(pixel *dst, const pixel *src, intptr_t stride) } -template<int size> -sse_t pixel_ssd_s_neon(const int16_t *a, intptr_t dstride) -{ - sse_t sum = 0; - - - int32x4_t vsum = vdupq_n_s32(0); - - for (int y = 0; y < size; y++) - { - int x = 0; - - for (; (x + 8) <= size; x += 8) - { - int16x8_t in = vld1q_s16(a + x); - vsum = vmlal_s16(vsum, vget_low_s16(in), vget_low_s16(in)); - vsum = vmlal_high_s16(vsum, (in), (in)); - } - for (; x < size; x++) - { - sum += a[x] * a[x]; - } - - a += dstride; - } - return sum + vaddvq_s32(vsum); -} - }; -- 2.39.5 (Apple Git-154)
From 25369992e65c239c350255c4a3ae7a53682bab7a Mon Sep 17 00:00:00 2001 Message-Id: <25369992e65c239c350255c4a3ae7a53682bab7a.1733846134.git.gerdazsejke.m...@arm.com> In-Reply-To: <cover.1733846134.git.gerdazsejke.m...@arm.com> References: <cover.1733846134.git.gerdazsejke.m...@arm.com> From: Gerda Zsejke More <gerdazsejke.m...@arm.com> Date: Sat, 7 Dec 2024 10:55:33 +0100 Subject: [PATCH 11/11] AArch64: Delete pixel_ssd_s_neon implementation The Neon intrinsics implementation of SSD_S is not used anymore given that a faster asm implementation exists. Delete the pixel_ssd_s_neon function. Change-Id: I419bc8609f6f3d8bfb07d06d77b817947ce1ccc1 --- source/common/aarch64/pixel-prim.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/source/common/aarch64/pixel-prim.cpp b/source/common/aarch64/pixel-prim.cpp index 63b30604c..1ceec869d 100644 --- a/source/common/aarch64/pixel-prim.cpp +++ b/source/common/aarch64/pixel-prim.cpp @@ -1625,34 +1625,6 @@ void transpose_neon<64>(pixel *dst, const pixel *src, intptr_t stride) } -template<int size> -sse_t pixel_ssd_s_neon(const int16_t *a, intptr_t dstride) -{ - sse_t sum = 0; - - - int32x4_t vsum = vdupq_n_s32(0); - - for (int y = 0; y < size; y++) - { - int x = 0; - - for (; (x + 8) <= size; x += 8) - { - int16x8_t in = vld1q_s16(a + x); - vsum = vmlal_s16(vsum, vget_low_s16(in), vget_low_s16(in)); - vsum = vmlal_high_s16(vsum, (in), (in)); - } - for (; x < size; x++) - { - sum += a[x] * a[x]; - } - - a += dstride; - } - return sum + vaddvq_s32(vsum); -} - }; -- 2.39.5 (Apple Git-154)
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel