PR #21579 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21579
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21579.patch
>From 86e553bdda774c17c30b87192b198eddae9dd2ef Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Sun, 25 Jan 2026 23:23:36 +0100 Subject: [PATCH 1/4] avcodec/hevc/dsp_template: Optimize impossible branches away Saves 1856B of .text here. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/hevc/dsp_template.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/hevc/dsp_template.c b/libavcodec/hevc/dsp_template.c index 573cf9ee1e..f703f6d071 100644 --- a/libavcodec/hevc/dsp_template.c +++ b/libavcodec/hevc/dsp_template.c @@ -132,7 +132,7 @@ static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size) int x, y; int size = 1 << log2_size; - if (shift > 0) { + if (BIT_DEPTH <= 9 || shift > 0) { int offset = 1 << (shift - 1); for (y = 0; y < size; y++) { for (x = 0; x < size; x++) { @@ -140,7 +140,7 @@ static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size) coeffs++; } } - } else if (shift < 0) { + } else if (BIT_DEPTH > 10 && shift < 0) { for (y = 0; y < size; y++) { for (x = 0; x < size; x++) { *coeffs = *(uint16_t*)coeffs << -shift; -- 2.52.0 >From 2e5ae4f840dea1a8cd3c2907d5a007616e7ed27b Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Sun, 25 Jan 2026 23:32:14 +0100 Subject: [PATCH 2/4] avcodec/hevc/dsp: Add alignment for dequant Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/hevc/dsp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/hevc/dsp.h b/libavcodec/hevc/dsp.h index a63586c3a2..b884cd36be 100644 --- a/libavcodec/hevc/dsp.h +++ b/libavcodec/hevc/dsp.h @@ -50,7 +50,7 @@ typedef struct HEVCDSPContext { void (*add_residual[4])(uint8_t *dst, const int16_t *res, ptrdiff_t stride); - void (*dequant)(int16_t *coeffs, int16_t log2_size); + void (*dequant)(int16_t *coeffs /* align 32 */, int16_t log2_size); void (*transform_rdpcm)(int16_t *coeffs, int16_t log2_size, int mode); -- 2.52.0 >From 
5edc6a6274f1592c3d2de62f9782f4e3b93d1842 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Mon, 26 Jan 2026 02:03:32 +0100 Subject: [PATCH 3/4] avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function hevc_dequant_4x4_8_c (GCC): 20.2 ( 1.00x) hevc_dequant_4x4_8_c (Clang): 21.7 ( 1.00x) hevc_dequant_4x4_8_ssse3: 5.8 ( 3.51x) hevc_dequant_8x8_8_c (GCC): 32.9 ( 1.00x) hevc_dequant_8x8_8_c (Clang): 78.7 ( 1.00x) hevc_dequant_8x8_8_ssse3: 6.8 ( 4.83x) hevc_dequant_16x16_8_c (GCC): 105.1 ( 1.00x) hevc_dequant_16x16_8_c (Clang): 151.1 ( 1.00x) hevc_dequant_16x16_8_ssse3: 19.3 ( 5.45x) hevc_dequant_32x32_8_c (GCC): 415.7 ( 1.00x) hevc_dequant_32x32_8_c (Clang): 602.3 ( 1.00x) hevc_dequant_32x32_8_ssse3: 78.2 ( 5.32x) Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/hevc/Makefile | 1 + libavcodec/x86/hevc/dequant.asm | 60 +++++++++++++++++++++++++++++++++ libavcodec/x86/hevc/dsp_init.c | 3 ++ 3 files changed, 64 insertions(+) create mode 100644 libavcodec/x86/hevc/dequant.asm diff --git a/libavcodec/x86/hevc/Makefile b/libavcodec/x86/hevc/Makefile index 74418a322c..d09c613a19 100644 --- a/libavcodec/x86/hevc/Makefile +++ b/libavcodec/x86/hevc/Makefile @@ -4,6 +4,7 @@ clean:: X86ASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc/dsp_init.o \ x86/hevc/add_res.o \ x86/hevc/deblock.o \ + x86/hevc/dequant.o \ x86/hevc/idct.o \ x86/hevc/mc.o \ x86/hevc/sao.o \ diff --git a/libavcodec/x86/hevc/dequant.asm b/libavcodec/x86/hevc/dequant.asm new file mode 100644 index 0000000000..f0453c940b --- /dev/null +++ b/libavcodec/x86/hevc/dequant.asm @@ -0,0 +1,60 @@ +;***************************************************************************** +;* SSSE3-optimized HEVC dequant code +;***************************************************************************** +;* This file is part of FFmpeg. 
+;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +INIT_XMM ssse3 +; void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size) +cglobal hevc_dequant_8, 2, 3+UNIX64, 3 + +; coeffs, log2_size (in ecx), tmp/size +%if WIN64 + DECLARE_REG_TMP 1,0,2 + ; r0 is the shift register (ecx) on win64 + xchg r0, r1 +%elif ARCH_X86_64 + DECLARE_REG_TMP 0,3,1 + ; r3 is ecx + mov t1d, r1d +%else + ; r1 is ecx + DECLARE_REG_TMP 0,1,2 +%endif + + mov t2d, 256 + shl t2d, t1b + movd m0, t2d + add t1d, t1d + SPLATW m0, m0 + mov t2d, 1 + shl t2d, t1b +.loop: + mova m1, [t0] + mova m2, [t0+mmsize] + pmulhrsw m1, m0 + pmulhrsw m2, m0 + mova [t0], m1 + mova [t0+mmsize], m2 + add t0, 2*mmsize + sub t2d, mmsize + jg .loop + RET diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c index 5b2b10f33a..bd967eac67 100644 --- a/libavcodec/x86/hevc/dsp_init.c +++ b/libavcodec/x86/hevc/dsp_init.c @@ -30,6 +30,8 @@ #include "libavcodec/x86/hevc/dsp.h" #include "libavcodec/x86/h26x/h2656dsp.h" +void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size); + #define LFC_FUNC(DIR, DEPTH, OPT) \ void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## 
OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q); @@ -847,6 +849,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; #endif + c->dequant = ff_hevc_dequant_8_ssse3; SAO_EDGE_INIT(8, ssse3); } #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 -- 2.52.0 >From 3fbdf06a6d681a86578bca2812fd052c639f35f9 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Mon, 26 Jan 2026 02:16:47 +0100 Subject: [PATCH 4/4] tests/checkasm/hevc_dequant: Only init buffer when needed Signed-off-by: Andreas Rheinhardt <[email protected]> --- tests/checkasm/hevc_dequant.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/checkasm/hevc_dequant.c b/tests/checkasm/hevc_dequant.c index 20e322994a..5036662666 100644 --- a/tests/checkasm/hevc_dequant.c +++ b/tests/checkasm/hevc_dequant.c @@ -48,11 +48,11 @@ static void check_dequant(HEVCDSPContext *h, int bit_depth) int size = block_size * block_size; declare_func(void, int16_t *coeffs, int16_t log2_size); - randomize_buffers(coeffs0, size); - memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size); - if (check_func(h->dequant, "hevc_dequant_%dx%d_%d", block_size, block_size, bit_depth)) { + randomize_buffers(coeffs0, size); + memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size); + call_ref(coeffs0, i); call_new(coeffs1, i); if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size)) -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
