This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 436b74b725a95ef9f00edd6264d9adab1a66a9c8 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Mon Jan 26 02:03:32 2026 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Thu Jan 29 12:25:33 2026 +0100 avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function hevc_dequant_4x4_8_c (GCC): 20.2 ( 1.00x) hevc_dequant_4x4_8_c (Clang): 21.7 ( 1.00x) hevc_dequant_4x4_8_ssse3: 5.8 ( 3.51x) hevc_dequant_8x8_8_c (GCC): 32.9 ( 1.00x) hevc_dequant_8x8_8_c (Clang): 78.7 ( 1.00x) hevc_dequant_8x8_8_ssse3: 6.8 ( 4.83x) hevc_dequant_16x16_8_c (GCC): 105.1 ( 1.00x) hevc_dequant_16x16_8_c (Clang): 151.1 ( 1.00x) hevc_dequant_16x16_8_ssse3: 19.3 ( 5.45x) hevc_dequant_32x32_8_c (GCC): 415.7 ( 1.00x) hevc_dequant_32x32_8_c (Clang): 602.3 ( 1.00x) hevc_dequant_32x32_8_ssse3: 78.2 ( 5.32x) Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/hevc/Makefile | 1 + .../x86/hevc/dequant.asm | 65 ++++++++++++---------- libavcodec/x86/hevc/dsp_init.c | 3 + 3 files changed, 40 insertions(+), 29 deletions(-) diff --git a/libavcodec/x86/hevc/Makefile b/libavcodec/x86/hevc/Makefile index 74418a322c..d09c613a19 100644 --- a/libavcodec/x86/hevc/Makefile +++ b/libavcodec/x86/hevc/Makefile @@ -4,6 +4,7 @@ clean:: X86ASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc/dsp_init.o \ x86/hevc/add_res.o \ x86/hevc/deblock.o \ + x86/hevc/dequant.o \ x86/hevc/idct.o \ x86/hevc/mc.o \ x86/hevc/sao.o \ diff --git a/libavutil/x86/imgutils.asm b/libavcodec/x86/hevc/dequant.asm similarity index 51% copy from libavutil/x86/imgutils.asm copy to libavcodec/x86/hevc/dequant.asm index 3cca56cdca..f0453c940b 100644 --- a/libavutil/x86/imgutils.asm +++ b/libavcodec/x86/hevc/dequant.asm @@ -1,6 +1,6 @@ ;***************************************************************************** -;* Copyright 2016 Anton Khirnov -;* +;* SSSE3-optimized HEVC dequant code +;***************************************************************************** ;* This file is part of FFmpeg. ;* ;* FFmpeg is free software; you can redistribute it and/or @@ -22,32 +22,39 @@ SECTION .text -INIT_XMM sse4 -cglobal image_copy_plane_uc_from, 6, 7, 4, dst, dst_linesize, src, src_linesize, bw, height, rowpos - add dstq, bwq - add srcq, bwq - neg bwq - -.row_start: - mov rowposq, bwq - +INIT_XMM ssse3 +; void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size) +cglobal hevc_dequant_8, 2, 3+UNIX64, 3 + +; coeffs, log2_size (in ecx), tmp/size +%if WIN64 + DECLARE_REG_TMP 1,0,2 + ; r0 is the shift register (ecx) on win64 + xchg r0, r1 +%elif ARCH_X86_64 + DECLARE_REG_TMP 0,3,1 + ; r3 is ecx + mov t1d, r1d +%else + ; r1 is ecx + DECLARE_REG_TMP 0,1,2 +%endif + + mov t2d, 256 + shl t2d, t1b + movd m0, t2d + add t1d, t1d + SPLATW m0, m0 + mov t2d, 1 + shl t2d, t1b .loop: - movntdqa m0, [srcq + rowposq + 0 * mmsize] - movntdqa m1, [srcq + rowposq + 1 * mmsize] - movntdqa m2, [srcq + rowposq + 2 * mmsize] - movntdqa m3, [srcq + rowposq + 3 * mmsize] - - mova [dstq + rowposq + 0 * mmsize], m0 - mova [dstq + rowposq + 1 * mmsize], m1 - mova [dstq + rowposq + 2 * mmsize], m2 - mova [dstq + rowposq + 3 * mmsize], m3 - - add rowposq, 4 * mmsize - jnz .loop - - add srcq, src_linesizeq - add dstq, dst_linesizeq - dec heightd - jnz .row_start - + mova m1, [t0] + mova m2, [t0+mmsize] + pmulhrsw m1, m0 + pmulhrsw m2, m0 + mova [t0], m1 + mova [t0+mmsize], m2 + add t0, 2*mmsize + sub t2d, mmsize + jg .loop RET diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c index 5b2b10f33a..bd967eac67 100644 --- a/libavcodec/x86/hevc/dsp_init.c +++ b/libavcodec/x86/hevc/dsp_init.c @@ -30,6 +30,8 @@ #include "libavcodec/x86/hevc/dsp.h" #include "libavcodec/x86/h26x/h2656dsp.h" +void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size); + #define LFC_FUNC(DIR, DEPTH, OPT) \ void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q); @@ -847,6 +849,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; #endif + c->dequant = ff_hevc_dequant_8_ssse3; SAO_EDGE_INIT(8, ssse3); } #if HAVE_SSE4_EXTERNAL && ARCH_X86_64 _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
