This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 436b74b725a95ef9f00edd6264d9adab1a66a9c8
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Mon Jan 26 02:03:32 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Thu Jan 29 12:25:33 2026 +0100

    avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function
    
    hevc_dequant_4x4_8_c (GCC):                             20.2 ( 1.00x)
    hevc_dequant_4x4_8_c (Clang):                           21.7 ( 1.00x)
    hevc_dequant_4x4_8_ssse3:                                5.8 ( 3.51x)
    hevc_dequant_8x8_8_c (GCC):                             32.9 ( 1.00x)
    hevc_dequant_8x8_8_c (Clang):                           78.7 ( 1.00x)
    hevc_dequant_8x8_8_ssse3:                                6.8 ( 4.83x)
    hevc_dequant_16x16_8_c (GCC):                          105.1 ( 1.00x)
    hevc_dequant_16x16_8_c (Clang):                        151.1 ( 1.00x)
    hevc_dequant_16x16_8_ssse3:                             19.3 ( 5.45x)
    hevc_dequant_32x32_8_c (GCC):                          415.7 ( 1.00x)
    hevc_dequant_32x32_8_c (Clang):                        602.3 ( 1.00x)
    hevc_dequant_32x32_8_ssse3:                             78.2 ( 5.32x)
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/hevc/Makefile                       |  1 +
 .../x86/hevc/dequant.asm                           | 65 ++++++++++++----------
 libavcodec/x86/hevc/dsp_init.c                     |  3 +
 3 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/libavcodec/x86/hevc/Makefile b/libavcodec/x86/hevc/Makefile
index 74418a322c..d09c613a19 100644
--- a/libavcodec/x86/hevc/Makefile
+++ b/libavcodec/x86/hevc/Makefile
@@ -4,6 +4,7 @@ clean::
 X86ASM-OBJS-$(CONFIG_HEVC_DECODER)      += x86/hevc/dsp_init.o      \
                                            x86/hevc/add_res.o       \
                                            x86/hevc/deblock.o       \
+                                           x86/hevc/dequant.o       \
                                            x86/hevc/idct.o          \
                                            x86/hevc/mc.o            \
                                            x86/hevc/sao.o           \
diff --git a/libavutil/x86/imgutils.asm b/libavcodec/x86/hevc/dequant.asm
similarity index 51%
copy from libavutil/x86/imgutils.asm
copy to libavcodec/x86/hevc/dequant.asm
index 3cca56cdca..f0453c940b 100644
--- a/libavutil/x86/imgutils.asm
+++ b/libavcodec/x86/hevc/dequant.asm
@@ -1,6 +1,6 @@
 ;*****************************************************************************
-;* Copyright 2016 Anton Khirnov
-;*
+;* SSSE3-optimized HEVC dequant code
+;*****************************************************************************
 ;* This file is part of FFmpeg.
 ;*
 ;* FFmpeg is free software; you can redistribute it and/or
@@ -22,32 +22,39 @@
 
 SECTION .text
 
-INIT_XMM sse4
-cglobal image_copy_plane_uc_from, 6, 7, 4, dst, dst_linesize, src, src_linesize, bw, height, rowpos
-    add dstq, bwq
-    add srcq, bwq
-    neg bwq
-
-.row_start:
-    mov rowposq, bwq
-
+INIT_XMM ssse3
+; void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size)
+cglobal hevc_dequant_8, 2, 3+UNIX64, 3
+
+; coeffs, log2_size (in ecx), tmp/size
+%if WIN64
+    DECLARE_REG_TMP 1,0,2
+    ; r0 is the shift register (ecx) on win64
+    xchg          r0, r1
+%elif ARCH_X86_64
+    DECLARE_REG_TMP 0,3,1
+    ; r3 is ecx
+    mov          t1d, r1d
+%else
+    ; r1 is ecx
+    DECLARE_REG_TMP 0,1,2
+%endif
+
+    mov          t2d, 256
+    shl          t2d, t1b
+    movd          m0, t2d
+    add          t1d, t1d
+    SPLATW        m0, m0
+    mov          t2d, 1
+    shl          t2d, t1b
 .loop:
-    movntdqa m0, [srcq + rowposq + 0 * mmsize]
-    movntdqa m1, [srcq + rowposq + 1 * mmsize]
-    movntdqa m2, [srcq + rowposq + 2 * mmsize]
-    movntdqa m3, [srcq + rowposq + 3 * mmsize]
-
-    mova [dstq + rowposq + 0 * mmsize], m0
-    mova [dstq + rowposq + 1 * mmsize], m1
-    mova [dstq + rowposq + 2 * mmsize], m2
-    mova [dstq + rowposq + 3 * mmsize], m3
-
-    add rowposq, 4 * mmsize
-    jnz .loop
-
-    add srcq, src_linesizeq
-    add dstq, dst_linesizeq
-    dec heightd
-    jnz .row_start
-
+    mova          m1, [t0]
+    mova          m2, [t0+mmsize]
+    pmulhrsw      m1, m0
+    pmulhrsw      m2, m0
+    mova        [t0], m1
+    mova [t0+mmsize], m2
+    add           t0, 2*mmsize
+    sub          t2d, mmsize
+    jg         .loop
     RET
diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c
index 5b2b10f33a..bd967eac67 100644
--- a/libavcodec/x86/hevc/dsp_init.c
+++ b/libavcodec/x86/hevc/dsp_init.c
@@ -30,6 +30,8 @@
 #include "libavcodec/x86/hevc/dsp.h"
 #include "libavcodec/x86/h26x/h2656dsp.h"
 
+void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size);
+
 #define LFC_FUNC(DIR, DEPTH, OPT) \
 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
 
@@ -847,6 +849,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
             c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
 #endif
+            c->dequant = ff_hevc_dequant_8_ssse3;
             SAO_EDGE_INIT(8, ssse3);
         }
 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to