This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit c35f57f3c4c4c6da5278a9f14a18d8c7d16e5b3a
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Apr 12 15:24:05 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Sun Jun 14 22:04:42 2026 +0200

    avcodec/x86/fpel: Use SSE2 in avg_pixels8
    
    No change in benchmarks here; this already allows
    removing an emms_c() call from cavsdec.c.
    
    Reviewed-by: James Almer <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/cavsdec.c          |  2 --
 libavcodec/x86/cavsdsp.c      |  6 +-----
 libavcodec/x86/fpel.asm       | 46 +++++++++++++++++++++++++++++++++++--------
 libavcodec/x86/fpel.h         |  8 ++++----
 libavcodec/x86/h264_qpel.c    |  2 +-
 libavcodec/x86/hpeldsp_init.c |  2 +-
 libavcodec/x86/qpeldsp_init.c |  6 +-----
 libavcodec/x86/vc1dsp_init.c  |  5 ++---
 tests/checkasm/cavsdsp.c      |  2 +-
 9 files changed, 49 insertions(+), 30 deletions(-)

diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index cc26a904db..bc1ed60bf0 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -27,7 +27,6 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
-#include "libavutil/emms.h"
 #include "libavutil/mem.h"
 #include "avcodec.h"
 #include "get_bits.h"
@@ -1161,7 +1160,6 @@ static int decode_pic(AVSContext *h)
                 break;
         } while (ff_cavs_next_mb(h));
     }
-    emms_c();
     if (ret >= 0 && h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
         av_frame_unref(h->DPB[1].f);
         FFSWAP(AVSFrame, h->cur, h->DPB[1]);
diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index e333bbee49..91ec866681 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -91,11 +91,6 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c)
 {
     av_unused int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMX_EXTERNAL
-    if (EXTERNAL_MMXEXT(cpu_flags)) {
-        c->avg_cavs_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
-    }
-#endif
 #if HAVE_SSE2_EXTERNAL
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->put_cavs_qpel_pixels_tab[0][ 0] = ff_put_pixels16x16_sse2;
@@ -114,6 +109,7 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c)
         c->avg_cavs_qpel_pixels_tab[0][ 4] = avg_cavs_qpel16_mc01_sse2;
         c->avg_cavs_qpel_pixels_tab[0][ 8] = avg_cavs_qpel16_mc02_sse2;
         c->avg_cavs_qpel_pixels_tab[0][12] = avg_cavs_qpel16_mc03_sse2;
+        c->avg_cavs_qpel_pixels_tab[1][ 0] = ff_avg_pixels8x8_sse2;
         c->avg_cavs_qpel_pixels_tab[1][ 2] = ff_avg_cavs_qpel8_mc20_sse2;
         c->avg_cavs_qpel_pixels_tab[1][ 4] = avg_cavs_qpel8_mc01_sse2;
         c->avg_cavs_qpel_pixels_tab[1][ 8] = ff_avg_cavs_qpel8_mc02_sse2;
diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm
index e4becca5fb..598a57ab0d 100644
--- a/libavcodec/x86/fpel.asm
+++ b/libavcodec/x86/fpel.asm
@@ -25,8 +25,40 @@
 
 SECTION .text
 
-; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
-;                        ptrdiff_t line_size, int h)
+INIT_XMM sse2
+; void ff_avg_pixels8x8_sse2(uint8_t *block, const uint8_t *pixels,
+;                            ptrdiff_t line_size)
+cglobal avg_pixels8x8, 3,5,6
+    mov         r3d, 8
+    jmp         avg_pixels8_after_prologue
+
+; void ff_avg_pixels8_sse2(uint8_t *block, const uint8_t *pixels,
+;                          ptrdiff_t line_size, int h)
+cglobal avg_pixels8, 4,5,6
+avg_pixels8_after_prologue:
+    lea          r4, [r2*3]
+.loop:
+    movq         m0, [r1]
+    movq         m1, [r0]
+    movhps       m0, [r1+r2]
+    movhps       m1, [r0+r2]
+    movq         m2, [r1+r2*2]
+    movq         m3, [r0+r2*2]
+    pavgb        m0, m1
+    movq         m4, [r1+r4]
+    pavgb        m2, m3
+    movq         m5, [r0+r4]
+    lea          r1, [r1+r2*4]
+    pavgb        m4, m5
+    movq       [r0], m0
+    movhps  [r0+r2], m0
+    movq  [r0+r2*2], m2
+    movq    [r0+r4], m4
+    lea          r0, [r0+r2*4]
+    sub         r3d, 4
+    jne       .loop
+    RET
+
 %macro OP_PIXELS 2-3 0
 %if %2 == mmsize/2
 %define LOAD movh
@@ -35,11 +67,13 @@ SECTION .text
 %define LOAD movu
 %define SAVE mova
 %endif
-cglobal %1_pixels%2x%2, 3,5+4*%3,%3 ? 4 : 0
+cglobal %1_pixels%2x%2, 3,5+4*%3,4
     mov         r3d, %2
     jmp         %1_pixels%2_after_prologue
 
-cglobal %1_pixels%2, 4,5+4*%3,%3 ? 4 : 0
+; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
+;                        ptrdiff_t line_size, int h)
+cglobal %1_pixels%2, 4,5+4*%3,4
 %1_pixels%2_after_prologue:
     lea          r4, [r2*3]
 .loop:
@@ -76,10 +110,6 @@ cglobal %1_pixels%2, 4,5+4*%3,%3 ? 4 : 0
     RET
 %endmacro
 
-INIT_MMX mmxext
-OP_PIXELS avg, 8
-
-INIT_XMM sse2
 OP_PIXELS put, 8, UNIX64
 OP_PIXELS put, 16
 OP_PIXELS avg, 16
diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h
index 0b0056021e..6ec28af635 100644
--- a/libavcodec/x86/fpel.h
+++ b/libavcodec/x86/fpel.h
@@ -22,10 +22,10 @@
 #include <stddef.h>
 #include <stdint.h>
 
-void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
-                           ptrdiff_t line_size, int h);
-void ff_avg_pixels8x8_mmxext(uint8_t *block, const uint8_t *pixels,
-                             ptrdiff_t line_size);
+void ff_avg_pixels8_sse2(uint8_t *block, const uint8_t *pixels,
+                         ptrdiff_t line_size, int h);
+void ff_avg_pixels8x8_sse2(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size);
 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
 void ff_avg_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels,
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index 5d618651a4..0cc653c6ca 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -391,7 +391,6 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
     if (EXTERNAL_MMXEXT(cpu_flags)) {
         if (!high_bit_depth) {
             SET_QPEL_FUNCS_1PP(put_h264_qpel, 2,  4, mmxext, );
-            c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
             SET_QPEL_FUNCS_1PP(avg_h264_qpel, 2,  4, mmxext, );
             c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_pixels4_mmxext;
         } else if (bit_depth == 10) {
@@ -416,6 +415,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
             H264_QPEL_FUNCS(3, 3, sse2);
             c->put_h264_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
             c->avg_h264_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
+            c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_sse2;
         }
 
         if (bit_depth == 10) {
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index f689879d51..4e4abd5273 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -80,7 +80,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
 
-    c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
     c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
 
@@ -114,6 +113,7 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags)
     c->avg_pixels_tab[0][1]        = ff_avg_pixels16_x2_sse2;
     c->avg_pixels_tab[0][2]        = ff_avg_pixels16_y2_sse2;
     c->avg_pixels_tab[0][3]        = ff_avg_pixels16_xy2_sse2;
+    c->avg_pixels_tab[1][0]        = ff_avg_pixels8_sse2;
 
     c->avg_no_rnd_pixels_tab[0]    = ff_avg_pixels16_sse2;
     c->avg_no_rnd_pixels_tab[1]    = ff_avg_no_rnd_pixels16_x2_sse2;
diff --git a/libavcodec/x86/qpeldsp_init.c b/libavcodec/x86/qpeldsp_init.c
index 18c259b0d8..771961c1b5 100644
--- a/libavcodec/x86/qpeldsp_init.c
+++ b/libavcodec/x86/qpeldsp_init.c
@@ -281,11 +281,6 @@ av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (X86_MMXEXT(cpu_flags)) {
-#if HAVE_MMXEXT_EXTERNAL
-        c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
-#endif /* HAVE_MMXEXT_EXTERNAL */
-    }
 #if HAVE_SSE2_EXTERNAL
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->put_no_rnd_qpel_pixels_tab[0][0] =
@@ -293,6 +288,7 @@ av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
         c->put_no_rnd_qpel_pixels_tab[1][0] =
         c->put_qpel_pixels_tab[1][0] = ff_put_pixels8x8_sse2;
         c->avg_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
+        c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_sse2;
 
         SET_V_QPEL_FUNCS (16, sse2,);
         SET_V_QPEL_FUNCS (8,  sse2,);
diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c
index 9f80048791..3f0eb5746c 100644
--- a/libavcodec/x86/vc1dsp_init.c
+++ b/libavcodec/x86/vc1dsp_init.c
@@ -72,7 +72,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, ptrdiff_t stride, int pq)
     }
 
 DECLARE_FUNCTION(put_,  8, _sse2)
-DECLARE_FUNCTION(avg_,  8, _mmxext)
+DECLARE_FUNCTION(avg_,  8, _sse2)
 DECLARE_FUNCTION(put_, 16, _sse2)
 DECLARE_FUNCTION(avg_, 16, _sse2)
 
@@ -114,8 +114,6 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
-        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmxext;
-
         dsp->vc1_inv_trans_8x8_dc                = ff_vc1_inv_trans_8x8_dc_mmxext;
         dsp->vc1_inv_trans_4x8_dc                = ff_vc1_inv_trans_4x8_dc_mmxext;
         dsp->vc1_inv_trans_8x4_dc                = ff_vc1_inv_trans_8x4_dc_mmxext;
@@ -127,6 +125,7 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         dsp->put_vc1_mspel_pixels_tab[0][0]      = put_vc1_mspel_mc00_16_sse2;
         dsp->put_vc1_mspel_pixels_tab[1][0]      = put_vc1_mspel_mc00_8_sse2;
         dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
+        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_sse2;
     }
     if (EXTERNAL_SSSE3(cpu_flags)) {
         ASSIGN_LF4(ssse3);
diff --git a/tests/checkasm/cavsdsp.c b/tests/checkasm/cavsdsp.c
index 3e4a9ac127..ab6b695ae4 100644
--- a/tests/checkasm/cavsdsp.c
+++ b/tests/checkasm/cavsdsp.c
@@ -71,7 +71,7 @@ static void check_cavs_qpeldsp(void)
         TEST(put_cavs_qpel_pixels_tab),
         TEST(avg_cavs_qpel_pixels_tab),
     };
-    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+    declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
 
     ff_cavsdsp_init(&cavsdsp);
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to