This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

The following commit(s) were added to refs/heads/master by this push:
     new 3a7c09eb39 avcodec/x86/mpegvideoencdsp_init: Port draw_edges to SSSE3
3a7c09eb39 is described below

commit 3a7c09eb395b0b485b6ad3b1fbebc6c50950a677
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sat Feb 7 00:38:47 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Wed Feb 11 11:15:06 2026 +0100

    avcodec/x86/mpegvideoencdsp_init: Port draw_edges to SSSE3
    
    Benchmarks:
    draw_edges_8_1724_4_c:                                2672.2 ( 1.00x)
    draw_edges_8_1724_4_mmx:                              3191.5 ( 0.84x)
    draw_edges_8_1724_4_ssse3:                            2179.6 ( 1.23x)
    draw_edges_8_1724_8_c:                                2852.3 ( 1.00x)
    draw_edges_8_1724_8_mmx:                              3683.0 ( 0.77x)
    draw_edges_8_1724_8_ssse3:                            2225.7 ( 1.28x)
    draw_edges_8_1724_16_c:                               4169.4 ( 1.00x)
    draw_edges_8_1724_16_mmx:                             4665.9 ( 0.89x)
    draw_edges_8_1724_16_ssse3:                           2765.8 ( 1.51x)
    draw_edges_128_407_4_c:                               1126.6 ( 1.00x)
    draw_edges_128_407_4_mmx:                              943.9 ( 1.19x)
    draw_edges_128_407_4_ssse3:                            925.7 ( 1.22x)
    draw_edges_128_407_8_c:                               1208.8 ( 1.00x)
    draw_edges_128_407_8_mmx:                             1119.1 ( 1.08x)
    draw_edges_128_407_8_ssse3:                            997.8 ( 1.21x)
    draw_edges_128_407_16_c:                              1352.4 ( 1.00x)
    draw_edges_128_407_16_mmx:                            1368.7 ( 0.99x)
    draw_edges_128_407_16_ssse3:                          1148.3 ( 1.18x)
    draw_edges_1080_31_4_c:                                228.5 ( 1.00x)
    draw_edges_1080_31_4_mmx:                              240.8 ( 0.95x)
    draw_edges_1080_31_4_ssse3:                            226.7 ( 1.01x)
    draw_edges_1080_31_8_c:                                411.1 ( 1.00x)
    draw_edges_1080_31_8_mmx:                              432.9 ( 0.95x)
    draw_edges_1080_31_8_ssse3:                            403.2 ( 1.02x)
    draw_edges_1080_31_16_c:                              1121.2 ( 1.00x)
    draw_edges_1080_31_16_mmx:                            1124.9 ( 1.00x)
    draw_edges_1080_31_16_ssse3:                          1125.4 ( 1.00x)
    draw_edges_1920_4_4_c:                                 310.8 ( 1.00x)
    draw_edges_1920_4_4_mmx:                               311.6 ( 1.00x)
    draw_edges_1920_4_4_ssse3:                             311.6 ( 1.00x)
    draw_edges_1920_4_4_negstride_c:                       307.0 ( 1.00x)
    draw_edges_1920_4_4_negstride_mmx:                     306.7 ( 1.00x)
    draw_edges_1920_4_4_negstride_ssse3:                   306.7 ( 1.00x)
    draw_edges_1920_4_8_c:                                 724.2 ( 1.00x)
    draw_edges_1920_4_8_mmx:                               724.9 ( 1.00x)
    draw_edges_1920_4_8_ssse3:                             717.3 ( 1.01x)
    draw_edges_1920_4_8_negstride_c:                       719.2 ( 1.00x)
    draw_edges_1920_4_8_negstride_mmx:                     717.1 ( 1.00x)
    draw_edges_1920_4_8_negstride_ssse3:                   710.9 ( 1.01x)
    draw_edges_1920_4_16_c:                               1752.9 ( 1.00x)
    draw_edges_1920_4_16_mmx:                             1754.6 ( 1.00x)
    draw_edges_1920_4_16_ssse3:                           1751.1 ( 1.00x)
    draw_edges_1920_4_16_negstride_c:                     1783.2 ( 1.00x)
    draw_edges_1920_4_16_negstride_mmx:                   1778.2 ( 1.00x)
    draw_edges_1920_4_16_negstride_ssse3:                 1768.3 ( 1.01x)
    
    Reviewed-by: Michael Niedermayer <[email protected]>
    Reviewed-by: James Almer <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/mpegvideo_enc.c            |   3 -
 libavcodec/snowenc.c                  |   2 -
 libavcodec/x86/mpegvideoencdsp_init.c | 131 ++++++++++++++--------------------
 tests/checkasm/mpegvideoencdsp.c      |   4 +-
 4 files changed, 56 insertions(+), 84 deletions(-)

diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index a4f78c25db..46c8863a14 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -1419,7 +1419,6 @@ static int load_input_picture(MPVMainEncContext *const m, const AVFrame *pic_arg
                                             EDGE_BOTTOM);
                 }
             }
-            emms_c();
         }
 
         pic->display_picture_number = display_picture_number;
@@ -1886,8 +1885,6 @@ static void frame_end(MPVMainEncContext *const m)
                                 EDGE_TOP | EDGE_BOTTOM);
     }
 
-    emms_c();
-
     m->last_pict_type                  = s->c.pict_type;
     m->last_lambda_for[s->c.pict_type] = s->c.cur_pic.ptr->f->quality;
     if (s->c.pict_type != AV_PICTURE_TYPE_B)
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index 68c2bb2ebc..feaf4dc1a0 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -1786,7 +1786,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                                 EDGE_TOP | EDGE_BOTTOM);
 
     }
-    emms_c();
     pic = s->input_picture;
     pic->pict_type = pict->pict_type;
     pic->quality = pict->quality;
@@ -1831,7 +1830,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                                     s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
                                     EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
         }
-        emms_c();
     }
 
     ff_snow_frames_prepare(s);
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
index 220c75785a..1b6c75e9e5 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -34,7 +34,6 @@ int ff_pix_sum16_xop(const uint8_t *pix, ptrdiff_t line_size);
 int ff_pix_norm1_sse2(const uint8_t *pix, ptrdiff_t line_size);
 void ff_add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale);
 
-#if HAVE_INLINE_ASM
 #if HAVE_SSSE3_INLINE
 #define SCALE_OFFSET -1
 
@@ -84,77 +83,62 @@ static int try_8x8basis_ssse3(const int16_t rem[64], const int16_t weight[64], c
     );
     return i;
 }
-#endif /* HAVE_SSSE3_INLINE */
 
 /* Draw the edges of width 'w' of an image of size width, height */
-static void draw_edges_mmx(uint8_t *buf, ptrdiff_t wrap, int width, int height,
-                           int w, int h, int sides)
+static void draw_edges_ssse3(uint8_t *buf, ptrdiff_t wrap, int width, int height,
+                             int w, int h, int sides)
 {
-    uint8_t *ptr, *last_line;
+    uint8_t *ptr = buf, *last_line;
     int i;
 
+    av_assert1(w == 16 || w == 8 || w == 4);
+
     /* left and right */
-    ptr = buf;
-    if (w == 8) {
-        __asm__ volatile (
-            "1:                             \n\t"
-            "movd            (%0), %%mm0    \n\t"
-            "punpcklbw      %%mm0, %%mm0    \n\t"
-            "punpcklwd      %%mm0, %%mm0    \n\t"
-            "punpckldq      %%mm0, %%mm0    \n\t"
-            "movq           %%mm0, -8(%0)   \n\t"
-            "movq      -8(%0, %2), %%mm1    \n\t"
-            "punpckhbw      %%mm1, %%mm1    \n\t"
-            "punpckhwd      %%mm1, %%mm1    \n\t"
-            "punpckhdq      %%mm1, %%mm1    \n\t"
-            "movq           %%mm1, (%0, %2) \n\t"
-            "add               %1, %0       \n\t"
-            "cmp               %3, %0       \n\t"
-            "jnz               1b           \n\t"
-            : "+r" (ptr)
-            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
-              "r" (ptr + wrap * height));
-    } else if (w == 16) {
-        __asm__ volatile (
-            "1:                                 \n\t"
-            "movd            (%0), %%mm0        \n\t"
-            "punpcklbw      %%mm0, %%mm0        \n\t"
-            "punpcklwd      %%mm0, %%mm0        \n\t"
-            "punpckldq      %%mm0, %%mm0        \n\t"
-            "movq           %%mm0, -8(%0)       \n\t"
-            "movq           %%mm0, -16(%0)      \n\t"
-            "movq      -8(%0, %2), %%mm1        \n\t"
-            "punpckhbw      %%mm1, %%mm1        \n\t"
-            "punpckhwd      %%mm1, %%mm1        \n\t"
-            "punpckhdq      %%mm1, %%mm1        \n\t"
-            "movq           %%mm1,  (%0, %2)    \n\t"
-            "movq           %%mm1, 8(%0, %2)    \n\t"
-            "add               %1, %0           \n\t"
-            "cmp               %3, %0           \n\t"
-            "jnz               1b               \n\t"
-            : "+r"(ptr)
-            : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
-            );
-    } else {
-        av_assert1(w == 4);
-        __asm__ volatile (
-            "1:                             \n\t"
-            "movd            (%0), %%mm0    \n\t"
-            "punpcklbw      %%mm0, %%mm0    \n\t"
-            "punpcklwd      %%mm0, %%mm0    \n\t"
-            "movd           %%mm0, -4(%0)   \n\t"
-            "movd      -4(%0, %2), %%mm1    \n\t"
-            "punpcklbw      %%mm1, %%mm1    \n\t"
-            "punpckhwd      %%mm1, %%mm1    \n\t"
-            "punpckhdq      %%mm1, %%mm1    \n\t"
-            "movd           %%mm1, (%0, %2) \n\t"
-            "add               %1, %0       \n\t"
-            "cmp               %3, %0       \n\t"
-            "jnz               1b           \n\t"
-            : "+r" (ptr)
-            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
-              "r" (ptr + wrap * height));
-    }
+    __asm__ volatile (
+        "pcmpeqw         %%xmm3, %%xmm3     \n\t"
+        "pxor            %%xmm2, %%xmm2     \n\t"
+        "psrlw              $14, %%xmm3     \n\t"  // pw_3
+        "pshufb          %%xmm2, %%xmm3     \n\t"  // pb_3
+        "cmp                 $8, %4         \n\t"
+        "jg                 16f             \n\t"
+        "jl                  4f             \n\t"
+        "8:                                 \n\t"
+        "movd              (%0), %%xmm0     \n\t"
+        "movd        -4(%0, %2), %%xmm1     \n\t"
+        "pshufb          %%xmm2, %%xmm0     \n\t"
+        "pshufb          %%xmm3, %%xmm1     \n\t"
+        "movq            %%xmm0, -8(%0)     \n\t"
+        "movq            %%xmm1, (%0, %2)   \n\t"
+        "add                 %1, %0         \n\t"
+        "cmp                 %3, %0         \n\t"
+        "jnz                 8b             \n\t"
+        "jmp                 1f             \n\t"
+        "4:                                 \n\t"
+        "movd              (%0), %%xmm0     \n\t"
+        "movd        -4(%0, %2), %%xmm1     \n\t"
+        "pshufb          %%xmm2, %%xmm0     \n\t"
+        "pshufb          %%xmm3, %%xmm1     \n\t"
+        "movd            %%xmm0, -4(%0)     \n\t"
+        "movd            %%xmm1, (%0, %2)   \n\t"
+        "add                 %1, %0         \n\t"
+        "cmp                 %3, %0         \n\t"
+        "jnz                 4b             \n\t"
+        "jmp                 1f             \n\t"
+        "16:                                \n\t"
+        "movd              (%0), %%xmm0     \n\t"
+        "movd        -4(%0, %2), %%xmm1     \n\t"
+        "pshufb          %%xmm2, %%xmm0     \n\t"
+        "pshufb          %%xmm3, %%xmm1     \n\t"
+        "movdqu          %%xmm0, -16(%0)    \n\t"
+        "movdqu          %%xmm1, (%0, %2)   \n\t"
+        "add                 %1, %0         \n\t"
+        "cmp                 %3, %0         \n\t"
+        "jnz                16b             \n\t"
+        "1:                                 \n\t"
+        : "+r" (ptr)
+        : "r" ((x86_reg) wrap), "r" ((x86_reg) width), "r"(ptr + wrap * height), "r" (w)
+        XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")
+    );
 
     /* top and bottom + corners */
     buf -= w;
@@ -168,8 +152,7 @@ static void draw_edges_mmx(uint8_t *buf, ptrdiff_t wrap, int width, int height,
             // bottom
             memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
 }
-
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_SSSE3_INLINE */
 
 av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
                                          AVCodecContext *avctx)
@@ -186,20 +169,14 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
         c->pix_sum     = ff_pix_sum16_xop;
     }
 
-#if HAVE_INLINE_ASM
-
-    if (INLINE_MMX(cpu_flags)) {
-        if (avctx->bits_per_raw_sample <= 8) {
-            c->draw_edges = draw_edges_mmx;
-        }
-    }
-#endif /* HAVE_INLINE_ASM */
-
     if (X86_SSSE3(cpu_flags)) {
 #if HAVE_SSSE3_INLINE
         if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
             c->try_8x8basis = try_8x8basis_ssse3;
         }
+        if (avctx->bits_per_raw_sample <= 8) {
+            c->draw_edges = draw_edges_ssse3;
+        }
 #endif /* HAVE_SSSE3_INLINE */
 #if HAVE_SSSE3_EXTERNAL
         c->add_8x8basis = ff_add_8x8basis_ssse3;
diff --git a/tests/checkasm/mpegvideoencdsp.c b/tests/checkasm/mpegvideoencdsp.c
index 955cd9f5b7..5fad1d4bb4 100644
--- a/tests/checkasm/mpegvideoencdsp.c
+++ b/tests/checkasm/mpegvideoencdsp.c
@@ -147,8 +147,8 @@ static void check_draw_edges(MpegvideoEncDSPContext *c)
     LOCAL_ALIGNED_16(uint8_t, buf0, [BUFSIZE]);
     LOCAL_ALIGNED_16(uint8_t, buf1, [BUFSIZE]);
 
-    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *buf, ptrdiff_t wrap, int width, int height,
-                                             int w, int h, int sides);
+    declare_func(void, uint8_t *buf, ptrdiff_t wrap, int width, int height,
+                       int w, int h, int sides);
 
     for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
         int input_size = input_sizes[isi];

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to