This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new 3a7c09eb39 avcodec/x86/mpegvideoencdsp_init: Port draw_edges to SSSE3
3a7c09eb39 is described below
commit 3a7c09eb395b0b485b6ad3b1fbebc6c50950a677
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Sat Feb 7 00:38:47 2026 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Wed Feb 11 11:15:06 2026 +0100
avcodec/x86/mpegvideoencdsp_init: Port draw_edges to SSSE3
Benchmarks:
draw_edges_8_1724_4_c: 2672.2 ( 1.00x)
draw_edges_8_1724_4_mmx: 3191.5 ( 0.84x)
draw_edges_8_1724_4_ssse3: 2179.6 ( 1.23x)
draw_edges_8_1724_8_c: 2852.3 ( 1.00x)
draw_edges_8_1724_8_mmx: 3683.0 ( 0.77x)
draw_edges_8_1724_8_ssse3: 2225.7 ( 1.28x)
draw_edges_8_1724_16_c: 4169.4 ( 1.00x)
draw_edges_8_1724_16_mmx: 4665.9 ( 0.89x)
draw_edges_8_1724_16_ssse3: 2765.8 ( 1.51x)
draw_edges_128_407_4_c: 1126.6 ( 1.00x)
draw_edges_128_407_4_mmx: 943.9 ( 1.19x)
draw_edges_128_407_4_ssse3: 925.7 ( 1.22x)
draw_edges_128_407_8_c: 1208.8 ( 1.00x)
draw_edges_128_407_8_mmx: 1119.1 ( 1.08x)
draw_edges_128_407_8_ssse3: 997.8 ( 1.21x)
draw_edges_128_407_16_c: 1352.4 ( 1.00x)
draw_edges_128_407_16_mmx: 1368.7 ( 0.99x)
draw_edges_128_407_16_ssse3: 1148.3 ( 1.18x)
draw_edges_1080_31_4_c: 228.5 ( 1.00x)
draw_edges_1080_31_4_mmx: 240.8 ( 0.95x)
draw_edges_1080_31_4_ssse3: 226.7 ( 1.01x)
draw_edges_1080_31_8_c: 411.1 ( 1.00x)
draw_edges_1080_31_8_mmx: 432.9 ( 0.95x)
draw_edges_1080_31_8_ssse3: 403.2 ( 1.02x)
draw_edges_1080_31_16_c: 1121.2 ( 1.00x)
draw_edges_1080_31_16_mmx: 1124.9 ( 1.00x)
draw_edges_1080_31_16_ssse3: 1125.4 ( 1.00x)
draw_edges_1920_4_4_c: 310.8 ( 1.00x)
draw_edges_1920_4_4_mmx: 311.6 ( 1.00x)
draw_edges_1920_4_4_ssse3: 311.6 ( 1.00x)
draw_edges_1920_4_4_negstride_c: 307.0 ( 1.00x)
draw_edges_1920_4_4_negstride_mmx: 306.7 ( 1.00x)
draw_edges_1920_4_4_negstride_ssse3: 306.7 ( 1.00x)
draw_edges_1920_4_8_c: 724.2 ( 1.00x)
draw_edges_1920_4_8_mmx: 724.9 ( 1.00x)
draw_edges_1920_4_8_ssse3: 717.3 ( 1.01x)
draw_edges_1920_4_8_negstride_c: 719.2 ( 1.00x)
draw_edges_1920_4_8_negstride_mmx: 717.1 ( 1.00x)
draw_edges_1920_4_8_negstride_ssse3: 710.9 ( 1.01x)
draw_edges_1920_4_16_c: 1752.9 ( 1.00x)
draw_edges_1920_4_16_mmx: 1754.6 ( 1.00x)
draw_edges_1920_4_16_ssse3: 1751.1 ( 1.00x)
draw_edges_1920_4_16_negstride_c: 1783.2 ( 1.00x)
draw_edges_1920_4_16_negstride_mmx: 1778.2 ( 1.00x)
draw_edges_1920_4_16_negstride_ssse3: 1768.3 ( 1.01x)
Reviewed-by: Michael Niedermayer <[email protected]>
Reviewed-by: James Almer <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
---
libavcodec/mpegvideo_enc.c | 3 -
libavcodec/snowenc.c | 2 -
libavcodec/x86/mpegvideoencdsp_init.c | 131 ++++++++++++++--------------------
tests/checkasm/mpegvideoencdsp.c | 4 +-
4 files changed, 56 insertions(+), 84 deletions(-)
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index a4f78c25db..46c8863a14 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -1419,7 +1419,6 @@ static int load_input_picture(MPVMainEncContext *const m, const AVFrame *pic_arg
EDGE_BOTTOM);
}
}
- emms_c();
}
pic->display_picture_number = display_picture_number;
@@ -1886,8 +1885,6 @@ static void frame_end(MPVMainEncContext *const m)
EDGE_TOP | EDGE_BOTTOM);
}
- emms_c();
-
m->last_pict_type = s->c.pict_type;
m->last_lambda_for[s->c.pict_type] = s->c.cur_pic.ptr->f->quality;
if (s->c.pict_type != AV_PICTURE_TYPE_B)
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index 68c2bb2ebc..feaf4dc1a0 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -1786,7 +1786,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
EDGE_TOP | EDGE_BOTTOM);
}
- emms_c();
pic = s->input_picture;
pic->pict_type = pict->pict_type;
pic->quality = pict->quality;
@@ -1831,7 +1830,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
s->current_picture->linesize[2],
w>>s->chroma_h_shift, h>>s->chroma_v_shift,
EDGE_WIDTH>>s->chroma_h_shift,
EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
}
- emms_c();
}
ff_snow_frames_prepare(s);
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
index 220c75785a..1b6c75e9e5 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -34,7 +34,6 @@ int ff_pix_sum16_xop(const uint8_t *pix, ptrdiff_t line_size);
int ff_pix_norm1_sse2(const uint8_t *pix, ptrdiff_t line_size);
void ff_add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale);
-#if HAVE_INLINE_ASM
#if HAVE_SSSE3_INLINE
#define SCALE_OFFSET -1
@@ -84,77 +83,62 @@ static int try_8x8basis_ssse3(const int16_t rem[64], const int16_t weight[64], c
);
return i;
}
-#endif /* HAVE_SSSE3_INLINE */
/* Draw the edges of width 'w' of an image of size width, height */
-static void draw_edges_mmx(uint8_t *buf, ptrdiff_t wrap, int width, int height,
- int w, int h, int sides)
+static void draw_edges_ssse3(uint8_t *buf, ptrdiff_t wrap, int width, int height,
+ int w, int h, int sides)
{
- uint8_t *ptr, *last_line;
+ uint8_t *ptr = buf, *last_line;
int i;
+ av_assert1(w == 16 || w == 8 || w == 4);
+
/* left and right */
- ptr = buf;
- if (w == 8) {
- __asm__ volatile (
- "1: \n\t"
- "movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
- "punpcklwd %%mm0, %%mm0 \n\t"
- "punpckldq %%mm0, %%mm0 \n\t"
- "movq %%mm0, -8(%0) \n\t"
- "movq -8(%0, %2), %%mm1 \n\t"
- "punpckhbw %%mm1, %%mm1 \n\t"
- "punpckhwd %%mm1, %%mm1 \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movq %%mm1, (%0, %2) \n\t"
- "add %1, %0 \n\t"
- "cmp %3, %0 \n\t"
- "jnz 1b \n\t"
- : "+r" (ptr)
- : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
- "r" (ptr + wrap * height));
- } else if (w == 16) {
- __asm__ volatile (
- "1: \n\t"
- "movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
- "punpcklwd %%mm0, %%mm0 \n\t"
- "punpckldq %%mm0, %%mm0 \n\t"
- "movq %%mm0, -8(%0) \n\t"
- "movq %%mm0, -16(%0) \n\t"
- "movq -8(%0, %2), %%mm1 \n\t"
- "punpckhbw %%mm1, %%mm1 \n\t"
- "punpckhwd %%mm1, %%mm1 \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movq %%mm1, (%0, %2) \n\t"
- "movq %%mm1, 8(%0, %2) \n\t"
- "add %1, %0 \n\t"
- "cmp %3, %0 \n\t"
- "jnz 1b \n\t"
- : "+r"(ptr)
- : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
- );
- } else {
- av_assert1(w == 4);
- __asm__ volatile (
- "1: \n\t"
- "movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
- "punpcklwd %%mm0, %%mm0 \n\t"
- "movd %%mm0, -4(%0) \n\t"
- "movd -4(%0, %2), %%mm1 \n\t"
- "punpcklbw %%mm1, %%mm1 \n\t"
- "punpckhwd %%mm1, %%mm1 \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movd %%mm1, (%0, %2) \n\t"
- "add %1, %0 \n\t"
- "cmp %3, %0 \n\t"
- "jnz 1b \n\t"
- : "+r" (ptr)
- : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
- "r" (ptr + wrap * height));
- }
+ __asm__ volatile (
+ "pcmpeqw %%xmm3, %%xmm3 \n\t"
+ "pxor %%xmm2, %%xmm2 \n\t"
+ "psrlw $14, %%xmm3 \n\t" // pw_3
+ "pshufb %%xmm2, %%xmm3 \n\t" // pb_3
+ "cmp $8, %4 \n\t"
+ "jg 16f \n\t"
+ "jl 4f \n\t"
+ "8: \n\t"
+ "movd (%0), %%xmm0 \n\t"
+ "movd -4(%0, %2), %%xmm1 \n\t"
+ "pshufb %%xmm2, %%xmm0 \n\t"
+ "pshufb %%xmm3, %%xmm1 \n\t"
+ "movq %%xmm0, -8(%0) \n\t"
+ "movq %%xmm1, (%0, %2) \n\t"
+ "add %1, %0 \n\t"
+ "cmp %3, %0 \n\t"
+ "jnz 8b \n\t"
+ "jmp 1f \n\t"
+ "4: \n\t"
+ "movd (%0), %%xmm0 \n\t"
+ "movd -4(%0, %2), %%xmm1 \n\t"
+ "pshufb %%xmm2, %%xmm0 \n\t"
+ "pshufb %%xmm3, %%xmm1 \n\t"
+ "movd %%xmm0, -4(%0) \n\t"
+ "movd %%xmm1, (%0, %2) \n\t"
+ "add %1, %0 \n\t"
+ "cmp %3, %0 \n\t"
+ "jnz 4b \n\t"
+ "jmp 1f \n\t"
+ "16: \n\t"
+ "movd (%0), %%xmm0 \n\t"
+ "movd -4(%0, %2), %%xmm1 \n\t"
+ "pshufb %%xmm2, %%xmm0 \n\t"
+ "pshufb %%xmm3, %%xmm1 \n\t"
+ "movdqu %%xmm0, -16(%0) \n\t"
+ "movdqu %%xmm1, (%0, %2) \n\t"
+ "add %1, %0 \n\t"
+ "cmp %3, %0 \n\t"
+ "jnz 16b \n\t"
+ "1: \n\t"
+ : "+r" (ptr)
+ : "r" ((x86_reg) wrap), "r" ((x86_reg) width), "r"(ptr + wrap * height), "r" (w)
+ XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")
+ );
/* top and bottom + corners */
buf -= w;
@@ -168,8 +152,7 @@ static void draw_edges_mmx(uint8_t *buf, ptrdiff_t wrap, int width, int height,
// bottom
memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
}
-
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_SSSE3_INLINE */
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
AVCodecContext *avctx)
@@ -186,20 +169,14 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
c->pix_sum = ff_pix_sum16_xop;
}
-#if HAVE_INLINE_ASM
-
- if (INLINE_MMX(cpu_flags)) {
- if (avctx->bits_per_raw_sample <= 8) {
- c->draw_edges = draw_edges_mmx;
- }
- }
-#endif /* HAVE_INLINE_ASM */
-
if (X86_SSSE3(cpu_flags)) {
#if HAVE_SSSE3_INLINE
if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
c->try_8x8basis = try_8x8basis_ssse3;
}
+ if (avctx->bits_per_raw_sample <= 8) {
+ c->draw_edges = draw_edges_ssse3;
+ }
#endif /* HAVE_SSSE3_INLINE */
#if HAVE_SSSE3_EXTERNAL
c->add_8x8basis = ff_add_8x8basis_ssse3;
diff --git a/tests/checkasm/mpegvideoencdsp.c b/tests/checkasm/mpegvideoencdsp.c
index 955cd9f5b7..5fad1d4bb4 100644
--- a/tests/checkasm/mpegvideoencdsp.c
+++ b/tests/checkasm/mpegvideoencdsp.c
@@ -147,8 +147,8 @@ static void check_draw_edges(MpegvideoEncDSPContext *c)
LOCAL_ALIGNED_16(uint8_t, buf0, [BUFSIZE]);
LOCAL_ALIGNED_16(uint8_t, buf1, [BUFSIZE]);
- declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *buf, ptrdiff_t wrap, int width, int height,
- int w, int h, int sides);
+ declare_func(void, uint8_t *buf, ptrdiff_t wrap, int width, int height,
+ int w, int h, int sides);
for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
int input_size = input_sizes[isi];
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]