This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit c35f57f3c4c4c6da5278a9f14a18d8c7d16e5b3a Author: Andreas Rheinhardt <[email protected]> AuthorDate: Sun Apr 12 15:24:05 2026 +0200 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Sun Jun 14 22:04:42 2026 +0200 avcodec/x86/fpel: Use SSE2 in avg_pixels8 No change in benchmarks here; this already allows to remove an emms_c from cavsdec.c. Reviewed-by: James Almer <[email protected]> Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/cavsdec.c | 2 -- libavcodec/x86/cavsdsp.c | 6 +----- libavcodec/x86/fpel.asm | 46 +++++++++++++++++++++++++++++++++++-------- libavcodec/x86/fpel.h | 8 ++++---- libavcodec/x86/h264_qpel.c | 2 +- libavcodec/x86/hpeldsp_init.c | 2 +- libavcodec/x86/qpeldsp_init.c | 6 +----- libavcodec/x86/vc1dsp_init.c | 5 ++--- tests/checkasm/cavsdsp.c | 2 +- 9 files changed, 49 insertions(+), 30 deletions(-) diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c index cc26a904db..bc1ed60bf0 100644 --- a/libavcodec/cavsdec.c +++ b/libavcodec/cavsdec.c @@ -27,7 +27,6 @@ #include "libavutil/attributes.h" #include "libavutil/avassert.h" -#include "libavutil/emms.h" #include "libavutil/mem.h" #include "avcodec.h" #include "get_bits.h" @@ -1161,7 +1160,6 @@ static int decode_pic(AVSContext *h) break; } while (ff_cavs_next_mb(h)); } - emms_c(); if (ret >= 0 && h->cur.f->pict_type != AV_PICTURE_TYPE_B) { av_frame_unref(h->DPB[1].f); FFSWAP(AVSFrame, h->cur, h->DPB[1]); diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index e333bbee49..91ec866681 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -91,11 +91,6 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c) { av_unused int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMX_EXTERNAL - if (EXTERNAL_MMXEXT(cpu_flags)) { - c->avg_cavs_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext; - } -#endif #if HAVE_SSE2_EXTERNAL if (EXTERNAL_SSE2(cpu_flags)) { c->put_cavs_qpel_pixels_tab[0][ 0] = ff_put_pixels16x16_sse2; @@ -114,6 +109,7 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c) c->avg_cavs_qpel_pixels_tab[0][ 4] = avg_cavs_qpel16_mc01_sse2; c->avg_cavs_qpel_pixels_tab[0][ 8] = avg_cavs_qpel16_mc02_sse2; c->avg_cavs_qpel_pixels_tab[0][12] = avg_cavs_qpel16_mc03_sse2; + c->avg_cavs_qpel_pixels_tab[1][ 0] = ff_avg_pixels8x8_sse2; c->avg_cavs_qpel_pixels_tab[1][ 2] = ff_avg_cavs_qpel8_mc20_sse2; c->avg_cavs_qpel_pixels_tab[1][ 4] = avg_cavs_qpel8_mc01_sse2; c->avg_cavs_qpel_pixels_tab[1][ 8] = ff_avg_cavs_qpel8_mc02_sse2; diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm index e4becca5fb..598a57ab0d 100644 --- a/libavcodec/x86/fpel.asm +++ b/libavcodec/x86/fpel.asm @@ -25,8 +25,40 @@ SECTION .text -; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels, -; ptrdiff_t line_size, int h) +INIT_XMM sse2 +; void ff_avg_pixels8x8_sse2(uint8_t *block, const uint8_t *pixels, +; ptrdiff_t line_size) +cglobal avg_pixels8x8, 3,5,6 + mov r3d, 8 + jmp avg_pixels8_after_prologue + +; void ff_avg_pixels8_sse2(uint8_t *block, const uint8_t *pixels, +; ptrdiff_t line_size, int h) +cglobal avg_pixels8, 4,5,6 +avg_pixels8_after_prologue: + lea r4, [r2*3] +.loop: + movq m0, [r1] + movq m1, [r0] + movhps m0, [r1+r2] + movhps m1, [r0+r2] + movq m2, [r1+r2*2] + movq m3, [r0+r2*2] + pavgb m0, m1 + movq m4, [r1+r4] + pavgb m2, m3 + movq m5, [r0+r4] + lea r1, [r1+r2*4] + pavgb m4, m5 + movq [r0], m0 + movhps [r0+r2], m0 + movq [r0+r2*2], m2 + movq [r0+r4], m4 + lea r0, [r0+r2*4] + sub r3d, 4 + jne .loop + RET + %macro OP_PIXELS 2-3 0 %if %2 == mmsize/2 %define LOAD movh @@ -35,11 +67,13 @@ SECTION .text %define LOAD movu %define SAVE mova %endif -cglobal %1_pixels%2x%2, 3,5+4*%3,%3 ? 4 : 0 +cglobal %1_pixels%2x%2, 3,5+4*%3,4 mov r3d, %2 jmp %1_pixels%2_after_prologue -cglobal %1_pixels%2, 4,5+4*%3,%3 ? 4 : 0 +; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels, +; ptrdiff_t line_size, int h) +cglobal %1_pixels%2, 4,5+4*%3,4 %1_pixels%2_after_prologue: lea r4, [r2*3] .loop: @@ -76,10 +110,6 @@ cglobal %1_pixels%2, 4,5+4*%3,%3 ? 4 : 0 RET %endmacro -INIT_MMX mmxext -OP_PIXELS avg, 8 - -INIT_XMM sse2 OP_PIXELS put, 8, UNIX64 OP_PIXELS put, 16 OP_PIXELS avg, 16 diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h index 0b0056021e..6ec28af635 100644 --- a/libavcodec/x86/fpel.h +++ b/libavcodec/x86/fpel.h @@ -22,10 +22,10 @@ #include <stddef.h> #include <stdint.h> -void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_avg_pixels8x8_mmxext(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size); +void ff_avg_pixels8_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels8x8_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size); void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels16x16_sse2(uint8_t *block, const uint8_t *pixels, diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 5d618651a4..0cc653c6ca 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -391,7 +391,6 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) if (EXTERNAL_MMXEXT(cpu_flags)) { if (!high_bit_depth) { SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, ); - c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext; SET_QPEL_FUNCS_1PP(avg_h264_qpel, 2, 4, mmxext, ); c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_pixels4_mmxext; } else if (bit_depth == 10) { @@ -416,6 +415,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) H264_QPEL_FUNCS(3, 3, sse2); c->put_h264_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2; c->avg_h264_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2; + c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_sse2; } if (bit_depth == 10) { diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index f689879d51..4e4abd5273 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -80,7 +80,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags) c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext; c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext; - c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext; c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext; c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext; @@ -114,6 +113,7 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags) c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2; c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2; c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_sse2; + c->avg_pixels_tab[1][0] = ff_avg_pixels8_sse2; c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_sse2; c->avg_no_rnd_pixels_tab[1] = ff_avg_no_rnd_pixels16_x2_sse2; diff --git a/libavcodec/x86/qpeldsp_init.c b/libavcodec/x86/qpeldsp_init.c index 18c259b0d8..771961c1b5 100644 --- a/libavcodec/x86/qpeldsp_init.c +++ b/libavcodec/x86/qpeldsp_init.c @@ -281,11 +281,6 @@ av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c) { int cpu_flags = av_get_cpu_flags(); - if (X86_MMXEXT(cpu_flags)) { -#if HAVE_MMXEXT_EXTERNAL - c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext; -#endif /* HAVE_MMXEXT_EXTERNAL */ - } #if HAVE_SSE2_EXTERNAL if (EXTERNAL_SSE2(cpu_flags)) { c->put_no_rnd_qpel_pixels_tab[0][0] = @@ -293,6 +288,7 @@ av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c) c->put_no_rnd_qpel_pixels_tab[1][0] = c->put_qpel_pixels_tab[1][0] = ff_put_pixels8x8_sse2; c->avg_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2; + c->avg_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_sse2; SET_V_QPEL_FUNCS (16, sse2,); SET_V_QPEL_FUNCS (8, sse2,); diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c index 9f80048791..3f0eb5746c 100644 --- a/libavcodec/x86/vc1dsp_init.c +++ b/libavcodec/x86/vc1dsp_init.c @@ -72,7 +72,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, ptrdiff_t stride, int pq) } DECLARE_FUNCTION(put_, 8, _sse2) -DECLARE_FUNCTION(avg_, 8, _mmxext) +DECLARE_FUNCTION(avg_, 8, _sse2) DECLARE_FUNCTION(put_, 16, _sse2) DECLARE_FUNCTION(avg_, 16, _sse2) @@ -114,8 +114,6 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT if (EXTERNAL_MMXEXT(cpu_flags)) { - dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_8_mmxext; - dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_mmxext; dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_mmxext; dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_mmxext; @@ -127,6 +125,7 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) dsp->put_vc1_mspel_pixels_tab[0][0] = put_vc1_mspel_mc00_16_sse2; dsp->put_vc1_mspel_pixels_tab[1][0] = put_vc1_mspel_mc00_8_sse2; dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_sse2; + dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_8_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) { ASSIGN_LF4(ssse3); diff --git a/tests/checkasm/cavsdsp.c b/tests/checkasm/cavsdsp.c index 3e4a9ac127..ab6b695ae4 100644 --- a/tests/checkasm/cavsdsp.c +++ b/tests/checkasm/cavsdsp.c @@ -71,7 +71,7 @@ static void check_cavs_qpeldsp(void) TEST(put_cavs_qpel_pixels_tab), TEST(avg_cavs_qpel_pixels_tab), }; - declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride); + declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride); ff_cavsdsp_init(&cavsdsp); _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
