This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 7971953d299e71ca3ab5d95f452dddfb2f061ad7 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Sun May 10 22:01:15 2026 +0200 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Fri May 15 20:29:29 2026 +0200 avfilter/x86/vf_pp7: Port ff_pp7_dctB_mmx to SSE2 Unfortunately, the SSE2 version is a bit slower than the MMX version because memory operands cannot be used in paddw. The situation would reverse if ff_pp7_dctB_mmx() had to issue emms. dctB_c: 3.7 ( 1.00x) dctB_mmx: 3.3 ( 1.13x) dctB_sse2: 3.6 ( 1.03x) Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavfilter/vf_pp7.c | 2 -- libavfilter/x86/vf_pp7.asm | 55 ++++++++++++++++++++----------------------- libavfilter/x86/vf_pp7_init.c | 6 ++--- tests/checkasm/vf_pp7.c | 2 +- 4 files changed, 30 insertions(+), 35 deletions(-) diff --git a/libavfilter/vf_pp7.c b/libavfilter/vf_pp7.c index d8a5501b47..10f56c804f 100644 --- a/libavfilter/vf_pp7.c +++ b/libavfilter/vf_pp7.c @@ -27,7 +27,6 @@ * project, and ported by Arwa Arif for FFmpeg. 
*/ -#include "libavutil/emms.h" #include "libavutil/imgutils.h" #include "libavutil/mem.h" #include "libavutil/mem_internal.h" @@ -351,7 +350,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) cw, ch, qp_table, qp_stride, 0); filter(pp7, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw, ch, qp_table, qp_stride, 0); - emms_c(); } } diff --git a/libavfilter/x86/vf_pp7.asm b/libavfilter/x86/vf_pp7.asm index 9dfabdcc8d..1a0921ed50 100644 --- a/libavfilter/x86/vf_pp7.asm +++ b/libavfilter/x86/vf_pp7.asm @@ -24,34 +24,31 @@ SECTION .text -INIT_MMX mmx +INIT_XMM sse2 +;void ff_pp7_dctB_sse2(int16_t *dst, const int16_t *src) +cglobal pp7_dctB, 2, 2, 6, dst, src + movq m0, [srcq+8*0] + movq m5, [srcq+8*6] + movq m3, [srcq+8*3] + movq m1, [srcq+8*1] + movq m4, [srcq+8*5] + movq m2, [srcq+8*2] + paddw m0, m5 + movq m5, [srcq+8*4] + paddw m3, m3 + paddw m1, m4 + paddw m2, m5 -;void ff_pp7_dctB_mmx(int16_t *dst, const int16_t *src) -cglobal pp7_dctB, 2, 2, 0, dst, src - movq m0, [srcq] - movq m1, [srcq+mmsize*1] - paddw m0, [srcq+mmsize*6] - paddw m1, [srcq+mmsize*5] - movq m2, [srcq+mmsize*2] - movq m3, [srcq+mmsize*3] - paddw m2, [srcq+mmsize*4] - paddw m3, m3 - movq m4, m3 - psubw m3, m0 - paddw m4, m0 - movq m0, m2 - psubw m2, m1 - paddw m0, m1 - movq m1, m4 - psubw m4, m0 - paddw m1, m0 - movq m0, m3 - psubw m3, m2 - psubw m3, m2 - paddw m2, m0 - paddw m2, m0 - movq [dstq], m1 - movq [dstq+mmsize*2], m4 - movq [dstq+mmsize*1], m2 - movq [dstq+mmsize*3], m3 + SUMSUB_BA w, 0, 3, 4 + SUMSUB_BA w, 1, 2, 5 + + SUMSUB_BA w, 1, 0, 4 + movq [dstq], m1 + paddw m4, m2, m3 + paddw m2, m2 + movq [dstq+8*2], m0 + paddw m4, m3 + psubw m3, m2 + movq [dstq+8*1], m4 + movq [dstq+8*3], m3 RET diff --git a/libavfilter/x86/vf_pp7_init.c b/libavfilter/x86/vf_pp7_init.c index f294ca7764..725326382b 100644 --- a/libavfilter/x86/vf_pp7_init.c +++ b/libavfilter/x86/vf_pp7_init.c @@ -23,12 +23,12 @@ #include "libavutil/x86/cpu.h" #include "libavfilter/vf_pp7dsp.h" -void 
ff_pp7_dctB_mmx(int16_t *restrict dst, const int16_t *restrict src); +void ff_pp7_dctB_sse2(int16_t *restrict dst, const int16_t *restrict src); av_cold void ff_pp7dsp_init_x86(PP7DSPContext *p) { int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_MMX(cpu_flags)) - p->dctB = ff_pp7_dctB_mmx; + if (EXTERNAL_SSE2(cpu_flags)) + p->dctB = ff_pp7_dctB_sse2; } diff --git a/tests/checkasm/vf_pp7.c b/tests/checkasm/vf_pp7.c index 07664f7472..e506eeb16c 100644 --- a/tests/checkasm/vf_pp7.c +++ b/tests/checkasm/vf_pp7.c @@ -35,7 +35,7 @@ static void check_dctB(const PP7DSPContext *const pp7dsp) { - declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *dst, const int16_t *src); + declare_func(void, int16_t *dst, const int16_t *src); if (!check_func(pp7dsp->dctB, "dctB")) return; _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
