This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 848c3ca772b5a7e18fedd32b80fa647785410426 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Mon Jan 26 03:00:39 2026 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Thu Jan 29 01:33:37 2026 +0100 avcodec/x86/cfhddsp: Avoid pmaddwd The result of using pmaddwd with the coefficients 1,-1,...,1,-1 is just the negative of using pmaddwd with the coefficients -1,1,...,-1,1, so avoid one pmaddwd. Reviewed-by: James Almer <[email protected]> Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/cfhddsp.asm | 43 ++++++++++++++----------------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/libavcodec/x86/cfhddsp.asm b/libavcodec/x86/cfhddsp.asm index 87c2df634a..821d511ba2 100644 --- a/libavcodec/x86/cfhddsp.asm +++ b/libavcodec/x86/cfhddsp.asm @@ -24,7 +24,6 @@ SECTION_RODATA factor_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1, -factor_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1, factor_p11_n4: dw 11, -4, 11, -4, 11, -4, 11, -4, factor_p5_p4: dw 5, 4, 5, 4, 5, 4, 5, 4, pd_4: times 4 dd 4 @@ -80,7 +79,6 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height %if ARCH_X86_64 mova m8, [factor_p1_n1] - mova m9, [factor_n1_p1] mova m10, [pw_1] mova m11, [pd_4] %endif @@ -144,29 +142,23 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height punpcklwd m4, m1 punpckhwd m5, m1 - mova m6, m4 - mova m7, m5 - %if ARCH_X86_64 pmaddwd m4, m8 pmaddwd m5, m8 - pmaddwd m6, m9 - pmaddwd m7, m9 + psubd m6, m11, m4 + psubd m7, m11, m5 paddd m4, m11 paddd m5, m11 - paddd m6, m11 - paddd m7, m11 %else + mova m2, [pd_4] pmaddwd m4, [factor_p1_n1] pmaddwd m5, [factor_p1_n1] - pmaddwd m6, [factor_n1_p1] - pmaddwd m7, [factor_n1_p1] - paddd m4, [pd_4] - paddd m5, [pd_4] - paddd m6, [pd_4] - paddd m7, [pd_4] + psubd m6, m2, m4 + psubd m7, m2, m5 + paddd m4, m2 + paddd m5, m2 %endif psrad m4, 3 @@ -313,7 +305,6 @@ cglobal cfhd_vert_filter, 8, 11, 14, output, ostride, low, lwidth, high, hwidth, dec heightd mova m8, [factor_p1_n1] - mova m9, [factor_n1_p1] mova m10, [pw_1] mova m11, [pd_4] mova m12, [factor_p11_n4] @@ -471,29 +462,23 @@ cglobal cfhd_vert_filter, 7, 7, 8, output, x, low, y, high, pos, width, height punpcklwd m4, m1 punpckhwd m5, m1 - mova m6, m4 - mova m7, m5 - %if ARCH_X86_64 pmaddwd m4, m8 pmaddwd m5, m8 - pmaddwd m6, m9 - pmaddwd m7, m9 + psubd m6, m11, m4 + psubd m7, m11, m5 paddd m4, m11 paddd m5, m11 - paddd m6, m11 - paddd m7, m11 %else + mova m2, [pd_4] pmaddwd m4, [factor_p1_n1] pmaddwd m5, [factor_p1_n1] - pmaddwd m6, [factor_n1_p1] - pmaddwd m7, [factor_n1_p1] - paddd m4, [pd_4] - paddd m5, [pd_4] - paddd m6, [pd_4] - paddd m7, [pd_4] + psubd m6, m2, m4 + psubd m7, m2, m5 + paddd m4, m2 + paddd m5, m2 %endif psrad m4, 3 _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
