This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 848c3ca772b5a7e18fedd32b80fa647785410426
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Mon Jan 26 03:00:39 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Thu Jan 29 01:33:37 2026 +0100

    avcodec/x86/cfhddsp: Avoid pmaddwd
    
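    The result of using pmaddwd with the coefficients 1,-1,...,1,-1
    is just the negative of using pmaddwd with the coefficients
    -1,1,...,-1,1, so avoid the second pmaddwd (and the register copy
    feeding it): instead of computing pmaddwd(x, -1,1) + pd_4, compute
    pd_4 - pmaddwd(x, 1,-1) with a single psubd.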
    
    Reviewed-by: James Almer <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/cfhddsp.asm | 43 ++++++++++++++-----------------------------
 1 file changed, 14 insertions(+), 29 deletions(-)

diff --git a/libavcodec/x86/cfhddsp.asm b/libavcodec/x86/cfhddsp.asm
index 87c2df634a..821d511ba2 100644
--- a/libavcodec/x86/cfhddsp.asm
+++ b/libavcodec/x86/cfhddsp.asm
@@ -24,7 +24,6 @@
 SECTION_RODATA
 
 factor_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1,
-factor_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1,
 factor_p11_n4: dw 11, -4, 11, -4, 11, -4, 11, -4,
 factor_p5_p4: dw 5, 4, 5, 4, 5, 4, 5, 4,
 pd_4: times 4 dd 4
@@ -80,7 +79,6 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height
 
 %if ARCH_X86_64
     mova       m8, [factor_p1_n1]
-    mova       m9, [factor_n1_p1]
     mova      m10, [pw_1]
     mova      m11, [pd_4]
 %endif
@@ -144,29 +142,23 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height
     punpcklwd      m4, m1
     punpckhwd      m5, m1
 
-    mova           m6, m4
-    mova           m7, m5
-
 %if ARCH_X86_64
     pmaddwd        m4, m8
     pmaddwd        m5, m8
-    pmaddwd        m6, m9
-    pmaddwd        m7, m9
 
+    psubd          m6, m11, m4
+    psubd          m7, m11, m5
     paddd          m4, m11
     paddd          m5, m11
-    paddd          m6, m11
-    paddd          m7, m11
 %else
+    mova           m2, [pd_4]
     pmaddwd        m4, [factor_p1_n1]
     pmaddwd        m5, [factor_p1_n1]
-    pmaddwd        m6, [factor_n1_p1]
-    pmaddwd        m7, [factor_n1_p1]
 
-    paddd          m4, [pd_4]
-    paddd          m5, [pd_4]
-    paddd          m6, [pd_4]
-    paddd          m7, [pd_4]
+    psubd          m6, m2, m4
+    psubd          m7, m2, m5
+    paddd          m4, m2
+    paddd          m5, m2
 %endif
 
     psrad          m4, 3
@@ -313,7 +305,6 @@ cglobal cfhd_vert_filter, 8, 11, 14, output, ostride, low, lwidth, high, hwidth,
     dec   heightd
 
     mova       m8, [factor_p1_n1]
-    mova       m9, [factor_n1_p1]
     mova      m10, [pw_1]
     mova      m11, [pd_4]
     mova      m12, [factor_p11_n4]
@@ -471,29 +462,23 @@ cglobal cfhd_vert_filter, 7, 7, 8, output, x, low, y, high, pos, width, height
     punpcklwd  m4, m1
     punpckhwd  m5, m1
 
-    mova       m6, m4
-    mova       m7, m5
-
 %if ARCH_X86_64
     pmaddwd    m4, m8
     pmaddwd    m5, m8
-    pmaddwd    m6, m9
-    pmaddwd    m7, m9
 
+    psubd      m6, m11, m4
+    psubd      m7, m11, m5
     paddd      m4, m11
     paddd      m5, m11
-    paddd      m6, m11
-    paddd      m7, m11
 %else
+    mova       m2, [pd_4]
     pmaddwd    m4, [factor_p1_n1]
     pmaddwd    m5, [factor_p1_n1]
-    pmaddwd    m6, [factor_n1_p1]
-    pmaddwd    m7, [factor_n1_p1]
 
-    paddd      m4, [pd_4]
-    paddd      m5, [pd_4]
-    paddd      m6, [pd_4]
-    paddd      m7, [pd_4]
+    psubd      m6, m2, m4
+    psubd      m7, m2, m5
+    paddd      m4, m2
+    paddd      m5, m2
 %endif
 
     psrad      m4, 3

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to