This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 91c7710412bdb9bcd6aac76840b86e8a5da19785
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Jan 25 20:16:57 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Thu Jan 29 01:33:23 2026 +0100

    avcodec/x86/cfhdencdsp: Avoid unnecessary constants
    
    Up until now, cfhdencdsp used constants consisting
    of -1, 1, ..., -1, 1 words and 1, -1, ..., 1, -1 words
    as multipliers for pmaddwd. But one can use
    the same constant for both if one shuffles the words in
    a dword in the opposite order. The same applies to some
    other constants. This also made it possible to avoid a
    register in cfhdenc_vert_filter.
    
    Reviewed-by: James Almer <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/cfhdencdsp.asm | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/libavcodec/x86/cfhdencdsp.asm b/libavcodec/x86/cfhdencdsp.asm
index 3d4aa90e96..73e12f283e 100644
--- a/libavcodec/x86/cfhdencdsp.asm
+++ b/libavcodec/x86/cfhdencdsp.asm
@@ -24,11 +24,8 @@
 SECTION_RODATA
 
 pw_p1_n1:  dw  1, -1, 1, -1, 1, -1, 1, -1
-pw_n1_p1:  dw  -1, 1, -1, 1, -1, 1, -1, 1
 pw_p5_n11: dw  5, -11, 5, -11, 5, -11, 5, -11
-pw_n5_p11: dw -5, 11, -5, 11, -5, 11, -5, 11
 pw_p11_n5: dw 11, -5, 11, -5, 11, -5, 11, -5
-pw_n11_p5: dw -11, 5, -11, 5, -11, 5, -11, 5
 pd_4:  times 4 dd  4
 pw_n4: times 8 dw -4
 cextern pw_1
@@ -44,7 +41,7 @@ cglobal cfhdenc_horiz_filter, 8, 10, 11, input, low, high, 
istride, lwidth, hwid
     shl   hwidthq, 1
     mova       m7, [pd_4]
     mova       m8, [pw_1]
-    pcmpeqw    m9, m9       ; -1
+    pcmpeqw        m9, m9       ; -1
     mova       m10,[pw_p1_n1]
     movsxdifnidn yq, yd
     movsxdifnidn widthq, widthd
@@ -196,7 +193,7 @@ cglobal cfhdenc_horiz_filter, 8, 10, 11, input, low, high, 
istride, lwidth, hwid
 
 %if ARCH_X86_64
 INIT_XMM sse2
-cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, 
hwidth, width, height, x, y, pos
+cglobal cfhdenc_vert_filter, 8, 11, 13, input, low, high, istride, lwidth, 
hwidth, width, height, x, y, pos
     shl  istrideq, 1
 
     shl    widthd, 1
@@ -208,9 +205,8 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, 
istride, lwidth, hwidt
     mova       m8, [pw_1]
     pcmpeqw    m9, m9      ; -1
     mova       m10,[pw_p1_n1]
-    mova       m11,[pw_n1_p1]
-    mova       m12,[pw_4]
-    mova       m13,[pw_n4]
+    mova      m11, [pw_4]
+    mova      m12, [pw_n4]
 .loopw:
     mov        yq, 2
 
@@ -237,9 +233,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, 
istride, lwidth, hwidt
     add      posq, istrideq
     movu       m5, [inputq + posq]
 
-    mova       m6, m0
-    punpcklwd  m0, m1
-    punpckhwd  m1, m6
+    SBUTTERFLY wd, 0, 1, 6
 
     mova       m6, m2
     punpcklwd  m2, m3
@@ -250,9 +244,9 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, 
istride, lwidth, hwidt
     punpckhwd  m5, m6
 
     pmaddwd    m0, [pw_p5_n11]
-    pmaddwd    m1, [pw_n11_p5]
-    pmaddwd    m2, m12
-    pmaddwd    m3, m12
+    pmaddwd    m1, [pw_p5_n11]
+    pmaddwd    m2, m11
+    pmaddwd    m3, m11
     pmaddwd    m4, m9
     pmaddwd    m5, m9
 
@@ -313,9 +307,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, 
istride, lwidth, hwidt
     punpcklwd  m0, m1
     punpckhwd  m1, m6
 
-    mova       m6, m2
-    punpcklwd  m2, m3
-    punpckhwd  m3, m6
+    SBUTTERFLY wd, 2, 3, 6
 
     mova       m6, m4
     punpcklwd  m4, m5
@@ -324,7 +316,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, 
istride, lwidth, hwidt
     pmaddwd    m0, m9
     pmaddwd    m1, m9
     pmaddwd    m2, m10
-    pmaddwd    m3, m11
+    pmaddwd    m3, m10
     pmaddwd    m4, m8
     pmaddwd    m5, m8
 
@@ -394,16 +386,14 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, 
istride, lwidth, hwidt
     punpcklwd  m2, m3
     punpckhwd  m3, m6
 
-    mova       m6, m4
-    punpcklwd  m4, m5
-    punpckhwd  m5, m6
+    SBUTTERFLY wd, 4, 5, 6
 
     pmaddwd    m0, m8
     pmaddwd    m1, m8
-    pmaddwd    m2, m13
-    pmaddwd    m3, m13
+    pmaddwd    m2, m12
+    pmaddwd    m3, m12
     pmaddwd    m4, [pw_p11_n5]
-    pmaddwd    m5, [pw_n5_p11]
+    pmaddwd    m5, [pw_p11_n5]
 
     paddd      m4, m2
     paddd      m5, m3

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to