This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 9cb5280c0e4c0f1e832276d160055d3f9a71b17e
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sat Feb 28 19:21:51 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Fri Mar 6 20:02:41 2026 +0100

    avcodec/x86/vvc/alf: Improve storing 8bpp
    
    When width is known to be 8 (i.e. for luma that is not width 16),
    the upper lane is unused, so use an xmm-sized packuswb and avoid
    the vpermq altogether. For chroma whose width is not known to be 16
    (i.e. 4, 8 or 12), defer extracting from the high lane until it is
    known to be needed. Also do so via vextracti128 instead of vpermq
    (also do this for bpp > 8).
    Also use vextracti128 and an xmm-sized packuswb in case of width 16
    instead of a ymm-sized packuswb followed by vextracti128.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vvc/alf.asm | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/libavcodec/x86/vvc/alf.asm b/libavcodec/x86/vvc/alf.asm
index ed83134cd4..8798d7b3c9 100644
--- a/libavcodec/x86/vvc/alf.asm
+++ b/libavcodec/x86/vvc/alf.asm
@@ -354,11 +354,7 @@ SECTION .text
     jl .w4
     STORE_PIXELS_W8 %1, %2
     je .end
-    %if ps == 2
-        vpermq      m%2,  m%2, q0302
-    %else
-        vpermq      m%2,  m%2, q0101
-    %endif
+    vextracti128    xm%2, m%2, 1
     STORE_PIXELS_W4 %1, %2, 8
     jmp .end
 .w4:
@@ -366,19 +362,24 @@ SECTION .text
 .end:
 %endmacro
 
-; STORE_PIXELS(dst, src, width)
-%macro STORE_PIXELS 3
-    %if ps == 1
-        packuswb    m%2, m%2
-        vpermq      m%2, m%2, 0x8
-    %endif
-
+; STORE_PIXELS(dst, src, width, tmp reg)
+%macro STORE_PIXELS 4
     %ifidn %3, 16
+        %if ps == 1
+            vextracti128 xm%4, m%2, 1
+            packuswb     xm%2, xm%4
+        %endif
         STORE_PIXELS_W16  %1, %2
     %else
         %if LUMA
+            %if ps == 1
+                packuswb     xm%2, xm%2
+            %endif
             STORE_PIXELS_W8   %1, %2
         %else
+            %if ps == 1
+                packuswb      m%2, m%2
+            %endif
             STORE_PIXELS_W8LE %1, %2, %3
         %endif
     %endif
@@ -413,7 +414,7 @@ SECTION .text
     CLIPW             m0, m14, m15
 %endif
 
-    STORE_PIXELS    dstq, 0, %1
+    STORE_PIXELS    dstq, 0, %1, 2
 
     lea             srcq, [srcq + src_strideq]
     lea             dstq, [dstq + dst_strideq]

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to