This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 9ba33cc1980d624a45d47581efe00663f9eac4e3
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Wed Dec 10 15:56:34 2025 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Sun Dec 14 10:16:40 2025 +0100

    avcodec/x86/lossless_videoencdsp_init: Avoid special-casing first pixel
    
    Old benchmarks:
    sub_median_pred_c:                                     404.1 ( 1.00x)
    sub_median_pred_sse2:                                   20.5 (19.67x)
    
    New benchmarks:
    sub_median_pred_c:                                     408.5 ( 1.00x)
    sub_median_pred_sse2:                                   19.2 (21.27x)
    
    Reviewed-by: Lynne <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/lossless_videoencdsp_init.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/libavcodec/x86/lossless_videoencdsp_init.c 
b/libavcodec/x86/lossless_videoencdsp_init.c
index b3ea2da388..d5dd576e5f 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c
@@ -27,7 +27,6 @@
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/lossless_videoencdsp.h"
-#include "libavcodec/mathops.h"
 
 void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                         intptr_t w);
@@ -37,20 +36,23 @@ void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, 
const uint8_t *src2,
 void ff_sub_left_predict_avx(uint8_t *dst, const uint8_t *src,
                             ptrdiff_t stride, ptrdiff_t width, int height);
 
-#if HAVE_SSE2_INLINE
+#if HAVE_SSE2_INLINE && HAVE_7REGS
 
 static void sub_median_pred_sse2(uint8_t *dst, const uint8_t *src1,
                                  const uint8_t *src2, intptr_t w,
                                  int *left, int *left_top)
 {
     x86_reg i = 0;
-    uint8_t l, lt;
 
     __asm__ volatile (
         "movdqu  (%1, %0), %%xmm0    \n\t" // LT
         "movdqu  (%2, %0), %%xmm2    \n\t" // L
+        "movd        (%6), %%xmm1    \n\t" // LT
+        "movd        (%5), %%xmm3    \n\t" // L
         "pslldq        $1, %%xmm0    \n\t"
         "pslldq        $1, %%xmm2    \n\t"
+        "por       %%xmm1, %%xmm0    \n\t" // LT
+        "por       %%xmm3, %%xmm2    \n\t" // L
         "jmp 2f                      \n\t"
         "1:                          \n\t"
         "movdqu -1(%2, %0), %%xmm2   \n\t" // L
@@ -72,15 +74,10 @@ static void sub_median_pred_sse2(uint8_t *dst, const 
uint8_t *src1,
         "cmp           %4, %0        \n\t"
         " jb 1b                      \n\t"
         : "+r" (i)
-        : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w)
+        : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w), "r" (left), 
"r" (left_top)
         : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) 
"memory"
     );
 
-    l  = *left;
-    lt = *left_top;
-
-    dst[0] = src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt) & 0xFF);
-
     *left_top = src1[w - 1];
     *left     = src2[w - 1];
 }
@@ -91,7 +88,7 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
 {
     av_unused int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_SSE2_INLINE
+#if HAVE_SSE2_INLINE && HAVE_7REGS
     if (INLINE_SSE2(cpu_flags)) {
         c->sub_median_pred = sub_median_pred_sse2;
     }

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to