This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit 3a3e7080f1f0b9731c66495dd1dba9225d883843
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Wed Dec 10 15:38:39 2025 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Sun Dec 14 10:16:35 2025 +0100

    avcodec/x86/lossless_videoencdsp_init: Port sub_median_pred to SSE2
    
    Old benchmarks:
    sub_median_pred_c:                                     405.7 ( 1.00x)
    sub_median_pred_mmxext:                                 35.1 (11.57x)
    
    New benchmarks:
    sub_median_pred_c:                                     404.1 ( 1.00x)
    sub_median_pred_sse2:                                   20.5 (19.67x)
    
    Reviewed-by: Lynne <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/lossless_videoencdsp_init.c | 68 +++++++++++++++---------------
 tests/checkasm/llviddspenc.c               |  6 +--
 2 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c
index d7dfa2e3ae..b3ea2da388 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c
@@ -37,42 +37,44 @@ void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 void ff_sub_left_predict_avx(uint8_t *dst, const uint8_t *src,
                             ptrdiff_t stride, ptrdiff_t width, int height);
 
-#if HAVE_INLINE_ASM
+#if HAVE_SSE2_INLINE
 
-static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
-                                   const uint8_t *src2, intptr_t w,
-                                   int *left, int *left_top)
+static void sub_median_pred_sse2(uint8_t *dst, const uint8_t *src1,
+                                 const uint8_t *src2, intptr_t w,
+                                 int *left, int *left_top)
 {
     x86_reg i = 0;
     uint8_t l, lt;
 
     __asm__ volatile (
-        "movq  (%1, %0), %%mm0          \n\t" // LT
-        "psllq $8, %%mm0                \n\t"
-        "movq  (%2, %0), %%mm2          \n\t" // L
-        "psllq $8, %%mm2                \n\t"
-        "jmp 2f                         \n\t"
-        "1:                             \n\t"
-        "movq  -1(%2, %0), %%mm2        \n\t" // L
-        "movq -1(%1, %0), %%mm0         \n\t" // LT
-        "2:                             \n\t"
-        "movq  (%1, %0), %%mm1          \n\t" // T
-        "movq  (%2, %0), %%mm3          \n\t" // X
-        "movq %%mm2, %%mm4              \n\t" // L
-        "psubb %%mm0, %%mm2             \n\t"
-        "paddb %%mm1, %%mm2             \n\t" // L + T - LT
-        "movq %%mm4, %%mm5              \n\t" // L
-        "pmaxub %%mm1, %%mm4            \n\t" // max(T, L)
-        "pminub %%mm5, %%mm1            \n\t" // min(T, L)
-        "pminub %%mm2, %%mm4            \n\t"
-        "pmaxub %%mm1, %%mm4            \n\t"
-        "psubb %%mm4, %%mm3             \n\t" // dst - pred
-        "movq %%mm3, (%3, %0)           \n\t"
-        "add $8, %0                     \n\t"
-        "cmp %4, %0                     \n\t"
-        " jb 1b                         \n\t"
+        "movdqu  (%1, %0), %%xmm0    \n\t" // LT
+        "movdqu  (%2, %0), %%xmm2    \n\t" // L
+        "pslldq        $1, %%xmm0    \n\t"
+        "pslldq        $1, %%xmm2    \n\t"
+        "jmp 2f                      \n\t"
+        "1:                          \n\t"
+        "movdqu -1(%2, %0), %%xmm2   \n\t" // L
+        "movdqu -1(%1, %0), %%xmm0   \n\t" // LT
+        "2:                          \n\t"
+        "movdqu  (%1, %0), %%xmm1    \n\t" // T
+        "movdqu  (%2, %0), %%xmm3    \n\t" // X
+        "movdqa    %%xmm2, %%xmm4    \n\t" // L
+        "psubb     %%xmm0, %%xmm2    \n\t"
+        "paddb     %%xmm1, %%xmm2    \n\t" // L + T - LT
+        "movdqa    %%xmm4, %%xmm5    \n\t" // L
+        "pmaxub    %%xmm1, %%xmm4    \n\t" // max(T, L)
+        "pminub    %%xmm5, %%xmm1    \n\t" // min(T, L)
+        "pminub    %%xmm2, %%xmm4    \n\t"
+        "pmaxub    %%xmm1, %%xmm4    \n\t"
+        "psubb     %%xmm4, %%xmm3    \n\t" // dst - pred
+        "movdqu    %%xmm3, (%3, %0)  \n\t"
+        "add          $16, %0        \n\t"
+        "cmp           %4, %0        \n\t"
+        " jb 1b                      \n\t"
         : "+r" (i)
-        : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w));
+        : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w)
+        : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory"
+    );
 
     l  = *left;
     lt = *left_top;
@@ -89,11 +91,11 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
 {
     av_unused int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_INLINE_ASM
-    if (INLINE_MMXEXT(cpu_flags)) {
-        c->sub_median_pred = sub_median_pred_mmxext;
+#if HAVE_SSE2_INLINE
+    if (INLINE_SSE2(cpu_flags)) {
+        c->sub_median_pred = sub_median_pred_sse2;
     }
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_SSE2_INLINE */
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->diff_bytes = ff_diff_bytes_sse2;
diff --git a/tests/checkasm/llviddspenc.c b/tests/checkasm/llviddspenc.c
index 8757b22618..f974e79165 100644
--- a/tests/checkasm/llviddspenc.c
+++ b/tests/checkasm/llviddspenc.c
@@ -82,9 +82,9 @@ static void check_sub_median_pred(LLVidEncDSPContext *c)
     uint8_t dst_ref[BUF_SIZE], dst_new[BUF_SIZE];
     uint8_t src1[BUF_SIZE], src2[BUF_SIZE];
 
-    declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src1,
-                      const uint8_t *src2, intptr_t w,
-                      int *left, int *left_top);
+    declare_func(void, uint8_t *dst, const uint8_t *src1,
+                 const uint8_t *src2, intptr_t w,
+                 int *left, int *left_top);
 
     if (check_func(c->sub_median_pred, "sub_median_pred")) {
         size_t width  = 1 + rnd() % MAX_STRIDE;

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to