This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit abe6ba17faa10bda8b0672467187c9358f59e54c
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Wed Dec 10 17:22:50 2025 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Sun Dec 14 10:16:43 2025 +0100

    avcodec/x86/lossless_videoencdsp: Port sub_median_pred to NASM
    
    Reviewed-by: Lynne <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/lossless_videoencdsp.asm    | 39 +++++++++++++++++++
 libavcodec/x86/lossless_videoencdsp_init.c | 60 +++---------------------------
 2 files changed, 44 insertions(+), 55 deletions(-)

diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index 8ccaea9139..47a10ae135 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -143,6 +143,45 @@ DIFF_BYTES_PROLOGUE
 %undef i
 %endif
 
+;--------------------------------------------------------------------------------------------------
+;void sub_median_pred(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+;                     intptr_t w, int *left, int *left_top)
+;--------------------------------------------------------------------------------------------------
+; dst[i] = src2[i] - median(L, T, L + T - LT) (byte arithmetic); T/LT are read from src1, L/X from src2.
+INIT_XMM sse2
+cglobal sub_median_pred, 6, 7, 6, dst, src1, src2, w, l, lt
+    movu          m0, [src1q]       ; src1[0..15]; turned into LT for the first block below
+    movu          m4, [src2q]       ; src2[0..15]; turned into L for the first block below
+    movd          m1, [ltq]         ; dword load of *left_top; NOTE(review): presumably always < 256 - confirm
+    movd          m3, [lq]          ; dword load of *left; NOTE(review): presumably always < 256 - confirm
+    xor          r6d, r6d           ; r6 = byte offset i = 0
+    pslldq        m0, 1             ; shift up by one byte, freeing byte 0 for *left_top
+    pslldq        m4, 1             ; shift up by one byte, freeing byte 0 for *left
+    por           m0, m1            ; LT = { *left_top, src1[0..14] }
+    por           m4, m3            ; L  = { *left,     src2[0..14] }
+    jmp .first_iteration            ; L/LT for the first block were built above; skip the [i-1] loads
+.loop:
+    movu          m4, [src2q+r6q-1] ; L  = src2[i-1 .. i+14]
+    movu          m0, [src1q+r6q-1] ; LT = src1[i-1 .. i+14]
+.first_iteration:
+    movu          m1, [src1q+r6q]   ; T = src1[i .. i+15]
+    movu          m3, [src2q+r6q]   ; X = src2[i .. i+15]
+    psubb         m2, m4, m0        ; L - LT (wraps modulo 256)
+    paddb         m2, m1            ; L + T - LT (wraps modulo 256)
+    pmaxub        m5, m4, m1        ; max(T, L)
+    pminub        m1, m4            ; min(T, L)
+    pminub        m5, m2            ; min(max(T, L), L + T - LT)
+    pmaxub        m5, m1            ; pred = median(L, T, L + T - LT)
+    psubb         m3, m5            ; X - pred
+    movu  [dstq+r6q], m3            ; always a full 16-byte store; the last one may cover bytes at or past w
+    add          r6d, 16            ; advance to the next 16-byte block
+    cmp          r6d, wd            ; 32-bit unsigned compare against the low dword of w
+    jb         .loop
+    movzx      src1d, BYTE [src1q+wq-1] ; src1[w-1]; the src1 pointer register is reused as scratch
+    movzx      src2d, BYTE [src2q+wq-1] ; src2[w-1]; the src2 pointer register is reused as scratch
+    mov        [ltq], src1d         ; *left_top = src1[w-1]
+    mov         [lq], src2d         ; *left     = src2[w-1]
+    RET
 
 
 ;--------------------------------------------------------------------------------------------------
 ;void sub_left_predict(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c
index d5dd576e5f..b47e23fe42 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c
@@ -24,7 +24,6 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
-#include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/lossless_videoencdsp.h"
 
@@ -33,68 +32,19 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                         intptr_t w);
 
+void ff_sub_median_pred_sse2(uint8_t *dst, const uint8_t *src1,
+                             const uint8_t *src2, intptr_t w,
+                             int *left, int *left_top);
+
 void ff_sub_left_predict_avx(uint8_t *dst, const uint8_t *src,
                             ptrdiff_t stride, ptrdiff_t width, int height);
 
-#if HAVE_SSE2_INLINE && HAVE_7REGS
-
-static void sub_median_pred_sse2(uint8_t *dst, const uint8_t *src1,
-                                 const uint8_t *src2, intptr_t w,
-                                 int *left, int *left_top)
-{
-    x86_reg i = 0;
-
-    __asm__ volatile (
-        "movdqu  (%1, %0), %%xmm0    \n\t" // LT
-        "movdqu  (%2, %0), %%xmm2    \n\t" // L
-        "movd        (%6), %%xmm1    \n\t" // LT
-        "movd        (%5), %%xmm3    \n\t" // L
-        "pslldq        $1, %%xmm0    \n\t"
-        "pslldq        $1, %%xmm2    \n\t"
-        "por       %%xmm1, %%xmm0    \n\t" // LT
-        "por       %%xmm3, %%xmm2    \n\t" // L
-        "jmp 2f                      \n\t"
-        "1:                          \n\t"
-        "movdqu -1(%2, %0), %%xmm2   \n\t" // L
-        "movdqu -1(%1, %0), %%xmm0   \n\t" // LT
-        "2:                          \n\t"
-        "movdqu  (%1, %0), %%xmm1    \n\t" // T
-        "movdqu  (%2, %0), %%xmm3    \n\t" // X
-        "movdqa    %%xmm2, %%xmm4    \n\t" // L
-        "psubb     %%xmm0, %%xmm2    \n\t"
-        "paddb     %%xmm1, %%xmm2    \n\t" // L + T - LT
-        "movdqa    %%xmm4, %%xmm5    \n\t" // L
-        "pmaxub    %%xmm1, %%xmm4    \n\t" // max(T, L)
-        "pminub    %%xmm5, %%xmm1    \n\t" // min(T, L)
-        "pminub    %%xmm2, %%xmm4    \n\t"
-        "pmaxub    %%xmm1, %%xmm4    \n\t"
-        "psubb     %%xmm4, %%xmm3    \n\t" // dst - pred
-        "movdqu    %%xmm3, (%3, %0)  \n\t"
-        "add          $16, %0        \n\t"
-        "cmp           %4, %0        \n\t"
-        " jb 1b                      \n\t"
-        : "+r" (i)
-        : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w), "r" (left), "r" (left_top)
-        : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory"
-    );
-
-    *left_top = src1[w - 1];
-    *left     = src2[w - 1];
-}
-
-#endif /* HAVE_INLINE_ASM */
-
 av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
 {
     av_unused int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_SSE2_INLINE && HAVE_7REGS
-    if (INLINE_SSE2(cpu_flags)) {
-        c->sub_median_pred = sub_median_pred_sse2;
-    }
-#endif /* HAVE_SSE2_INLINE */
-
     if (EXTERNAL_SSE2(cpu_flags)) {
+        c->sub_median_pred = ff_sub_median_pred_sse2;
         c->diff_bytes = ff_diff_bytes_sse2;
     }
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to