This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit abe6ba17faa10bda8b0672467187c9358f59e54c
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Wed Dec 10 17:22:50 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Sun Dec 14 10:16:43 2025 +0100

    avcodec/x86/lossless_videoencdsp: Port sub_median_pred to NASM

    Reviewed-by: Lynne <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/lossless_videoencdsp.asm | 39 +++++++++++++++++++
 libavcodec/x86/lossless_videoencdsp_init.c | 60 +++---------------------------
 2 files changed, 44 insertions(+), 55 deletions(-)

diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index 8ccaea9139..47a10ae135 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -143,6 +143,45 @@ DIFF_BYTES_PROLOGUE
     %undef i
 %endif
 
+;--------------------------------------------------------------------------------------------------
+;void sub_median_pred(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+; intptr_t w, int *left, int *left_top)
+;--------------------------------------------------------------------------------------------------
+
+INIT_XMM sse2
+cglobal sub_median_pred, 6, 7, 6, dst, src1, src2, w, l, lt
+    movu m0, [src1q] ; LT
+    movu m4, [src2q] ; L
+    movd m1, [ltq] ; LT
+    movd m3, [lq] ; L
+    xor r6d, r6d
+    pslldq m0, 1
+    pslldq m4, 1
+    por m0, m1 ; LT
+    por m4, m3 ; L
+    jmp .first_iteration
+.loop:
+    movu m4, [src2q+r6q-1] ; L
+    movu m0, [src1q+r6q-1] ; LT
+.first_iteration:
+    movu m1, [src1q+r6q] ; T
+    movu m3, [src2q+r6q] ; X
+    psubb m2, m4, m0 ; L - LT
+    paddb m2, m1 ; L + T - LT
+    pmaxub m5, m4, m1 ; max(T, L)
+    pminub m1, m4 ; min(T, L)
+    pminub m5, m2
+    pmaxub m5, m1
+    psubb m3, m5 ; dst - pred
+    movu [dstq+r6q], m3
+    add r6d, 16
+    cmp r6d, wd
+    jb .loop
+    movzx src1d, BYTE [src1q+wq-1]
+    movzx src2d, BYTE [src2q+wq-1]
+    mov [ltq], src1d
+    mov [lq], src2d
+    RET
 
 ;--------------------------------------------------------------------------------------------------
 ;void sub_left_predict(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c
index d5dd576e5f..b47e23fe42 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c
@@ -24,7 +24,6 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
-#include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/lossless_videoencdsp.h"
 
@@ -33,68 +32,19 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                         intptr_t w);
 
+void ff_sub_median_pred_sse2(uint8_t *dst, const uint8_t *src1,
+                             const uint8_t *src2, intptr_t w,
+                             int *left, int *left_top);
+
 void ff_sub_left_predict_avx(uint8_t *dst, const uint8_t *src,
                              ptrdiff_t stride, ptrdiff_t width, int height);
 
-#if HAVE_SSE2_INLINE && HAVE_7REGS
-
-static void sub_median_pred_sse2(uint8_t *dst, const uint8_t *src1,
-                                 const uint8_t *src2, intptr_t w,
-                                 int *left, int *left_top)
-{
-    x86_reg i = 0;
-
-    __asm__ volatile (
-        "movdqu (%1, %0), %%xmm0 \n\t" // LT
-        "movdqu (%2, %0), %%xmm2 \n\t" // L
-        "movd (%6), %%xmm1 \n\t" // LT
-        "movd (%5), %%xmm3 \n\t" // L
-        "pslldq $1, %%xmm0 \n\t"
-        "pslldq $1, %%xmm2 \n\t"
-        "por %%xmm1, %%xmm0 \n\t" // LT
-        "por %%xmm3, %%xmm2 \n\t" // L
-        "jmp 2f \n\t"
-        "1: \n\t"
-        "movdqu -1(%2, %0), %%xmm2 \n\t" // L
-        "movdqu -1(%1, %0), %%xmm0 \n\t" // LT
-        "2: \n\t"
-        "movdqu (%1, %0), %%xmm1 \n\t" // T
-        "movdqu (%2, %0), %%xmm3 \n\t" // X
-        "movdqa %%xmm2, %%xmm4 \n\t" // L
-        "psubb %%xmm0, %%xmm2 \n\t"
-        "paddb %%xmm1, %%xmm2 \n\t" // L + T - LT
-        "movdqa %%xmm4, %%xmm5 \n\t" // L
-        "pmaxub %%xmm1, %%xmm4 \n\t" // max(T, L)
-        "pminub %%xmm5, %%xmm1 \n\t" // min(T, L)
-        "pminub %%xmm2, %%xmm4 \n\t"
-        "pmaxub %%xmm1, %%xmm4 \n\t"
-        "psubb %%xmm4, %%xmm3 \n\t" // dst - pred
-        "movdqu %%xmm3, (%3, %0) \n\t"
-        "add $16, %0 \n\t"
-        "cmp %4, %0 \n\t"
-        " jb 1b \n\t"
-        : "+r" (i)
-        : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w), "r" (left), "r" (left_top)
-        : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory"
-    );
-
-    *left_top = src1[w - 1];
-    *left = src2[w - 1];
-}
-
-#endif /* HAVE_INLINE_ASM */
-
 av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
 {
     av_unused int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_SSE2_INLINE && HAVE_7REGS
-    if (INLINE_SSE2(cpu_flags)) {
-        c->sub_median_pred = sub_median_pred_sse2;
-    }
-#endif /* HAVE_SSE2_INLINE */
-
     if (EXTERNAL_SSE2(cpu_flags)) {
+        c->sub_median_pred = ff_sub_median_pred_sse2;
         c->diff_bytes = ff_diff_bytes_sse2;
     }
 
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
