This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 9ba33cc1980d624a45d47581efe00663f9eac4e3 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Wed Dec 10 15:56:34 2025 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Sun Dec 14 10:16:40 2025 +0100 avcodec/x86/lossless_videoencdsp_init: Avoid special-casing first pixel Old benchmarks: sub_median_pred_c: 404.1 ( 1.00x) sub_median_pred_sse2: 20.5 (19.67x) New benchmarks: sub_median_pred_c: 408.5 ( 1.00x) sub_median_pred_sse2: 19.2 (21.27x) Reviewed-by: Lynne <[email protected]> Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/lossless_videoencdsp_init.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c index b3ea2da388..d5dd576e5f 100644 --- a/libavcodec/x86/lossless_videoencdsp_init.c +++ b/libavcodec/x86/lossless_videoencdsp_init.c @@ -27,7 +27,6 @@ #include "libavutil/x86/asm.h" #include "libavutil/x86/cpu.h" #include "libavcodec/lossless_videoencdsp.h" -#include "libavcodec/mathops.h" void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w); @@ -37,20 +36,23 @@ void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, void ff_sub_left_predict_avx(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height); -#if HAVE_SSE2_INLINE +#if HAVE_SSE2_INLINE && HAVE_7REGS static void sub_median_pred_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w, int *left, int *left_top) { x86_reg i = 0; - uint8_t l, lt; __asm__ volatile ( "movdqu (%1, %0), %%xmm0 \n\t" // LT "movdqu (%2, %0), %%xmm2 \n\t" // L + "movd (%6), %%xmm1 \n\t" // LT + "movd (%5), %%xmm3 \n\t" // L "pslldq $1, %%xmm0 \n\t" "pslldq $1, %%xmm2 \n\t" + "por %%xmm1, %%xmm0 \n\t" // LT + "por %%xmm3, %%xmm2 \n\t" // L "jmp 2f \n\t" "1: \n\t" "movdqu -1(%2, %0), %%xmm2 \n\t" // L @@ -72,15 +74,10 @@ static void sub_median_pred_sse2(uint8_t *dst, const uint8_t *src1, "cmp %4, %0 \n\t" " jb 1b \n\t" : "+r" (i) - : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w) + : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w), "r" (left), "r" (left_top) : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory" ); - l = *left; - lt = *left_top; - - dst[0] = src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt) & 0xFF); - *left_top = src1[w - 1]; *left = src2[w - 1]; } @@ -91,7 +88,7 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c) { av_unused int cpu_flags = av_get_cpu_flags(); -#if HAVE_SSE2_INLINE +#if HAVE_SSE2_INLINE && HAVE_7REGS if (INLINE_SSE2(cpu_flags)) { c->sub_median_pred = sub_median_pred_sse2; } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
