This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit ce096aa4ee9197701502d0c32e3c7f56c44d863e Author: Niklas Haas <[email protected]> AuthorDate: Wed Feb 25 17:00:07 2026 +0100 Commit: Niklas Haas <[email protected]> CommitDate: Thu Feb 26 13:09:14 2026 +0000 swscale/x86/ops: add support for optional dither indices Instead of defining multiple patterns for the dither ops, just define a single generic function that branches internally. The branch is well-predicted and ridiculously cheap. At least on my end, within margin of error. Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_chain.h | 1 + libswscale/x86/ops.c | 51 ++++++++++++++++++++++---------------------- libswscale/x86/ops_float.asm | 8 +++++-- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h index 56543c6b0b..532622fd2c 100644 --- a/libswscale/ops_chain.h +++ b/libswscale/ops_chain.h @@ -47,6 +47,7 @@ typedef union SwsOpPriv { int8_t i8[16]; uint8_t u8[16]; uint16_t u16[8]; + int16_t i16[8]; uint32_t u32[4]; float f32[4]; } SwsOpPriv; diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c index 0db6837c3a..44bdf159c4 100644 --- a/libswscale/x86/ops.c +++ b/libswscale/x86/ops.c @@ -201,10 +201,11 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out) } const int size = 1 << op->dither.size_log2; + const int8_t *off = op->dither.y_offset; int max_offset = 0; for (int i = 0; i < 4; i++) { - const int offset = op->dither.y_offset[i] & (size - 1); - max_offset = FFMAX(max_offset, offset); + if (off[i] >= 0) + max_offset = FFMAX(max_offset, off[i] & (size - 1)); } /* Allocate extra rows to allow over-reading for row offsets. 
Note that @@ -223,17 +224,17 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out) memcpy(&matrix[size * size], matrix, max_offset * stride); /* Store relative pointer offset to each row inside extra space */ - static_assert(sizeof(out->ptr) <= sizeof(uint16_t[4]), ">8 byte pointers not supported"); - assert(max_offset * stride <= UINT16_MAX); - uint16_t *offset = &out->u16[4]; + static_assert(sizeof(out->ptr) <= sizeof(int16_t[4]), ">8 byte pointers not supported"); + assert(max_offset * stride <= INT16_MAX); + int16_t *off_out = &out->i16[4]; for (int i = 0; i < 4; i++) - offset[i] = (op->dither.y_offset[i] & (size - 1)) * stride; + off_out[i] = off[i] >= 0 ? (off[i] & (size - 1)) * stride : -1; return 0; } -#define DECL_DITHER(EXT, SIZE) \ - DECL_COMMON_PATTERNS(F32, dither##SIZE##EXT, \ +#define DECL_DITHER(DECL_MACRO, EXT, SIZE) \ + DECL_MACRO(F32, dither##SIZE##EXT, \ .op = SWS_OP_DITHER, \ .setup = setup_dither, \ .free = (SIZE) ? av_free : NULL, \ @@ -453,15 +454,15 @@ static const SwsOpTable ops16##EXT = { DECL_EXPAND(EXT, U8, U32) \ DECL_MIN_MAX(EXT) \ DECL_SCALE(EXT) \ - DECL_DITHER(EXT, 0) \ - DECL_DITHER(EXT, 1) \ - DECL_DITHER(EXT, 2) \ - DECL_DITHER(EXT, 3) \ - DECL_DITHER(EXT, 4) \ - DECL_DITHER(EXT, 5) \ - DECL_DITHER(EXT, 6) \ - DECL_DITHER(EXT, 7) \ - DECL_DITHER(EXT, 8) \ + DECL_DITHER(DECL_COMMON_PATTERNS, EXT, 0) \ + DECL_DITHER(DECL_ASM, EXT, 1) \ + DECL_DITHER(DECL_ASM, EXT, 2) \ + DECL_DITHER(DECL_ASM, EXT, 3) \ + DECL_DITHER(DECL_ASM, EXT, 4) \ + DECL_DITHER(DECL_ASM, EXT, 5) \ + DECL_DITHER(DECL_ASM, EXT, 6) \ + DECL_DITHER(DECL_ASM, EXT, 7) \ + DECL_DITHER(DECL_ASM, EXT, 8) \ DECL_LINEAR(EXT, luma, SWS_MASK_LUMA) \ DECL_LINEAR(EXT, alpha, SWS_MASK_ALPHA) \ DECL_LINEAR(EXT, lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA) \ @@ -505,14 +506,14 @@ static const SwsOpTable ops32##EXT = { REF_COMMON_PATTERNS(max##EXT), \ REF_COMMON_PATTERNS(scale##EXT), \ REF_COMMON_PATTERNS(dither0##EXT), \ - REF_COMMON_PATTERNS(dither1##EXT), \ - 
REF_COMMON_PATTERNS(dither2##EXT), \ - REF_COMMON_PATTERNS(dither3##EXT), \ - REF_COMMON_PATTERNS(dither4##EXT), \ - REF_COMMON_PATTERNS(dither5##EXT), \ - REF_COMMON_PATTERNS(dither6##EXT), \ - REF_COMMON_PATTERNS(dither7##EXT), \ - REF_COMMON_PATTERNS(dither8##EXT), \ + &op_dither1##EXT, \ + &op_dither2##EXT, \ + &op_dither3##EXT, \ + &op_dither4##EXT, \ + &op_dither5##EXT, \ + &op_dither6##EXT, \ + &op_dither7##EXT, \ + &op_dither8##EXT, \ &op_luma##EXT, \ &op_alpha##EXT, \ &op_lumalpha##EXT, \ diff --git a/libswscale/x86/ops_float.asm b/libswscale/x86/ops_float.asm index 78f35a9785..c9dc408a9b 100644 --- a/libswscale/x86/ops_float.asm +++ b/libswscale/x86/ops_float.asm @@ -197,6 +197,9 @@ IF W, addps mw2, m8 %macro dither_row 5 ; size_log2, comp_idx, matrix, out, out2 mov tmp0w, [implq + SwsOpImpl.priv + (4 + %2) * 2] ; priv.u16[4 + i] + ; test if tmp0w < 0 + test tmp0w, tmp0w + js .skip%2 %if %1 == 1 vbroadcastsd m8, [%3 + tmp0q] addps %4, m8 @@ -209,6 +212,7 @@ IF W, addps mw2, m8 addps %4, [%3 + tmp0q] addps %5, [%3 + tmp0q + mmsize * ((4 << %1) > mmsize)] %endif +.skip%2: %endmacro %macro dither 1 ; size_log2 @@ -238,7 +242,7 @@ op dither%1 %endmacro %macro dither_fns 0 - dither0 + decl_common_patterns dither0 dither 1 dither 2 dither 3 @@ -364,5 +368,5 @@ decl_common_patterns conv32fto8 decl_common_patterns conv32fto16 decl_common_patterns min_max decl_common_patterns scale -decl_common_patterns dither_fns +dither_fns linear_fns _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
