This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit ca8774b9d61bef432a75f77e5b9461f18800d83c Author: Niklas Haas <[email protected]> AuthorDate: Sun Apr 19 13:46:15 2026 +0200 Commit: Niklas Haas <[email protected]> CommitDate: Tue Jun 9 18:27:20 2026 +0200 swscale/x86: remove broken and unnecessary 1x1 dither fast path This is broken because it fails to check dither.y_offset[] to determine if dithering for a channel is requested or not. This is unnecessary because the generic dither code already jumps over unused components, which is cheap enough not to worry about this special case for now. This code will, in any case, soon be replaced by a uops_macros.h-derived approach. This commit is only needed as a stopgap to make checkasm continue working after the sws_uops refactor. Signed-off-by: Niklas Haas <[email protected]> --- libswscale/x86/ops.c | 27 +++++++++++---------------- libswscale/x86/ops_float.asm | 28 ++++++++++------------------ 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c index 1191a2fc35..2087176cee 100644 --- a/libswscale/x86/ops.c +++ b/libswscale/x86/ops.c @@ -209,14 +209,8 @@ static int setup_shift(const SwsImplParams *params, SwsImplResult *out) static int setup_dither(const SwsImplParams *params, SwsImplResult *out) { const SwsOp *op = params->op; - /* 1x1 matrix / single constant */ - if (!op->dither.size_log2) { - const AVRational k = op->dither.matrix[0]; - out->priv.f32[0] = (float) k.num / k.den; - return 0; - } - const int size = 1 << op->dither.size_log2; + const int stride = size * sizeof(float); const int8_t *off = op->dither.y_offset; int max_offset = 0; for (int i = 0; i < 4; i++) { @@ -224,11 +218,17 @@ static int setup_dither(const SwsImplParams *params, SwsImplResult *out) max_offset = FFMAX(max_offset, off[i] & (size - 1)); } + /* 1x1 matrix / single constant */ + if (!op->dither.size_log2) { + const AVRational k = op->dither.matrix[0]; + out->priv.f32[0] = (float) k.num / k.den; + goto store_offsets; + } + /* Allocate extra rows to allow over-reading for row offsets. Note that * max_offset is currently never larger than 5, so the extra space needed * for this over-allocation is bounded by 5 * size * sizeof(float), * typically 320 bytes for a 16x16 dither matrix. */ - const int stride = size * sizeof(float); const int num_rows = size + max_offset; float *matrix = out->priv.ptr = av_mallocz(num_rows * stride); if (!matrix) @@ -240,6 +240,7 @@ static int setup_dither(const SwsImplParams *params, SwsImplResult *out) memcpy(&matrix[size * size], matrix, max_offset * stride); +store_offsets: /* Store relative pointer offset to each row inside extra space */ static_assert(sizeof(out->priv.ptr) <= sizeof(int16_t[4]), ">8 byte pointers not supported"); @@ -251,12 +252,6 @@ static int setup_dither(const SwsImplParams *params, SwsImplResult *out) return 0; } -#define DECL_DITHER0(EXT) \ - DECL_COMMON_PATTERNS(F32, dither0##EXT, \ - .op = SWS_OP_DITHER, \ - .setup = setup_dither, \ - ); - #define DECL_DITHER(EXT, SIZE) \ DECL_ASM(F32, SWS_COMP_ALL, dither##SIZE##EXT, \ .op = SWS_OP_DITHER, \ @@ -729,7 +724,7 @@ static const SwsOpTable ops16##EXT = { DECL_EXPAND(EXT, U8, U32) \ DECL_MIN_MAX(EXT) \ DECL_SCALE(EXT) \ - DECL_DITHER0(EXT) \ + DECL_DITHER(EXT, 0) \ DECL_DITHER(EXT, 1) \ DECL_DITHER(EXT, 2) \ DECL_DITHER(EXT, 3) \ @@ -790,7 +785,7 @@ static const SwsOpTable ops32##EXT = { REF_COMMON_PATTERNS(min##EXT), \ REF_COMMON_PATTERNS(max##EXT), \ REF_COMMON_PATTERNS(scale##EXT), \ - REF_COMMON_PATTERNS(dither0##EXT), \ + &op_dither0##EXT, \ &op_dither1##EXT, \ &op_dither2##EXT, \ &op_dither3##EXT, \ diff --git a/libswscale/x86/ops_float.asm b/libswscale/x86/ops_float.asm index 845f0081db..605a9aa005 100644 --- a/libswscale/x86/ops_float.asm +++ b/libswscale/x86/ops_float.asm @@ -209,28 +209,15 @@ IF W, mulps mw2, m8 ;--------------------------------------------------------- ; Dithering -%macro dither0 0 -op dither0 - ; constant offset for all channels - vbroadcastss m8, [implq + SwsOpImpl.priv] - LOAD_CONT tmp0q -IF X, addps mx, m8 -IF Y, addps my, m8 -IF Z, addps mz, m8 -IF W, addps mw, m8 -IF X, addps mx2, m8 -IF Y, addps my2, m8 -IF Z, addps mz2, m8 -IF W, addps mw2, m8 - CONTINUE tmp0q -%endmacro - %macro dither_row 5 ; size_log2, comp_idx, matrix, out, out2 mov tmp0w, [implq + SwsOpImpl.priv + (4 + %2) * 2] ; priv.u16[4 + i] ; test is tmp0w < 0 test tmp0w, tmp0w js .skip%2 -%if %1 == 1 +%if %1 == 0 + addps %4, m8 + addps %5, m8 +%elif %1 == 1 vbroadcastsd m8, [%3 + tmp0q] addps %4, m8 addps %5, m8 @@ -247,6 +234,10 @@ IF W, addps mw2, m8 %macro dither 1 ; size_log2 op dither%1 +%if %1 == 0 + ; dither offset is constant + vbroadcastss m8, [implq + SwsOpImpl.priv] +%else ; dither matrix is stored indirectly at the private data address mov tmp1q, [implq + SwsOpImpl.priv] ; add y offset. note that for 2x2, we would only need to look at the @@ -264,6 +255,7 @@ op dither%1 and tmp0d, (4 << %1) - 1 add tmp1q, tmp0q %endif +%endif dither_row %1, 0, tmp1q, mx, mx2 dither_row %1, 1, tmp1q, my, my2 dither_row %1, 2, tmp1q, mz, mz2 @@ -272,7 +264,7 @@ op dither%1 %endmacro %macro dither_fns 0 - decl_common_patterns dither0 + dither 0 dither 1 dither 2 dither 3 _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
