This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 3f7e3cedb58cb963a5cb90e81b7654d993b842cf Author: Niklas Haas <[email protected]> AuthorDate: Wed Dec 3 19:12:37 2025 +0100 Commit: Niklas Haas <[email protected]> CommitDate: Mon Dec 15 14:31:58 2025 +0000 swscale/x86/ops_float: store and load per row dither offset directly Instead of computing y + N with a hard-coded index offset, calculate the relative offset as a 16-bit integer in C and add that to the pointer directly. Since we no longer mask the resulting combined address, this may result in overread, but that's fine since we over-provisioned the array in the previous commit. --- libswscale/x86/ops.c | 7 +++++++ libswscale/x86/ops_common.asm | 3 +++ libswscale/x86/ops_float.asm | 19 +++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c index 1d8a2e77da..bc61266588 100644 --- a/libswscale/x86/ops.c +++ b/libswscale/x86/ops.c @@ -215,6 +215,13 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out) memcpy(&matrix[size * size], matrix, max_offset * stride); + /* Store relative pointer offset to each row inside extra space */ + static_assert(sizeof(out->ptr) <= sizeof(uint16_t[4]), ">8 byte pointers not supported"); + assert(max_offset * stride <= UINT16_MAX); + uint16_t *offset = &out->u16[4]; + for (int i = 0; i < 4; i++) + offset[i] = (op->dither.y_offset[i] & (size - 1)) * stride; + return 0; } diff --git a/libswscale/x86/ops_common.asm b/libswscale/x86/ops_common.asm index 3c9154584a..e04ee70b56 100644 --- a/libswscale/x86/ops_common.asm +++ b/libswscale/x86/ops_common.asm @@ -245,6 +245,9 @@ endstruc %define tmp0d r4d %define tmp1d r5d +%define tmp0w r4w +%define tmp1w r5w + ; Registers for plane pointers; put at the end (and in ascending plane order) ; so that we can avoid reserving them when not necessary %define out0q r6q diff --git a/libswscale/x86/ops_float.asm b/libswscale/x86/ops_float.asm index ef08212fd6..2863085a8e 100644 --- a/libswscale/x86/ops_float.asm +++ b/libswscale/x86/ops_float.asm @@ -179,10 +179,8 @@ IF W, mulps mw2, m8 CONTINUE tmp0q %endmacro -%macro load_dither_row 5 ; size_log2, y, addr, out, out2 - lea tmp0q, %2 - and tmp0q, (1 << %1) - 1 - shl tmp0q, %1+2 +%macro load_dither_row 5 ; size_log2, comp_idx, addr, out, out2 + mov tmp0w, [implq + SwsOpImpl.priv + (4 + %2) * 2] ; priv.u16[4 + i] %if %1 == 1 vbroadcastsd %4, [%3 + tmp0q] %elif %1 == 2 @@ -225,6 +223,11 @@ op dither%1 %endif ; dither matrix is stored indirectly at the private data address mov tmp1q, [implq + SwsOpImpl.priv] + ; add y offset + mov tmp0d, yd + and tmp0d, (1 << %1) - 1 + shl tmp0d, %1 + 2 ; * sizeof(float) + add tmp1q, tmp0q %if (4 << %1) > 2 * mmsize ; need to add in x offset mov tmp0d, bxd @@ -232,10 +235,10 @@ op dither%1 and tmp0d, (4 << %1) - 1 add tmp1q, tmp0q %endif -IF X, load_dither_row %1, [yd + 0], tmp1q, DX, DX2 -IF Y, load_dither_row %1, [yd + 3], tmp1q, DY, DY2 -IF Z, load_dither_row %1, [yd + 2], tmp1q, DZ, DZ2 -IF W, load_dither_row %1, [yd + 5], tmp1q, DW, DW2 +IF X, load_dither_row %1, 0, tmp1q, DX, DX2 +IF Y, load_dither_row %1, 1, tmp1q, DY, DY2 +IF Z, load_dither_row %1, 2, tmp1q, DZ, DZ2 +IF W, load_dither_row %1, 3, tmp1q, DW, DW2 %endif LOAD_CONT tmp0q IF X, addps mx, DX _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
