This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit addee699551473d5adfc1d9180cc735ddd15e530 Author: Niklas Haas <[email protected]> AuthorDate: Tue Jun 2 14:30:17 2026 +0200 Commit: Niklas Haas <[email protected]> CommitDate: Thu Jun 11 16:27:47 2026 +0000 swscale/ops_dispatch: generalize block_size_in/out to array See previous commit for justification. I decided to split these refactors up into several independent commits to make it easier to review and bisect, since they are all independent atomic changes. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_dispatch.c | 29 ++++++++++++++++------------- libswscale/ops_dispatch.h | 6 +++--- libswscale/ops_memcpy.c | 2 +- libswscale/x86/ops_include.asm | 4 ++-- tests/checkasm/sw_ops.c | 8 ++++---- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c index fcf77204db..069474c056 100644 --- a/libswscale/ops_dispatch.c +++ b/libswscale/ops_dispatch.c @@ -242,7 +242,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, safe_bytes - filter_size, exec->in_offset_x); } else { - safe_blocks_in = safe_bytes / exec->block_size_in; + safe_blocks_in = safe_bytes / exec->block_size_in[i]; } if (safe_blocks_in < num_blocks) { @@ -251,7 +251,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, safe_blocks = FFMIN(safe_blocks, safe_blocks_in); } - size_t loop_size = num_blocks * exec->block_size_in; + size_t loop_size = num_blocks * exec->block_size_in[i]; exec->in[i] = in->data[idx]; exec->in_stride[i] = in->linesize[idx]; exec->in_bump[i] = in->linesize[idx] - loop_size; @@ -265,13 +265,13 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, int sub_x = chroma ? outdesc->log2_chroma_w : 0; int sub_y = chroma ? outdesc->log2_chroma_h : 0; size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write[i]); - size_t safe_blocks_out = safe_bytes / exec->block_size_out; + size_t safe_blocks_out = safe_bytes / exec->block_size_out[i]; if (safe_blocks_out < num_blocks) { p->memcpy_out = true; safe_blocks = FFMIN(safe_blocks, safe_blocks_out); } - size_t loop_size = num_blocks * exec->block_size_out; + size_t loop_size = num_blocks * exec->block_size_out[i]; exec->out[i] = out->data[idx]; exec->out_stride[i] = out->linesize[idx]; exec->out_bump[i] = out->linesize[idx] - loop_size; @@ -317,7 +317,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, } else { needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP); } - size_t loop_size = p->tail_blocks * exec->block_size_in; + size_t loop_size = p->tail_blocks * exec->block_size_in[i]; tail->in_stride[i] = FFALIGN(needed_size + comp->over_read[i], align); tail->in_bump[i] = tail->in_stride[i] - loop_size; alloc_size += tail->in_stride[i] * in->height; @@ -325,7 +325,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, for (int i = 0; p->memcpy_out && i < p->planes_out; i++) { size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP); - size_t loop_size = p->tail_blocks * exec->block_size_out; + size_t loop_size = p->tail_blocks * exec->block_size_out[i]; tail->out_stride[i] = FFALIGN(needed_size + comp->over_write[i], align); tail->out_bump[i] = tail->out_stride[i] - loop_size; alloc_size += tail->out_stride[i] * out->height; @@ -419,8 +419,8 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, /* We process fewer blocks, so the in_bump needs to be increased * to reflect that the plane pointers are left on the last block, * not the end of the processed line, after each loop iteration */ - exec.in_bump[i] += exec.block_size_in * tail_blocks; - exec.out_bump[i] += exec.block_size_out * tail_blocks; + exec.in_bump[i] += exec.block_size_in[i] * tail_blocks; + exec.out_bump[i] += exec.block_size_out[i] * tail_blocks; } comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h); @@ -448,7 +448,7 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, exec.in[i], exec.in_stride[i], lines, p->tail_size_in); } else { /* Reuse input pointers directly */ - const size_t loop_size = tail_blocks * exec.block_size_in; + const size_t loop_size = tail_blocks * exec.block_size_in[i]; tail.in[i] = exec.in[i]; tail.in_stride[i] = exec.in_stride[i]; tail.in_bump[i] = exec.in_stride[i] - loop_size; @@ -457,7 +457,7 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, for (int i = 0; !memcpy_out && i < p->planes_out; i++) { /* Reuse output pointers directly */ - const size_t loop_size = tail_blocks * exec.block_size_out; + const size_t loop_size = tail_blocks * exec.block_size_out[i]; tail.out[i] = exec.out[i]; tail.out_stride[i] = exec.out_stride[i]; tail.out_bump[i] = exec.out_stride[i] - loop_size; @@ -552,8 +552,10 @@ static int compile(SwsGraph *graph, const SwsOpBackend *backend, goto fail; } - p->exec_base.block_size_in = block_bits_in >> 3; - p->exec_base.block_size_out = block_bits_out >> 3; + for (int i = 0; i < 4; i++) { + p->exec_base.block_size_in[i] = block_bits_in >> 3; + p->exec_base.block_size_out[i] = block_bits_out >> 3; + } for (int i = 0; i < 4; i++) { p->idx_in[i] = i < p->planes_in ? ops->plane_src[i] : -1; @@ -602,7 +604,8 @@ static int compile(SwsGraph *graph, const SwsOpBackend *backend, } for (int x = filter->dst_size; x < pixels; x++) offset[x] = offset[filter->dst_size - 1]; - p->exec_base.block_size_in = 0; /* ptr does not advance */ + for (int i = 0; i < 4; i++) + p->exec_base.block_size_in[i] = 0; /* ptr does not advance */ p->filter_size_h = filter->filter_size; } diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h index 237a036f69..4097b8e947 100644 --- a/libswscale/ops_dispatch.h +++ b/libswscale/ops_dispatch.h @@ -54,8 +54,8 @@ typedef struct SwsOpExec { /* Extra metadata, may or may not be useful */ int32_t width, height; /* Overall output image dimensions */ int32_t slice_y, slice_h; /* Start and height of current slice */ - int32_t block_size_in; /* Size of a block of pixels in bytes */ - int32_t block_size_out; + int32_t block_size_in[4]; /* Size of a block of pixels in bytes */ + int32_t block_size_out[4]; /* Subsampling factors for each plane */ uint8_t in_sub_y[4], out_sub_y[4]; @@ -81,7 +81,7 @@ typedef struct SwsOpExec { } SwsOpExec; static_assert(sizeof(SwsOpExec) == 24 * sizeof(void *) + - 6 * sizeof(int32_t) + + 12 * sizeof(int32_t) + 16 * sizeof(uint8_t) + 2 * sizeof(void *), "SwsOpExec layout mismatch"); diff --git a/libswscale/ops_memcpy.c b/libswscale/ops_memcpy.c index 00f3e79608..26634049dd 100644 --- a/libswscale/ops_memcpy.c +++ b/libswscale/ops_memcpy.c @@ -43,12 +43,12 @@ static void process(const SwsOpExec *exec, const void *priv, { const MemcpyPriv *p = priv; const int lines = y_end - y_start; - const int bytes = x_end * exec->block_size_out; av_assert1(x_start == 0 && x_end == exec->width); for (int i = 0; i < p->num_planes; i++) { uint8_t *out = exec->out[i]; const int idx = p->index[i]; + const int bytes = x_end * exec->block_size_out[i]; const int use_loop = exec->out_stride[i] > bytes + SWS_MAX_PADDING; if (idx < 0 && !use_loop) { memset(out, p->clear_value[i], exec->out_stride[i] * lines); diff --git a/libswscale/x86/ops_include.asm b/libswscale/x86/ops_include.asm index cc44a247b0..073ed31e57 100644 --- a/libswscale/x86/ops_include.asm +++ b/libswscale/x86/ops_include.asm @@ -123,8 +123,8 @@ struc SwsOpExec .height resd 1 .slice_y resd 1 .slice_h resd 1 - .block_size_in resd 1 - .block_size_out resd 1 + .block_size_in resd 4 + .block_size_out resd 4 .in_sub_y4 resb 4 .out_sub_y4 resb 4 .in_sub_x4 resb 4 diff --git a/tests/checkasm/sw_ops.c b/tests/checkasm/sw_ops.c index e09635abd4..fdc17bffa8 100644 --- a/tests/checkasm/sw_ops.c +++ b/tests/checkasm/sw_ops.c @@ -190,19 +190,19 @@ static void check_compiled(const char *name, exec.in_offset_x = in_offset_x; } - exec.block_size_in = comp_ref->block_size * rw_pixel_bits(read_op) >> 3; - exec.block_size_out = comp_ref->block_size * rw_pixel_bits(write_op) >> 3; for (int i = 0; i < NB_PLANES; i++) { exec.in[i] = (void *) src0[i]; exec.out[i] = (void *) dst0[i]; + exec.block_size_in[i] = comp_ref->block_size * rw_pixel_bits(read_op) >> 3; + exec.block_size_out[i] = comp_ref->block_size * rw_pixel_bits(write_op) >> 3; } checkasm_call(comp_ref->func, &exec, comp_ref->priv, 0, 0, PIXELS / comp_ref->block_size, LINES); - exec.block_size_in = comp_new->block_size * rw_pixel_bits(read_op) >> 3; - exec.block_size_out = comp_new->block_size * rw_pixel_bits(write_op) >> 3; for (int i = 0; i < NB_PLANES; i++) { exec.in[i] = (void *) src1[i]; exec.out[i] = (void *) dst1[i]; + exec.block_size_in[i] = comp_new->block_size * rw_pixel_bits(read_op) >> 3; + exec.block_size_out[i] = comp_new->block_size * rw_pixel_bits(write_op) >> 3; } checkasm_call_checked(comp_new->func, &exec, comp_new->priv, 0, 0, PIXELS / comp_new->block_size, LINES); _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
