This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 2d01687fd0b13b00c5991017cf7e38c932cb2d18 Author: Niklas Haas <[email protected]> AuthorDate: Tue Jun 2 14:29:39 2026 +0200 Commit: Niklas Haas <[email protected]> CommitDate: Thu Jun 11 16:27:47 2026 +0000 swscale/ops_dispatch: generalize over_read/over_write to array I want to introduce operations like semiplanar reads, which would possibly require a different number of over_read bytes per plane. That aside, this is just a general cleanliness improvement. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_chain.c | 7 +++++-- libswscale/ops_chain.h | 10 +++++----- libswscale/ops_dispatch.c | 27 ++++++++++++++++++--------- libswscale/ops_dispatch.h | 6 +++--- libswscale/uops_backend.c | 5 +++-- libswscale/x86/ops.c | 26 +++++++++++++++++--------- 6 files changed, 51 insertions(+), 30 deletions(-) diff --git a/libswscale/ops_chain.c b/libswscale/ops_chain.c index 331f2f696b..d60f7ccf28 100644 --- a/libswscale/ops_chain.c +++ b/libswscale/ops_chain.c @@ -173,9 +173,12 @@ int ff_sws_uop_lookup(SwsContext *ctx, const SwsOpTable *const tables[], return ret; } + for (int i = 0; i < 4; i++) { + chain->over_read[i] = FFMAX(chain->over_read[i], res.over_read[i]); + chain->over_write[i] = FFMAX(chain->over_write[i], res.over_write[i]); + } + chain->cpu_flags |= params.table->cpu_flags; - chain->over_read = FFMAX(chain->over_read, res.over_read); - chain->over_write = FFMAX(chain->over_write, res.over_write); return 0; } diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h index 2126787782..abe4c545cd 100644 --- a/libswscale/ops_chain.h +++ b/libswscale/ops_chain.h @@ -86,9 +86,9 @@ typedef struct SwsOpChain { SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */ void (*free[SWS_MAX_OPS + 1])(SwsOpPriv *); int num_impl; - int cpu_flags; /* set of all used CPU flags */ - int over_read; /* chain over-reads input by this many bytes */ - int over_write; /* chain over-writes output by this many 
bytes */ + int cpu_flags; /* set of all used CPU flags */ + int over_read[4]; /* chain over-reads input by this many bytes */ + int over_write[4]; /* chain over-writes output by this many bytes */ } SwsOpChain; SwsOpChain *ff_sws_op_chain_alloc(void); @@ -115,8 +115,8 @@ typedef struct SwsImplResult { SwsFuncPtr func; /* overrides `SwsOpEntry.func` if non-NULL */ SwsOpPriv priv; /* private data for this implementation instance */ void (*free)(SwsOpPriv *priv); /* free function for `priv` */ - int over_read; /* implementation over-reads input by this many bytes */ - int over_write; /* implementation over-writes output by this many bytes */ + int over_read[4]; /* implementation over-reads input by this many bytes */ + int over_write[4]; /* implementation over-writes output by this many bytes */ } SwsImplResult; typedef struct SwsOpEntry { diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c index 1186a8a73e..fcf77204db 100644 --- a/libswscale/ops_dispatch.c +++ b/libswscale/ops_dispatch.c @@ -81,8 +81,12 @@ static int compile_backend(SwsContext *ctx, const SwsOpBackend *backend, *out = compiled; av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': " - "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n", - backend->name, out->block_size, out->over_read, out->over_write, + "block size = %d, over-read = {%d %d %d %d}, over-write = {%d %d %d %d}, " + "cpu flags = 0x%x\n", backend->name, out->block_size, + out->over_read[0], out->over_read[1], + out->over_read[2], out->over_read[3], + out->over_write[0], out->over_write[1], + out->over_write[2], out->over_write[3], out->cpu_flags); ff_sws_op_list_print(ctx, AV_LOG_VERBOSE, AV_LOG_TRACE, ops); @@ -229,7 +233,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, input_bytes = pixel_bytes(plane_w, p->pixel_bits_in, AV_ROUND_UP); } - size_t safe_bytes = safe_bytes_pad(input_bytes, comp->over_read); + size_t safe_bytes = safe_bytes_pad(input_bytes, comp->over_read[i]); 
size_t safe_blocks_in; if (exec->in_offset_x) { size_t filter_size = pixel_bytes(p->filter_size_h, p->pixel_bits_in, @@ -260,7 +264,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, int chroma = idx == 1 || idx == 2; int sub_x = chroma ? outdesc->log2_chroma_w : 0; int sub_y = chroma ? outdesc->log2_chroma_h : 0; - size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write); + size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write[i]); size_t safe_blocks_out = safe_bytes / exec->block_size_out; if (safe_blocks_out < num_blocks) { p->memcpy_out = true; @@ -314,7 +318,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP); } size_t loop_size = p->tail_blocks * exec->block_size_in; - tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align); + tail->in_stride[i] = FFALIGN(needed_size + comp->over_read[i], align); tail->in_bump[i] = tail->in_stride[i] - loop_size; alloc_size += tail->in_stride[i] * in->height; } @@ -322,7 +326,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, for (int i = 0; p->memcpy_out && i < p->planes_out; i++) { size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP); size_t loop_size = p->tail_blocks * exec->block_size_out; - tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align); + tail->out_stride[i] = FFALIGN(needed_size + comp->over_write[i], align); tail->out_bump[i] = tail->out_stride[i] - loop_size; alloc_size += tail->out_stride[i] * out->height; } @@ -484,17 +488,22 @@ static int rw_pixel_bits(const SwsOp *op) return elems * size * bits; } -static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits) +static void align_pass(SwsPass *pass, int block_size, const int *over_rw, + int pixel_bits) { if (!pass) return; /* Add at least as many pixels as needed to cover the padding requirement */ - const int pad = (over_rw * 
8 + pixel_bits - 1) / pixel_bits; + int pad_max = 0; + for (int i = 0; i < 4; i++) { + const int pad = (over_rw[i] * 8 + pixel_bits - 1) / pixel_bits; + pad_max = FFMAX(pad_max, pad); + } SwsPassBuffer *buf = pass->output; buf->width_align = FFMAX(buf->width_align, block_size); - buf->width_pad = FFMAX(buf->width_pad, pad); + buf->width_pad = FFMAX(buf->width_pad, pad_max); } static int compile(SwsGraph *graph, const SwsOpBackend *backend, diff --git a/libswscale/ops_dispatch.h b/libswscale/ops_dispatch.h index 7f1304dcc4..237a036f69 100644 --- a/libswscale/ops_dispatch.h +++ b/libswscale/ops_dispatch.h @@ -119,9 +119,9 @@ typedef struct SwsCompiledOp { int cpu_flags; /* active set of CPU flags (informative) */ /* Execution parameters for non-opaque functions only */ - int block_size; /* number of pixels processed per iteration */ - int over_read; /* implementation over-reads input by this many bytes */ - int over_write; /* implementation over-writes output by this many bytes */ + int block_size; /* number of pixels processed per iteration */ + int over_read[4]; /* implementation over-reads input by this many bytes */ + int over_write[4]; /* implementation over-writes output by this many bytes */ /* Arbitrary private data */ void *priv; diff --git a/libswscale/uops_backend.c b/libswscale/uops_backend.c index fd7220a57a..50f5302ca6 100644 --- a/libswscale/uops_backend.c +++ b/libswscale/uops_backend.c @@ -165,13 +165,14 @@ static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out) .slice_align = 1, .block_size = SWS_BLOCK_SIZE, .cpu_flags = chain->cpu_flags, - .over_read = chain->over_read, - .over_write = chain->over_write, .priv = chain, .free = ff_sws_op_chain_free_cb, .func = process, }; + memcpy(out->over_read, chain->over_read, sizeof(out->over_read)); + memcpy(out->over_write, chain->over_write, sizeof(out->over_write)); + av_log(ctx, AV_LOG_DEBUG, "Compiled micro-ops:\n"); for (int i = 0; i < uops->num_ops; i++) { char 
name[SWS_UOP_NAME_MAX]; diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c index 4c8eceb1cb..e8b0a20a1c 100644 --- a/libswscale/x86/ops.c +++ b/libswscale/x86/ops.c @@ -35,8 +35,8 @@ static int setup_rw_packed(const SwsImplParams *params, SwsImplResult *out) /* 3-component packed reads/writes process one extra garbage word */ if (uop->mask == SWS_COMP_ELEMS(3)) { switch (uop->uop) { - case SWS_UOP_READ_PACKED: out->over_read = sizeof(uint32_t); break; - case SWS_UOP_WRITE_PACKED: out->over_write = sizeof(uint32_t); break; + case SWS_UOP_READ_PACKED: out->over_read[0] = sizeof(uint32_t); break; + case SWS_UOP_WRITE_PACKED: out->over_write[0] = sizeof(uint32_t); break; } } @@ -153,7 +153,11 @@ static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out) out->priv.ptr = weights.ptr; out->priv.uptr[1] = aligned_size; out->free = ff_op_priv_free; - out->over_read = (aligned_size - filter_size) * pixel_size; + + for (int i = 0; i < 4; i++) { + if (uop->mask & SWS_COMP(i)) + out->over_read[i] = (aligned_size - filter_size) * pixel_size; + } return 0; } @@ -236,7 +240,11 @@ static int setup_filter_h_4x4(const SwsImplParams *params, SwsImplResult *out) out->priv.ptr = weights.ptr; out->priv.uptr[1] = aligned_size * sizeof_weights; out->free = ff_op_priv_free; - out->over_read = (aligned_size - filter_size) * pixel_size; + + for (int i = 0; i < 4; i++) { + if (uop->mask & SWS_COMP(i)) + out->over_read[i] = (aligned_size - filter_size) * pixel_size; + } return 0; } @@ -506,8 +514,8 @@ static int solve_shuffle(const SwsOpList *ops, int mmsize, SwsCompiledOp *out) .free = av_free, .slice_align = 1, .block_size = pixels * num_lanes, - .over_read = movsize(in_total, mmsize) - in_total, - .over_write = movsize(out_total, mmsize) - out_total, + .over_read = { movsize(in_total, mmsize) - in_total }, + .over_write = { movsize(out_total, mmsize) - out_total }, .cpu_flags = mmsize > 32 ? AV_CPU_FLAG_AVX512 : mmsize > 16 ? 
AV_CPU_FLAG_AVX2 : AV_CPU_FLAG_SSE4, @@ -640,9 +648,9 @@ static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out) return ret; } - out->cpu_flags = chain->cpu_flags; - out->over_read = chain->over_read; - out->over_write = chain->over_write; + out->cpu_flags = chain->cpu_flags; + memcpy(out->over_read, chain->over_read, sizeof(out->over_read)); + memcpy(out->over_write, chain->over_write, sizeof(out->over_write)); ff_sws_uop_list_free(&uops); return 0; _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
