PR #23538 opened by Niklas Haas (haasn) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23538 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23538.patch
Cherry-picked a bit of low-hanging fruit to get an LLM review and CI workflow. >From 127efb70727bd9a82659238f415506c913bac66a Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Tue, 16 Jun 2026 15:03:56 +0200 Subject: [PATCH 01/16] swscale/format: pass SwsFormat to ff_sws_decode_pixfmt() Needed to set initial plane size metadata. I decided to update ff_sws_encode_pixfmt() as well for symmetry. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/format.c | 23 +++++++++++------------ libswscale/format.h | 4 ++-- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/libswscale/format.c b/libswscale/format.c index 545ba1909e..c34d335500 100644 --- a/libswscale/format.c +++ b/libswscale/format.c @@ -958,11 +958,10 @@ static void swizzle_inv(SwsSwizzleOp *swiz) * it will end up getting pushed towards the output or optimized away entirely * by the optimization pass. */ -static SwsClearOp fmt_clear(enum AVPixelFormat fmt) +static SwsClearOp fmt_clear(const SwsFormat *fmt) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); - const bool has_chroma = desc->nb_components >= 3; - const bool has_alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; + const bool has_chroma = fmt->desc->nb_components >= 3; + const bool has_alpha = fmt->desc->flags & AV_PIX_FMT_FLAG_ALPHA; SwsClearOp c = {0}; if (!has_chroma) { @@ -984,9 +983,9 @@ static SwsClearOp fmt_clear(enum AVPixelFormat fmt) # define NATIVE_ENDIAN_FLAG 0 #endif -int ff_sws_decode_pixfmt(SwsOpList *ops, enum AVPixelFormat fmt) +int ff_sws_decode_pixfmt(SwsOpList *ops, const SwsFormat *fmt) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); + const AVPixFmtDescriptor *desc = fmt->desc; SwsPixelType pixel_type, raw_type; SwsReadWriteOp rw_op; SwsSwizzleOp swizzle; @@ -994,7 +993,7 @@ int ff_sws_decode_pixfmt(SwsOpList *ops, enum AVPixelFormat fmt) SwsComps *comps = &ops->comps_src; SwsShiftOp shift; - RET(fmt_analyze(fmt, &rw_op, &unpack, 
&swizzle, &shift, + RET(fmt_analyze(fmt->format, &rw_op, &unpack, &swizzle, &shift, &pixel_type, &raw_type)); swizzle_inv(&swizzle); @@ -1073,16 +1072,16 @@ int ff_sws_decode_pixfmt(SwsOpList *ops, enum AVPixelFormat fmt) return 0; } -int ff_sws_encode_pixfmt(SwsOpList *ops, enum AVPixelFormat fmt) +int ff_sws_encode_pixfmt(SwsOpList *ops, const SwsFormat *fmt) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); + const AVPixFmtDescriptor *desc = fmt->desc; SwsPixelType pixel_type, raw_type; SwsReadWriteOp rw_op; SwsSwizzleOp swizzle; SwsPackOp pack; SwsShiftOp shift; - RET(fmt_analyze(fmt, &rw_op, &pack, &swizzle, &shift, + RET(fmt_analyze(fmt->format, &rw_op, &pack, &swizzle, &shift, &pixel_type, &raw_type)); if (shift.amount) { @@ -1680,7 +1679,7 @@ int ff_sws_op_list_generate(SwsContext *ctx, const SwsFormat *src, ops->dst = *dst; const SwsPixelType type = SWS_PIXEL_F32; - int ret = ff_sws_decode_pixfmt(ops, src->format); + int ret = ff_sws_decode_pixfmt(ops, src); if (ret < 0) goto fail; ret = ff_sws_decode_colors(ctx, type, ops, src, incomplete); @@ -1692,7 +1691,7 @@ int ff_sws_op_list_generate(SwsContext *ctx, const SwsFormat *src, ret = ff_sws_encode_colors(ctx, type, ops, src, dst, incomplete); if (ret < 0) goto fail; - ret = ff_sws_encode_pixfmt(ops, dst->format); + ret = ff_sws_encode_pixfmt(ops, dst); if (ret < 0) goto fail; diff --git a/libswscale/format.h b/libswscale/format.h index 67f25d7006..36158da55a 100644 --- a/libswscale/format.h +++ b/libswscale/format.h @@ -174,8 +174,8 @@ typedef enum SwsPixelType SwsPixelType; * * Returns 0 on success, or a negative error code on failure. 
*/ -int ff_sws_decode_pixfmt(SwsOpList *ops, enum AVPixelFormat fmt); -int ff_sws_encode_pixfmt(SwsOpList *ops, enum AVPixelFormat fmt); +int ff_sws_decode_pixfmt(SwsOpList *ops, const SwsFormat *fmt); +int ff_sws_encode_pixfmt(SwsOpList *ops, const SwsFormat *fmt); /** * Append a set of operations for transforming decoded pixel values to/from -- 2.52.0 >From e52459195c247b51cd813fd5aa8ab68e88d3affe Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Sat, 20 Jun 2026 02:50:26 +0200 Subject: [PATCH 02/16] swscale/ops: simplify SWS_OP_READ default comps handling We can still pre-fill the prev array here; ff_sws_apply_op_q() is a no-op. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libswscale/ops.c b/libswscale/ops.c index 31bb2bdec4..1ea5261c3d 100644 --- a/libswscale/ops.c +++ b/libswscale/ops.c @@ -361,7 +361,6 @@ void ff_sws_op_list_update_comps(SwsOpList *ops) SwsOp *op = &ops->ops[n]; switch (op->op) { - case SWS_OP_READ: case SWS_OP_LINEAR: case SWS_OP_DITHER: case SWS_OP_SWAP_BYTES: @@ -393,11 +392,6 @@ void ff_sws_op_list_update_comps(SwsOpList *ops) op->comps.min[i] = ops->comps_src.min[idx]; op->comps.max[i] = ops->comps_src.max[idx]; } - for (int i = op->rw.elems; i < 4; i++) { - op->comps.flags[i] = prev.flags[i]; - op->comps.min[i] = prev.min[i]; - op->comps.max[i] = prev.max[i]; - } if (op->rw.filter.op) { const SwsComps prev = op->comps; -- 2.52.0 >From faac9fa705e58ea60597da091e35e58a7640128a Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Sat, 20 Jun 2026 02:52:02 +0200 Subject: [PATCH 03/16] swscale/ops_optimizer: set correct range metadata after split pass Replaces a few "nan" value ranges by real values, and drops a bunch of redundant non-FMA variants that resulted from this bug. 
Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_optimizer.c | 5 ++++- libswscale/uops_macros.h | 16 ---------------- tests/ref/fate/sws-ops-list | 2 +- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c index 69973b63ce..e6ebd21515 100644 --- a/libswscale/ops_optimizer.c +++ b/libswscale/ops_optimizer.c @@ -1005,8 +1005,11 @@ int ff_sws_op_list_subpass(SwsOpList *ops1, SwsOpList **out_rest) ops1->dst = ops2->src; for (int i = 0; i < nb_planes; i++) { + const int idx = swiz_wr.in[i]; ops1->plane_dst[i] = ops2->plane_src[i] = i; - ops2->comps_src.flags[i] = prev->comps.flags[swiz_wr.in[i]]; + ops2->comps_src.flags[i] = prev->comps.flags[idx]; + ops2->comps_src.min[i] = prev->comps.min[idx]; + ops2->comps_src.max[i] = prev->comps.max[idx]; } ff_sws_op_list_remove_at(ops1, idx, ops1->num_ops - idx); diff --git a/libswscale/uops_macros.h b/libswscale/uops_macros.h index 3c4d6b6a3e..5d9e1a8026 100644 --- a/libswscale/uops_macros.h +++ b/libswscale/uops_macros.h @@ -1177,8 +1177,6 @@ MACRO(__VA_ARGS__, f32_linear_xyzw_x0000_0x000_00x00_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0xf, .par.lin.one = 0x0, .par.lin.zero = 0xbefbe) #define SWS_FOR_F32_LINEAR_FMA(MACRO, ...) 
\ MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefa8, 0xfffe8) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_XXx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefa8, 0xfffeb) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefa8, 0xfffee) \ MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefa8, 0xfffef) \ MACRO(__VA_ARGS__, f32_linear_fma_x_x000x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefae, 0xfffee) \ MACRO(__VA_ARGS__, f32_linear_fma_x_X000x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefae, 0xfffef) \ @@ -1188,18 +1186,12 @@ MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefb8, 0xffffe) \ MACRO(__VA_ARGS__, f32_linear_fma_y_0x000 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x2, 0x41001, 0xbefbe, 0xfffbf) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba108, 0xfa108) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXx0x_XXx0x_XXx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba108, 0xfad6b) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba108, 0xfbdaf) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX0x_xXX0x_xXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba108, 0xfb9ce) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba108, 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0x0x_xxx0x_xx00x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xbb10a, 0xfb10a) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xbb10a, 0xfbdef) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba118, 0xfa118) \ - MACRO(__VA_ARGS__, 
f32_linear_fma_xyz_xXx00_XXx0x_XXx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba118, 0xfad7a) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba118, 0xfbdbe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_xXX0x_xXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba118, 0xfb9de) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba118, 0xfbdfe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_x000x_0x00x_00x0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xbadae, 0xfadae) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xbadae, 0xfbdef) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xbefbe, 0xfefbe) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40421, 0xbb10a, 0xfbdef) \ @@ -1214,8 +1206,6 @@ MACRO(__VA_ARGS__, f32_linear_fma_xyzw_x0000_0x000_00x00_000x0, SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0xf, 0x00000, 0xbefbe, 0xbefbe) #define SWS_FOR_STRUCT_F32_LINEAR_FMA(MACRO, ...) 
\ MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefa8, .par.lin.exact = 0xfffe8) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_XXx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefa8, .par.lin.exact = 0xfffeb) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefa8, .par.lin.exact = 0xfffee) \ MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefa8, .par.lin.exact = 0xfffef) \ MACRO(__VA_ARGS__, f32_linear_fma_x_x000x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xfffee) \ MACRO(__VA_ARGS__, f32_linear_fma_x_X000x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xfffef) \ @@ -1225,18 +1215,12 @@ MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffffe) \ MACRO(__VA_ARGS__, f32_linear_fma_y_0x000 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x2, .par.lin.one = 0x41001, .par.lin.zero = 0xbefbe, .par.lin.exact = 0xfffbf) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfa108) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXx0x_XXx0x_XXx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfad6b) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = 
SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfbdaf) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX0x_xXX0x_xXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfb9ce) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0x0x_xxx0x_xx00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xfb10a) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xfbdef) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfa118) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXx00_XXx0x_XXx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfad7a) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfbdbe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_xXX0x_xXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfb9de) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfbdfe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_x000x_0x00x_00x0x , .type = SWS_PIXEL_F32, .uop = 
SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbadae, .par.lin.exact = 0xfadae) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbadae, .par.lin.exact = 0xfbdef) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbefbe, .par.lin.exact = 0xfefbe) \ MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40421, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xfbdef) \ diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list index 6b4003121a..dcda011ccc 100644 --- a/tests/ref/fate/sws-ops-list +++ b/tests/ref/fate/sws-ops-list @@ -1 +1 @@ -bbe27c8c324f08d933f6397f5fb96650 +e490d908612d059c644e64b43247fb08 -- 2.52.0 >From b120505ce2d5e934451019250af4e846d0ced9c4 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Sat, 20 Jun 2026 02:55:04 +0200 Subject: [PATCH 04/16] swscale/ops: apply ff_sws_comp_mask_swizzle() in-place More convenient at every use site. 
Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops.c | 7 ++++--- libswscale/ops.h | 2 +- libswscale/ops_optimizer.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libswscale/ops.c b/libswscale/ops.c index 1ea5261c3d..71f450fc6b 100644 --- a/libswscale/ops.c +++ b/libswscale/ops.c @@ -144,16 +144,17 @@ SwsCompMask ff_sws_comp_mask_q4(const AVRational q[4]) return mask; } -SwsCompMask ff_sws_comp_mask_swizzle(const SwsCompMask mask, const SwsSwizzleOp *swiz) +void ff_sws_comp_mask_swizzle(SwsCompMask *mask, const SwsSwizzleOp *swiz) { + const SwsCompMask orig = *mask; SwsCompMask res = 0; for (int i = 0; i < 4; i++) { const int src = swiz->in[i]; - if (SWS_COMP_TEST(mask, src)) + if (SWS_COMP_TEST(orig, src)) res |= SWS_COMP(i); } - return res; + *mask = res; } SwsCompMask ff_sws_comp_mask_needed(const SwsOp *op) diff --git a/libswscale/ops.h b/libswscale/ops.h index 41755f35f4..b5de7546d5 100644 --- a/libswscale/ops.h +++ b/libswscale/ops.h @@ -146,7 +146,7 @@ typedef struct SwsSwizzleOp { } SwsSwizzleOp; #define SWS_SWIZZLE(X,Y,Z,W) ((SwsSwizzleOp) { .in = {X, Y, Z, W} }) -SwsCompMask ff_sws_comp_mask_swizzle(SwsCompMask mask, const SwsSwizzleOp *swiz); +void ff_sws_comp_mask_swizzle(SwsCompMask *mask, const SwsSwizzleOp *swiz); typedef struct SwsShiftOp { uint8_t amount; /* number of bits to shift */ diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c index e6ebd21515..8c473c1a52 100644 --- a/libswscale/ops_optimizer.c +++ b/libswscale/ops_optimizer.c @@ -61,7 +61,7 @@ static bool op_commute_clear(SwsOp *op, SwsOp *next) op->type = next->filter.type; return true; case SWS_OP_SWIZZLE: - op->clear.mask = ff_sws_comp_mask_swizzle(op->clear.mask, &next->swizzle); + ff_sws_comp_mask_swizzle(&op->clear.mask, &next->swizzle); ff_sws_apply_op_q(next, op->clear.value); return true; case SWS_OP_SWAP_BYTES: -- 2.52.0 >From d474b408f2966646a824be2500e82a39038a5dc5 Mon Sep 17 00:00:00 2001 
From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 16:14:49 +0200 Subject: [PATCH 05/16] swscale/ops_optimizer: simplify unused op check (cosmetic) Signed-off-by: Niklas Haas <[email protected]> --- libswscale/ops_optimizer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c index 8c473c1a52..e41af380e9 100644 --- a/libswscale/ops_optimizer.c +++ b/libswscale/ops_optimizer.c @@ -393,12 +393,10 @@ retry: SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy; /* common helper variable */ + const SwsCompMask needed = ff_sws_comp_mask_needed(op); bool noop = true; - if (!SWS_OP_NEEDED(op, 0) && !SWS_OP_NEEDED(op, 1) && - !SWS_OP_NEEDED(op, 2) && !SWS_OP_NEEDED(op, 3) && - op->op != SWS_OP_WRITE) - { + if (!needed && op->op != SWS_OP_WRITE) { /* Remove any operation whose output is not needed */ ff_sws_op_list_remove_at(ops, n, 1); goto retry; -- 2.52.0 >From cb8a006f8a2777ad2766ed7018458e9024586ad8 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 14:09:42 +0200 Subject: [PATCH 06/16] swscale/graph: don't over-allocate pass buffer lines This is not only wasteful but also serves no real purpose. Looping over the correct number of lines is trivial; there is far less point in vertical padding than horizontal padding. Furthermore, this might actually introduce issues when linking output buffers; since the extra padding depends on the pass's alignment and threading requirements, which may differ from pass to pass. 
Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/graph.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/libswscale/graph.c b/libswscale/graph.c index 3db3b98c7b..06e5ebefc8 100644 --- a/libswscale/graph.c +++ b/libswscale/graph.c @@ -201,6 +201,10 @@ int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, goto fail; } + pass->output->height = pass->height; + pass->output->width = pass->width; + pass->output->width_align = 1; + if (!align) { pass->slice_h = pass->height; pass->num_slices = 1; @@ -210,11 +214,6 @@ int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, pass->num_slices = (pass->height + pass->slice_h - 1) / pass->slice_h; } - /* Align output buffer to include extra slice padding */ - pass->output->height = pass->slice_h * pass->num_slices; - pass->output->width = pass->width; - pass->output->width_align = 1; - ret = av_dynarray_add_nofree(&graph->passes, &graph->num_passes, pass); if (ret < 0) goto fail; -- 2.52.0 >From ba1c1d9eee75a2bd5f2677907af1753112c5c69f Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Sat, 20 Jun 2026 02:55:31 +0200 Subject: [PATCH 07/16] swscale/graph: separate pass dispatch size from buffer size This allows adding passes which will be dispatched over a reduced number of lines, without affecting the allocated buffer dimensions - e.g. for passes which purely write to subsampled chroma planes. A few hard-coded references to pass->width/height need to be replaced by the corresponding output frame references, but it's not a huge deal. 
Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/graph.c | 43 +++++++++++++++++++++------------------ libswscale/graph.h | 6 ++++-- libswscale/ops_dispatch.c | 15 +++++++------- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/libswscale/graph.c b/libswscale/graph.c index 06e5ebefc8..a765b4cd5c 100644 --- a/libswscale/graph.c +++ b/libswscale/graph.c @@ -174,7 +174,8 @@ static void pass_free(SwsPass *pass) int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, int width, int height, SwsPass *input, - int align, SwsPassFunc run, SwsPassSetup setup, + int lines, int align, + SwsPassFunc run, SwsPassSetup setup, void *priv, void (*free_cb)(void *priv), SwsPass **out_pass) { @@ -186,14 +187,16 @@ int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, return AVERROR(ENOMEM); } + if (!lines) + lines = height; + pass->graph = graph; pass->run = run; pass->setup = setup; pass->priv = priv; pass->free = free_cb; pass->format = fmt; - pass->width = width; - pass->height = height; + pass->lines = lines; pass->input = input; pass->output = av_refstruct_alloc_ext(sizeof(*pass->output), 0, NULL, free_buffer); if (!pass->output) { @@ -201,17 +204,17 @@ int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, goto fail; } - pass->output->height = pass->height; - pass->output->width = pass->width; + pass->output->height = height; + pass->output->width = width; pass->output->width_align = 1; if (!align) { - pass->slice_h = pass->height; + pass->slice_h = pass->lines; pass->num_slices = 1; } else { - pass->slice_h = (pass->height + graph->num_threads - 1) / graph->num_threads; + pass->slice_h = (pass->lines + graph->num_threads - 1) / graph->num_threads; pass->slice_h = FFALIGN(pass->slice_h, align); - pass->num_slices = (pass->height + pass->slice_h - 1) / pass->slice_h; + pass->num_slices = (pass->lines + pass->slice_h - 1) / pass->slice_h; } ret = av_dynarray_add_nofree(&graph->passes, 
&graph->num_passes, pass); @@ -267,7 +270,7 @@ static void run_rgb0(const SwsFrame *out, const SwsFrame *in, int y, int h, { SwsInternal *c = pass->priv; const int x0 = c->src0Alpha - 1; - const int w4 = 4 * pass->width; + const int w4 = 4 * out->width; const int src_stride = in->linesize[0]; const int dst_stride = out->linesize[0]; const uint8_t *src = in->data[0] + y * src_stride; @@ -289,7 +292,7 @@ static void run_xyz2rgb(const SwsFrame *out, const SwsFrame *in, int y, int h, const SwsInternal *c = pass->priv; c->xyz12Torgb48(c, out->data[0] + y * out->linesize[0], out->linesize[0], in->data[0] + y * in->linesize[0], in->linesize[0], - pass->width, h); + out->width, h); } static void run_rgb2xyz(const SwsFrame *out, const SwsFrame *in, int y, int h, @@ -298,7 +301,7 @@ static void run_rgb2xyz(const SwsFrame *out, const SwsFrame *in, int y, int h, const SwsInternal *c = pass->priv; c->rgb48Toxyz12(c, out->data[0] + y * out->linesize[0], out->linesize[0], in->data[0] + y * in->linesize[0], in->linesize[0], - pass->width, h); + out->width, h); } /*********************************************************************** @@ -465,7 +468,7 @@ static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, if (c->src0Alpha && !c->dst0Alpha && isALPHA(sws->dst_format)) { ret = ff_sws_graph_add_pass(graph, AV_PIX_FMT_RGBA, src_w, src_h, input, - 1, run_rgb0, NULL, c, NULL, &input); + 0, 1, run_rgb0, NULL, c, NULL, &input); if (ret < 0) { sws_free_context(&sws); return ret; @@ -474,14 +477,14 @@ static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, if (c->srcXYZ && !(c->dstXYZ && unscaled)) { ret = ff_sws_graph_add_pass(graph, AV_PIX_FMT_RGB48, src_w, src_h, input, - 1, run_xyz2rgb, NULL, c, NULL, &input); + 0, 1, run_xyz2rgb, NULL, c, NULL, &input); if (ret < 0) { sws_free_context(&sws); return ret; } } - ret = ff_sws_graph_add_pass(graph, sws->dst_format, dst_w, dst_h, input, align, + ret = ff_sws_graph_add_pass(graph, sws->dst_format, dst_w, dst_h, input, 0, 
align, c->convert_unscaled ? run_legacy_unscaled : run_legacy_swscale, setup_legacy_swscale, sws, free_legacy_swscale, &pass); if (ret < 0) @@ -533,7 +536,7 @@ static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws, if (c->dstXYZ && !(c->srcXYZ && unscaled)) { ret = ff_sws_graph_add_pass(graph, AV_PIX_FMT_RGB48, dst_w, dst_h, pass, - 1, run_rgb2xyz, NULL, c, NULL, &pass); + 0, 1, run_rgb2xyz, NULL, c, NULL, &pass); if (ret < 0) return ret; } @@ -715,7 +718,7 @@ static void run_lut3d(const SwsFrame *out, const SwsFrame *in, int y, int h, frame_shift(out, y, out_data); ff_sws_lut3d_apply(lut, in_data[0], in->linesize[0], out_data[0], - out->linesize[0], pass->width, h); + out->linesize[0], out->width, h); } static int adapt_colors(SwsGraph *graph, const SwsFormat *src_fmt, @@ -777,7 +780,7 @@ static int adapt_colors(SwsGraph *graph, const SwsFormat *src_fmt, } return ff_sws_graph_add_pass(graph, fmt_out, src.width, src.height, - input, 1, run_lut3d, setup_lut3d, lut, + input, 0, 1, run_lut3d, setup_lut3d, lut, free_lut3d, output); } @@ -812,7 +815,7 @@ static int init_passes(SwsGraph *graph) /* Add threaded memcpy pass */ return ff_sws_graph_add_pass(graph, dst.format, dst.width, dst.height, - pass, 1, run_copy, NULL, NULL, NULL, &pass); + pass, 0, 1, run_copy, NULL, NULL, NULL, &pass); } static void sws_graph_worker(void *priv, int jobnr, int threadnr, int nb_jobs, @@ -821,7 +824,7 @@ static void sws_graph_worker(void *priv, int jobnr, int threadnr, int nb_jobs, SwsGraph *graph = priv; const SwsPass *pass = graph->exec.pass; const int slice_y = jobnr * pass->slice_h; - const int slice_h = FFMIN(pass->slice_h, pass->height - slice_y); + const int slice_h = FFMIN(pass->slice_h, pass->lines - slice_y); pass->run(graph->exec.output, graph->exec.input, slice_y, slice_h, pass); } @@ -1016,7 +1019,7 @@ int ff_sws_graph_run(SwsGraph *graph, const AVFrame *dst, const AVFrame *src) } if (pass->num_slices == 1) { - pass->run(graph->exec.output, graph->exec.input, 0, 
pass->height, pass); + pass->run(graph->exec.output, graph->exec.input, 0, pass->lines, pass); } else { avpriv_slicethread_execute(graph->slicethread, pass->num_slices, 0); } diff --git a/libswscale/graph.h b/libswscale/graph.h index adf4b19675..eff2dcc47f 100644 --- a/libswscale/graph.h +++ b/libswscale/graph.h @@ -83,7 +83,7 @@ struct SwsPass { SwsPassFunc run; SwsBackend backend; /* backend this pass is using, or 0 */ enum AVPixelFormat format; /* new pixel format */ - int width, height; /* new output size */ + int lines; /* pass dispatch size */ int slice_h; /* filter granularity */ int num_slices; @@ -184,6 +184,7 @@ int ff_sws_graph_create(SwsContext *ctx, const SwsFormat *dst, const SwsFormat * * @param w Width of the output image. * @param h Height of the output image. * @param input Previous pass to read from, or NULL for the input image. + * @param lines Override the number of lines processed for this pass. (Optional) * @param align Minimum slice alignment for this pass, or 0 for no threading. * @param run Filter function to run. * @param setup Optional setup function to run from the main thread. 
@@ -194,7 +195,8 @@ int ff_sws_graph_create(SwsContext *ctx, const SwsFormat *dst, const SwsFormat * */ int ff_sws_graph_add_pass(SwsGraph *graph, enum AVPixelFormat fmt, int width, int height, SwsPass *input, - int align, SwsPassFunc run, SwsPassSetup setup, + int lines, int align, + SwsPassFunc run, SwsPassSetup setup, void *priv, void (*free)(void *priv), SwsPass **out_pass); diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c index 44248195d7..abf67b0d55 100644 --- a/libswscale/ops_dispatch.c +++ b/libswscale/ops_dispatch.c @@ -202,6 +202,7 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, { const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format); const bool float_in = indesc->flags & AV_PIX_FMT_FLAG_FLOAT; + const int width = out->width; SwsOpPass *p = pass->priv; SwsOpExec *exec = &p->exec_base; @@ -209,9 +210,9 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, /* Set up main loop parameters */ const unsigned block_size = comp->block_size; - const size_t num_blocks = (pass->width + block_size - 1) / block_size; + const size_t num_blocks = (width + block_size - 1) / block_size; const size_t aligned_w = num_blocks * block_size; - if (aligned_w < pass->width) /* overflow */ + if (aligned_w < width) /* overflow */ return AVERROR(EINVAL); p->num_blocks = num_blocks; p->memcpy_first = false; @@ -280,14 +281,14 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, *tail = *exec; const size_t safe_width = safe_blocks * block_size; - const size_t tail_size = pass->width - safe_width; + const size_t tail_size = width - safe_width; p->tail_off_out = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN); p->tail_size_out = pixel_bytes(tail_size, p->pixel_bits_out, AV_ROUND_UP); p->tail_blocks = num_blocks - safe_blocks; if (exec->in_offset_x) { p->tail_off_in = exec->in_offset_x[safe_width]; - p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in; + p->tail_size_in = 
exec->in_offset_x[width - 1] - p->tail_off_in; p->tail_size_in += pixel_bytes(p->filter_size_h, p->pixel_bits_in, AV_ROUND_UP); } else { p->tail_off_in = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN); @@ -387,7 +388,7 @@ static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y, * memcpy the last column on the output side if unpadded. */ - const bool memcpy_in = p->memcpy_last && y + h == pass->height || + const bool memcpy_in = p->memcpy_last && y + h == pass->lines || p->memcpy_first && y == 0; const bool memcpy_out = p->memcpy_out; const size_t num_blocks = p->num_blocks; @@ -515,7 +516,7 @@ static int compile(SwsGraph *graph, const SwsOpBackend *backend, SwsCompiledOp c = *comp; av_free(p); ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height, - input, c.slice_align, c.func_opaque, + input, 0, c.slice_align, c.func_opaque, NULL, c.priv, c.free, output); if (ret >= 0) (*output)->backend = comp->backend->flags; @@ -616,7 +617,7 @@ static int compile(SwsGraph *graph, const SwsOpBackend *backend, } ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height, - input, comp->slice_align, op_pass_run, + input, 0, comp->slice_align, op_pass_run, op_pass_setup, p, op_pass_free, output); if (ret < 0) return ret; -- 2.52.0 >From 3a2c5050c6eb5dd2ffdf74b0af735c4d76ee9013 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 13:30:08 +0200 Subject: [PATCH 08/16] swscale: fix format equality check I can't say I remember why this logic was written this way, but I can't think of any good reason why we should exclude comparing the image dimensions here - the intent is obviously to allow passthrough / noop. 
Signed-off-by: Niklas Haas <[email protected]> --- libswscale/swscale.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libswscale/swscale.c b/libswscale/swscale.c index c0cdd17b78..508967a13c 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1512,7 +1512,7 @@ int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src) src_ok = ff_test_fmt(backends, &src_fmt, 0); dst_ok = ff_test_fmt(backends, &dst_fmt, 1); - if ((!src_ok || !dst_ok) && !ff_props_equal(&src_fmt, &dst_fmt)) { + if ((!src_ok || !dst_ok) && !ff_fmt_equal(&src_fmt, &dst_fmt)) { err_msg = src_ok ? "Unsupported output" : "Unsupported input"; ret = AVERROR(ENOTSUP); goto fail; -- 2.52.0 >From 7e7c1c0d94634543a7cc215d7c3b32235e487e25 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 13:31:30 +0200 Subject: [PATCH 09/16] swscale/format: nuke ff_props_equal() And merge it with the more clear ff_fmt_equal(). Signed-off-by: Niklas Haas <[email protected]> --- libswscale/format.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/libswscale/format.h b/libswscale/format.h index 36158da55a..24d53b8e80 100644 --- a/libswscale/format.h +++ b/libswscale/format.h @@ -120,23 +120,17 @@ static inline int ff_color_equal(const SwsColor *c1, const SwsColor *c2) ff_prim_equal(&c1->gamut, &c2->gamut); } -/* Tests only the static components of a colorspace, ignoring dimensions and per-frame data */ -static inline int ff_props_equal(const SwsFormat *fmt1, const SwsFormat *fmt2) -{ - return fmt1->interlaced == fmt2->interlaced && - fmt1->format == fmt2->format && - fmt1->range == fmt2->range && - fmt1->csp == fmt2->csp && - fmt1->loc == fmt2->loc && - ff_color_equal(&fmt1->color, &fmt2->color); -} - /* Tests only the static components of a colorspace, ignoring per-frame data */ static inline int ff_fmt_equal(const SwsFormat *fmt1, const SwsFormat *fmt2) { return fmt1->width == fmt2->width && fmt1->height == 
fmt2->height && - ff_props_equal(fmt1, fmt2); + fmt1->interlaced == fmt2->interlaced && + fmt1->format == fmt2->format && + fmt1->range == fmt2->range && + fmt1->csp == fmt2->csp && + fmt1->loc == fmt2->loc && + ff_color_equal(&fmt1->color, &fmt2->color); } static inline int ff_fmt_align(enum AVPixelFormat fmt) -- 2.52.0 >From aedede0cee249e32a7cd9712255d4d33f89e5858 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 14:25:02 +0200 Subject: [PATCH 10/16] swscale/format: add SwsFormat.field This metadata is needed to compute the correct chroma sampling offsets. We previously stored this in graph->field, but that's a bad place for it, because it doesn't survive the translation to the ops abstraction layer. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/format.c | 1 + libswscale/format.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/libswscale/format.c b/libswscale/format.c index c34d335500..a68565e8e5 100644 --- a/libswscale/format.c +++ b/libswscale/format.c @@ -388,6 +388,7 @@ SwsFormat ff_fmt_from_frame(const AVFrame *frame, int field) if (frame->flags & AV_FRAME_FLAG_INTERLACED) { fmt.height = (fmt.height + (field == FIELD_TOP)) >> 1; fmt.interlaced = 1; + fmt.field = field; } /* Set luminance and gamut information */ diff --git a/libswscale/format.h b/libswscale/format.h index 24d53b8e80..9b852efd39 100644 --- a/libswscale/format.h +++ b/libswscale/format.h @@ -77,6 +77,7 @@ static inline void ff_color_update_dynamic(SwsColor *dst, const SwsColor *src) typedef struct SwsFormat { int width, height; int interlaced; + int field; enum AVPixelFormat format; enum AVPixelFormat hw_format; enum AVColorRange range; @@ -126,6 +127,7 @@ static inline int ff_fmt_equal(const SwsFormat *fmt1, const SwsFormat *fmt2) return fmt1->width == fmt2->width && fmt1->height == fmt2->height && fmt1->interlaced == fmt2->interlaced && + fmt1->field == fmt2->field && fmt1->format == fmt2->format && 
fmt1->range == fmt2->range && fmt1->csp == fmt2->csp && -- 2.52.0 >From 4653e68aaba6754cd63a8da4327113564a9a9d0b Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 14:30:25 +0200 Subject: [PATCH 11/16] swscale/graph: nuke SwsGraph.field No longer needed after the previous commit. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/graph.c | 28 +++++++++++++++------------- libswscale/graph.h | 7 +++---- libswscale/swscale.c | 2 +- libswscale/vulkan/ops.c | 2 +- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/libswscale/graph.c b/libswscale/graph.c index a765b4cd5c..73df6b8907 100644 --- a/libswscale/graph.c +++ b/libswscale/graph.c @@ -405,7 +405,7 @@ static void get_chroma_pos(SwsGraph *graph, int *h_chr_pos, int *v_chr_pos, * For 4x vertical subsampling (v_sub == 2), they are only placed * next to every *other* even row, so we need to shift by three luma * rows to get to the chroma sample. */ - if (graph->field == FIELD_BOTTOM) + if (fmt->field == FIELD_BOTTOM) y_pos += (256 << sub_y) - 256; /* Luma row distance is doubled for fields, so halve offsets */ @@ -846,7 +846,7 @@ static void graph_uninit(SwsGraph *graph) } int ff_sws_graph_init(SwsGraph *graph, SwsContext *ctx, const SwsFormat *dst, - const SwsFormat *src, int field) + const SwsFormat *src) { int ret; if (graph->ctx) { @@ -857,8 +857,9 @@ int ff_sws_graph_init(SwsGraph *graph, SwsContext *ctx, const SwsFormat *dst, graph->ctx = ctx; graph->src = *src; graph->dst = *dst; - graph->field = field; graph->opts_copy = *ctx; + av_assert0(src->interlaced == dst->interlaced); + av_assert0(src->field == dst->field); if (ctx->threads == 1) { graph->num_threads = 1; @@ -895,13 +896,13 @@ error: } int ff_sws_graph_create(SwsContext *ctx, const SwsFormat *dst, const SwsFormat *src, - int field, SwsGraph **out_graph) + SwsGraph **out_graph) { SwsGraph *graph = ff_sws_graph_alloc(); if (!graph) return AVERROR(ENOMEM); - int 
ret = ff_sws_graph_init(graph, ctx, dst, src, field); + int ret = ff_sws_graph_init(graph, ctx, dst, src); if (ret < 0) { ff_sws_graph_free(&graph); return ret; @@ -950,7 +951,7 @@ static int opts_equal(const SwsContext *c1, const SwsContext *c2) } int ff_sws_graph_reinit(SwsGraph *graph, SwsContext *ctx, const SwsFormat *dst, - const SwsFormat *src, int field) + const SwsFormat *src) { if (ff_fmt_equal(&graph->src, src) && ff_fmt_equal(&graph->dst, dst) && opts_equal(ctx, &graph->opts_copy)) @@ -960,7 +961,7 @@ int ff_sws_graph_reinit(SwsGraph *graph, SwsContext *ctx, const SwsFormat *dst, } graph_uninit(graph); - return ff_sws_graph_init(graph, ctx, dst, src, field); + return ff_sws_graph_init(graph, ctx, dst, src); } void ff_sws_graph_update_metadata(SwsGraph *graph, const SwsColor *color) @@ -971,16 +972,17 @@ void ff_sws_graph_update_metadata(SwsGraph *graph, const SwsColor *color) ff_color_update_dynamic(&graph->src.color, color); } -static void get_field(SwsGraph *graph, const AVFrame *avframe, SwsFrame *frame) +static void get_field(SwsGraph *graph, const SwsFormat *fmt, + const AVFrame *avframe, SwsFrame *frame) { ff_sws_frame_from_avframe(frame, avframe); if (!(avframe->flags & AV_FRAME_FLAG_INTERLACED)) { - av_assert1(!graph->field); + av_assert1(!fmt->field); return; } - if (graph->field == FIELD_BOTTOM) { + if (fmt->field == FIELD_BOTTOM) { /* Odd rows, offset by one line */ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); for (int i = 0; i < 4; i++) { @@ -995,7 +997,7 @@ static void get_field(SwsGraph *graph, const AVFrame *avframe, SwsFrame *frame) for (int i = 0; i < 4; i++) frame->linesize[i] <<= 1; - frame->height = (frame->height + (graph->field == FIELD_TOP)) >> 1; + frame->height = (frame->height + (fmt->field == FIELD_TOP)) >> 1; } int ff_sws_graph_run(SwsGraph *graph, const AVFrame *dst, const AVFrame *src) @@ -1004,8 +1006,8 @@ int ff_sws_graph_run(SwsGraph *graph, const AVFrame *dst, const AVFrame *src) 
av_assert0(src->format == graph->src.hw_format || src->format == graph->src.format); SwsFrame src_field, dst_field; - get_field(graph, dst, &dst_field); - get_field(graph, src, &src_field); + get_field(graph, &graph->dst, dst, &dst_field); + get_field(graph, &graph->src, src, &src_field); for (int i = 0; i < graph->num_passes; i++) { const SwsPass *pass = graph->passes[i]; diff --git a/libswscale/graph.h b/libswscale/graph.h index eff2dcc47f..cb06f480cc 100644 --- a/libswscale/graph.h +++ b/libswscale/graph.h @@ -143,7 +143,6 @@ typedef struct SwsGraph { * Currently active format and processing parameters. */ SwsFormat src, dst; - int field; /** * Temporary execution state inside ff_sws_graph_run(); used to pass @@ -166,13 +165,13 @@ SwsGraph *ff_sws_graph_alloc(void); * negative error. */ int ff_sws_graph_init(SwsGraph *graph, SwsContext *ctx, const SwsFormat *dst, - const SwsFormat *src, int field); + const SwsFormat *src); /** * Allocate and initialize the filter graph. Returns 0 or a negative error. */ int ff_sws_graph_create(SwsContext *ctx, const SwsFormat *dst, const SwsFormat *src, - int field, SwsGraph **out_graph); + SwsGraph **out_graph); /** @@ -223,7 +222,7 @@ void ff_sws_graph_update_metadata(SwsGraph *graph, const SwsColor *color); * will have no effect. */ int ff_sws_graph_reinit(SwsGraph *graph, SwsContext *ctx, const SwsFormat *dst, - const SwsFormat *src, int field); + const SwsFormat *src); /** * Dispatch the filter graph on a single field of the given frames. 
Internally diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 508967a13c..969456efcc 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1527,7 +1527,7 @@ int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src) } } - ret = ff_sws_graph_reinit(s->graph[field], ctx, &dst_fmt, &src_fmt, field); + ret = ff_sws_graph_reinit(s->graph[field], ctx, &dst_fmt, &src_fmt); if (ret < 0) { err_msg = "Failed initializing scaling graph"; goto fail; diff --git a/libswscale/vulkan/ops.c b/libswscale/vulkan/ops.c index 1218fab2c7..9d5f201e69 100644 --- a/libswscale/vulkan/ops.c +++ b/libswscale/vulkan/ops.c @@ -153,7 +153,7 @@ static void process(const SwsFrame *dst, const SwsFrame *src, int y, int h, }); if (p->interlaced) { - uint32_t field = pass->graph ? pass->graph->field : 0; + uint32_t field = pass->graph ? pass->graph->dst.field : 0; ff_vk_shader_update_push_const(&p->s->vkctx, ec, &p->shd, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(field), &field); -- 2.52.0 >From 1f6dc79c807ab6044e8b4ef990386b2e9d9e918e Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 15:07:24 +0200 Subject: [PATCH 12/16] swscale/format: factor out ff_sws_chroma_pos() helper Moved here from graph.c, as it's needed for the new chroma scaling code. 
Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/format.c | 41 ++++++++++++++++++++++++++++++++ libswscale/format.h | 7 ++++++ libswscale/graph.c | 58 ++++++++++----------------------------------- 3 files changed, 61 insertions(+), 45 deletions(-) diff --git a/libswscale/format.c b/libswscale/format.c index a68565e8e5..0538ab9db4 100644 --- a/libswscale/format.c +++ b/libswscale/format.c @@ -1393,6 +1393,47 @@ linear_mat3(const AVRational m00, const AVRational m01, const AVRational m02, return c; } +void ff_sws_chroma_pos(const SwsFormat *fmt, bool *incomplete, + int *out_x_pos, int *out_y_pos) +{ + enum AVChromaLocation chroma_loc = fmt->loc; + const int sub_x = fmt->desc->log2_chroma_w; + const int sub_y = fmt->desc->log2_chroma_h; + int x_pos, y_pos; + + /* Explicitly default to center siting for compatibility with swscale */ + if (chroma_loc == AVCHROMA_LOC_UNSPECIFIED) { + chroma_loc = AVCHROMA_LOC_CENTER; + *incomplete |= sub_x || sub_y; + } + + /* av_chroma_location_enum_to_pos() always gives us values in the range from + * 0 to 256, but we need to adjust this to the true value range of the + * subsampling grid, which may be larger for h/v_sub > 1 */ + av_chroma_location_enum_to_pos(&x_pos, &y_pos, chroma_loc); + x_pos *= (1 << sub_x) - 1; + y_pos *= (1 << sub_y) - 1; + + /* Fix vertical chroma position for interlaced frames */ + if (sub_y && fmt->interlaced) { + /* When vertically subsampling, chroma samples are effectively only + * placed next to even rows. To access them from the odd field, we need + * to account for this shift by offsetting the distance of one luma row. + * + * For 4x vertical subsampling (v_sub == 2), they are only placed + * next to every *other* even row, so we need to shift by three luma + * rows to get to the chroma sample. 
*/ + if (fmt->field == FIELD_BOTTOM) + y_pos += (256 << sub_y) - 256; + + /* Luma row distance is doubled for fields, so halve offsets */ + y_pos >>= 1; + } + + *out_x_pos = x_pos; + *out_y_pos = y_pos; +} + int ff_sws_decode_colors(SwsContext *ctx, SwsPixelType type, SwsOpList *ops, const SwsFormat *fmt, bool *incomplete) { diff --git a/libswscale/format.h b/libswscale/format.h index 9b852efd39..ea2ab7dc41 100644 --- a/libswscale/format.h +++ b/libswscale/format.h @@ -161,6 +161,13 @@ int ff_test_fmt(SwsBackend backends, const SwsFormat *fmt, int output); /* Returns true if the formats are incomplete, false otherwise */ bool ff_infer_colors(SwsColor *src, SwsColor *dst); +/** + * Wrapper around av_chroma_location_enum_to_pos() that accounts for + * the per-field offset introduced by interlacing. + */ +void ff_sws_chroma_pos(const SwsFormat *fmt, bool *incomplete, + int *out_xpos, int *out_ypos); + typedef struct SwsOpList SwsOpList; typedef enum SwsPixelType SwsPixelType; diff --git a/libswscale/graph.c b/libswscale/graph.c index 73df6b8907..a99cd7cadf 100644 --- a/libswscale/graph.c +++ b/libswscale/graph.c @@ -375,49 +375,6 @@ static void run_legacy_swscale(const SwsFrame *out, const SwsFrame *in, sws->src_h, out_data, out->linesize, y, h); } -static void get_chroma_pos(SwsGraph *graph, int *h_chr_pos, int *v_chr_pos, - const SwsFormat *fmt) -{ - enum AVChromaLocation chroma_loc = fmt->loc; - const int sub_x = fmt->desc->log2_chroma_w; - const int sub_y = fmt->desc->log2_chroma_h; - int x_pos, y_pos; - - /* Explicitly default to center siting for compatibility with swscale */ - if (chroma_loc == AVCHROMA_LOC_UNSPECIFIED) { - chroma_loc = AVCHROMA_LOC_CENTER; - graph->incomplete |= sub_x || sub_y; - } - - /* av_chroma_location_enum_to_pos() always gives us values in the range from - * 0 to 256, but we need to adjust this to the true value range of the - * subsampling grid, which may be larger for h/v_sub > 1 */ - av_chroma_location_enum_to_pos(&x_pos, &y_pos, 
chroma_loc); - x_pos *= (1 << sub_x) - 1; - y_pos *= (1 << sub_y) - 1; - - /* Fix vertical chroma position for interlaced frames */ - if (sub_y && fmt->interlaced) { - /* When vertically subsampling, chroma samples are effectively only - * placed next to even rows. To access them from the odd field, we need - * to account for this shift by offsetting the distance of one luma row. - * - * For 4x vertical subsampling (v_sub == 2), they are only placed - * next to every *other* even row, so we need to shift by three luma - * rows to get to the chroma sample. */ - if (fmt->field == FIELD_BOTTOM) - y_pos += (256 << sub_y) - 256; - - /* Luma row distance is doubled for fields, so halve offsets */ - y_pos >>= 1; - } - - /* Explicitly strip chroma offsets when not subsampling, because it - * interferes with the operation of flags like SWS_FULL_CHR_H_INP */ - *h_chr_pos = sub_x ? x_pos : -513; - *v_chr_pos = sub_y ? y_pos : -513; -} - static void legacy_chr_pos(SwsGraph *graph, int *chr_pos, int override, int *warned) { if (override == -513 || override == *chr_pos) @@ -582,8 +539,8 @@ static int add_legacy_sws_pass(SwsGraph *graph, const SwsFormat *src, sws->dst_h = dst->height; sws->dst_format = dst->format; sws->dst_range = dst->range == AVCOL_RANGE_JPEG; - get_chroma_pos(graph, &sws->src_h_chr_pos, &sws->src_v_chr_pos, src); - get_chroma_pos(graph, &sws->dst_h_chr_pos, &sws->dst_v_chr_pos, dst); + ff_sws_chroma_pos(src, &graph->incomplete, &sws->src_h_chr_pos, &sws->src_v_chr_pos); + ff_sws_chroma_pos(dst, &graph->incomplete, &sws->dst_h_chr_pos, &sws->dst_v_chr_pos); graph->incomplete |= src->range == AVCOL_RANGE_UNSPECIFIED; graph->incomplete |= dst->range == AVCOL_RANGE_UNSPECIFIED; @@ -594,6 +551,17 @@ static int add_legacy_sws_pass(SwsGraph *graph, const SwsFormat *src, legacy_chr_pos(graph, &sws->dst_h_chr_pos, ctx->dst_h_chr_pos, &warned); legacy_chr_pos(graph, &sws->dst_v_chr_pos, ctx->dst_v_chr_pos, &warned); + /* Explicitly strip chroma offsets when not 
subsampling, because it + * interferes with the operation of flags like SWS_FULL_CHR_H_INP */ + if (!src->desc->log2_chroma_w) + sws->src_h_chr_pos = -513; + if (!src->desc->log2_chroma_h) + sws->src_v_chr_pos = -513; + if (!dst->desc->log2_chroma_w) + sws->dst_h_chr_pos = -513; + if (!dst->desc->log2_chroma_h) + sws->dst_v_chr_pos = -513; + for (int i = 0; i < SWS_NUM_SCALER_PARAMS; i++) sws->scaler_params[i] = ctx->scaler_params[i]; -- 2.52.0 >From 8215e9bbea07173b59e842a860df58aca18ff01a Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Mon, 8 Jun 2026 14:44:01 +0200 Subject: [PATCH 13/16] swscale/filters: add option for adding an input pixel offset This is needed for chroma subsampling, which requires a different filter offset for chroma subsamples (according to the frame's chroma location). Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/filters.c | 3 ++- libswscale/filters.h | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/libswscale/filters.c b/libswscale/filters.c index 7e8865659e..81a388e692 100644 --- a/libswscale/filters.c +++ b/libswscale/filters.c @@ -75,7 +75,7 @@ static void compute_row(SwsFilterWeights *f, const SwsFilterFunction *fun, * the entire square from (0,0) to (1,1). When normalizing between different * image sizes, we therefore need to add/subtract off these 0.5 offsets. 
*/ - const double src_pos = (dst_pos + 0.5) * ratio_inv - 0.5; + const double src_pos = (dst_pos + 0.5) * ratio_inv - 0.5 + f->offset; if (f->filter_size == 1) { *pos = fmin(fmax(round(src_pos), 0.0), f->src_size - 1); *out = SWS_FILTER_SCALE; @@ -257,6 +257,7 @@ int ff_sws_filter_generate(void *log, const SwsFilterParams *params, memcpy(filter->name, fun.name, sizeof(filter->name)); filter->src_size = params->src_size; filter->dst_size = params->dst_size; + filter->offset = params->offset; filter->filter_size = filter_size; if (filter->filter_size == 1) filter->sum_positive = SWS_FILTER_SCALE; diff --git a/libswscale/filters.h b/libswscale/filters.h index 1bfdb196b8..143fc2c5e3 100644 --- a/libswscale/filters.h +++ b/libswscale/filters.h @@ -56,6 +56,13 @@ typedef struct SwsFilterParams { */ int src_size; int dst_size; + + /** + * The sample offset, in units of input pixels. This is added onto all + * sampled coordinates directly, i.e. a value of offset = 1.0 would shift + * the output to the top/left by one whole source pixel. + */ + double offset; } SwsFilterParams; /** @@ -88,6 +95,7 @@ typedef struct SwsFilterWeights { */ int src_size; int dst_size; + double offset; /** * Extra metadata about the filter, used to inform the optimizer / range -- 2.52.0 >From fa1ca69a8bde7a74e7c01777c75bb7c7e0754ee3 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 15:36:08 +0200 Subject: [PATCH 14/16] swscale/filters: add ability to set a virtual output size Odd-size luma planes are not exact multiples of the chroma plane, but the sample grid is still matched as though they were. We need to account for this when translating a luma sample to the corresponding chroma sample coordinates. 
Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/filters.c | 7 ++++++- libswscale/filters.h | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/libswscale/filters.c b/libswscale/filters.c index 81a388e692..fb54d2bbd8 100644 --- a/libswscale/filters.c +++ b/libswscale/filters.c @@ -194,7 +194,11 @@ int ff_sws_filter_generate(void *log, const SwsFilterParams *params, if (scaler == SWS_SCALE_AUTO) scaler = SWS_SCALE_BICUBIC; - const double ratio = (double) params->dst_size / params->src_size; + double virtual_size = params->virtual_size; + if (!virtual_size) + virtual_size = params->dst_size; + + const double ratio = virtual_size / params->src_size; double stretch = 1.0; if (ratio < 1.0 && scaler != SWS_SCALE_POINT) { /* Widen filter for downscaling (anti-aliasing) */ @@ -257,6 +261,7 @@ int ff_sws_filter_generate(void *log, const SwsFilterParams *params, memcpy(filter->name, fun.name, sizeof(filter->name)); filter->src_size = params->src_size; filter->dst_size = params->dst_size; + filter->virtual_size = virtual_size; filter->offset = params->offset; filter->filter_size = filter_size; if (filter->filter_size == 1) diff --git a/libswscale/filters.h b/libswscale/filters.h index 143fc2c5e3..a10e6f3964 100644 --- a/libswscale/filters.h +++ b/libswscale/filters.h @@ -57,6 +57,20 @@ typedef struct SwsFilterParams { int src_size; int dst_size; + /** + * The virtual output size. If zero, this is assumed to be the same as + * `dst_size`. Matters for e.g. chroma subsampling, where the luma + * plane may be smaller than the virtual size. For example, a 99x99 input + * image has a chroma size of 50x50, which would be 100x100 after + * chroma upscaling; but is sampled only at 99x99 resolution. In this + * instance, dst_size is 99x99 and virtual_size is 100x100. + * + * The upscaling offset from this shift is implicit and does not need + * to be accounted for in `offset`. 
In other words, `offset` is taken + * relative to the virtual size, not the sampled size. + */ + double virtual_size; + /** * The sample offset, in units of input pixels. This is added onto all * sampled coordinates directly, i.e. a value of offset = 1.0 would shift @@ -95,6 +109,7 @@ typedef struct SwsFilterWeights { */ int src_size; int dst_size; + double virtual_size; double offset; /** -- 2.52.0 >From 8cc6b2ddafe184cf544a0a10a954570740e94912 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Fri, 19 Jun 2026 16:30:28 +0200 Subject: [PATCH 15/16] swscale/tests/swscale: fix unscaled subsampled chroma format check This should be matching against the *chroma* scaler, not the main scaler. Of course, under normal circumstances, scaler_sub matches scaler, but this allows users to explicitly override this defaulting by setting e.g. -scaler none -scaler_sub bicubic Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/tests/swscale.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c index b5b8faeeff..6de9327ba9 100644 --- a/libswscale/tests/swscale.c +++ b/libswscale/tests/swscale.c @@ -691,8 +691,9 @@ static inline int fmt_is_supported_by_hw(enum AVPixelFormat fmt) static inline int fmt_disabled(const struct options *opts, enum AVPixelFormat fmt) { + const int scaler_sub = opts->scaler_sub ? opts->scaler_sub : opts->scaler; return (hw_device_constr && !fmt_is_supported_by_hw(fmt)) || - (opts->scaler < 0 && fmt_is_subsampled(fmt)); + (scaler_sub < 0 && fmt_is_subsampled(fmt)); } static inline int test_formats(const struct options *opts, -- 2.52.0 >From b3689e792fdbaaac690ffb98e03dc75ca56483a6 Mon Sep 17 00:00:00 2001 From: Niklas Haas <[email protected]> Date: Sat, 20 Jun 2026 02:56:18 +0200 Subject: [PATCH 16/16] swscale/uops: simplify permute naming scheme We also drop the useless/unused mask from the permute ops. 
Avoids a bunch of otherwise duplicate permute ops. Now that this is handled by SWS_UOP_MOVE for x86, there is no downside to this. The FATE change is a pure rename of the uops dumps. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/uops.c | 22 ++- libswscale/uops_macros.h | 258 ++++++++++++++---------------------- tests/ref/fate/sws-ops-list | 2 +- 3 files changed, 112 insertions(+), 170 deletions(-) diff --git a/libswscale/uops.c b/libswscale/uops.c index 1bd3e2f763..a0bbf5ddbc 100644 --- a/libswscale/uops.c +++ b/libswscale/uops.c @@ -155,6 +155,10 @@ void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX]) av_bprintf(&bp, "_%u", par->shift.amount); break; case SWS_UOP_PERMUTE: + av_bprint_chars(&bp, '_', 1); + for (int i = 0; i < 4; i++) + av_bprint_chars(&bp, "xyzw"[par->swizzle.in[i]], 1); + break; case SWS_UOP_COPY: av_bprint_chars(&bp, '_', 1); for (int i = 0; i < 4; i++) { @@ -597,13 +601,13 @@ static int translate_swizzle(SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op SwsUOp uop = { .type = pixel_type_to_int(op->type), .uop = SWS_UOP_PERMUTE, - .mask = ff_sws_comp_mask_needed(op), .par.swizzle.in = {0, 1, 2, 3}, }; + SwsCompMask needed = ff_sws_comp_mask_needed(op); SwsCompMask seen = 0; for (int i = 0; i < 4; i++) { - if (!SWS_COMP_TEST(uop.mask, i)) + if (!SWS_COMP_TEST(needed, i)) continue; const int src = op->swizzle.in[i]; if (SWS_COMP_TEST(seen, src)) @@ -615,7 +619,7 @@ static int translate_swizzle(SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op if (uop.uop == SWS_UOP_PERMUTE) { /* Prevent overlap by moving unused components to unseen indices */ for (int i = 0; i < 4; i++) { - if (SWS_COMP_TEST(uop.mask, i)) + if (SWS_COMP_TEST(needed, i)) continue; /* Prefer identity mapping if possible */ @@ -634,10 +638,14 @@ static int translate_swizzle(SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op } } - /* Remove remaining trivial / identity components from the mask */ - for (int i = 0; i < 
4; i++) { - if (uop.par.swizzle.in[i] == i) - uop.mask &= ~SWS_COMP(i); + if (uop.uop == SWS_UOP_COPY) { + /* Remove remaining trivial / identity components from the mask */ + for (int i = 0; i < 4; i++) { + if (uop.par.swizzle.in[i] == i) + needed &= ~SWS_COMP(i); + } + + uop.mask = needed; } return ff_sws_uop_list_append(ops, &uop); diff --git a/libswscale/uops_macros.h b/libswscale/uops_macros.h index 5d9e1a8026..77a4900248 100644 --- a/libswscale/uops_macros.h +++ b/libswscale/uops_macros.h @@ -102,67 +102,43 @@ #define SWS_FOR_STRUCT_U8_WRITE_BIT(MACRO, ...) \ MACRO(__VA_ARGS__, u8_write_bit_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_WRITE_BIT , .mask = 0x1) #define SWS_FOR_U8_PERMUTE(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_permute_x_y , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x1, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u8_permute_x_z , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x1, 2, 1, 0, 3) \ - MACRO(__VA_ARGS__, u8_permute_x_w , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x1, 3, 1, 2, 0) \ - MACRO(__VA_ARGS__, u8_permute_y_w , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x2, 0, 3, 2, 1) \ - MACRO(__VA_ARGS__, u8_permute_y_x , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x2, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u8_permute_xy_yx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x3, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u8_permute_xy_yw , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x3, 1, 3, 2, 0) \ - MACRO(__VA_ARGS__, u8_permute_xy_zw , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x3, 2, 3, 0, 1) \ - MACRO(__VA_ARGS__, u8_permute_xy_wx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x3, 3, 0, 2, 1) \ - MACRO(__VA_ARGS__, u8_permute_z_x , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x4, 1, 2, 0, 3) \ - MACRO(__VA_ARGS__, u8_permute_xz_zx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x5, 2, 1, 0, 3) \ - MACRO(__VA_ARGS__, u8_permute_yz_zy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x6, 0, 2, 1, 3) \ - MACRO(__VA_ARGS__, u8_permute_xyz_yzx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x7, 1, 2, 0, 3) \ - MACRO(__VA_ARGS__, u8_permute_xyz_yzw , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x7, 1, 2, 3, 0) \ - 
MACRO(__VA_ARGS__, u8_permute_xyz_zxy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x7, 2, 0, 1, 3) \ - MACRO(__VA_ARGS__, u8_permute_xyz_zwy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x7, 2, 3, 1, 0) \ - MACRO(__VA_ARGS__, u8_permute_xyz_wzy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x7, 3, 2, 1, 0) \ - MACRO(__VA_ARGS__, u8_permute_w_x , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x8, 3, 1, 2, 0) \ - MACRO(__VA_ARGS__, u8_permute_yw_wy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xa, 0, 3, 2, 1) \ - MACRO(__VA_ARGS__, u8_permute_zw_xz , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xc, 3, 1, 0, 2) \ - MACRO(__VA_ARGS__, u8_permute_xzw_zwx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xd, 2, 1, 3, 0) \ - MACRO(__VA_ARGS__, u8_permute_xzw_wxz , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xd, 3, 1, 0, 2) \ - MACRO(__VA_ARGS__, u8_permute_yzw_zwy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xe, 0, 2, 3, 1) \ - MACRO(__VA_ARGS__, u8_permute_yzw_xyz , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xe, 3, 0, 1, 2) \ - MACRO(__VA_ARGS__, u8_permute_yzw_zxy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xe, 3, 2, 0, 1) \ - MACRO(__VA_ARGS__, u8_permute_yzw_zyx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xe, 3, 2, 1, 0) \ - MACRO(__VA_ARGS__, u8_permute_xyzw_yzwx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xf, 1, 2, 3, 0) \ - MACRO(__VA_ARGS__, u8_permute_xyzw_wxyz , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xf, 3, 0, 1, 2) \ - MACRO(__VA_ARGS__, u8_permute_xyzw_wzxy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xf, 3, 2, 0, 1) \ - MACRO(__VA_ARGS__, u8_permute_xyzw_wzyx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0xf, 3, 2, 1, 0) + MACRO(__VA_ARGS__, u8_permute_xzyw , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 0, 2, 1, 3) \ + MACRO(__VA_ARGS__, u8_permute_xzwy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 0, 2, 3, 1) \ + MACRO(__VA_ARGS__, u8_permute_xwzy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 0, 3, 2, 1) \ + MACRO(__VA_ARGS__, u8_permute_yxzw , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 1, 0, 2, 3) \ + MACRO(__VA_ARGS__, u8_permute_yzxw , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 1, 2, 0, 3) \ + MACRO(__VA_ARGS__, u8_permute_yzwx , 
SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 1, 2, 3, 0) \ + MACRO(__VA_ARGS__, u8_permute_ywzx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 1, 3, 2, 0) \ + MACRO(__VA_ARGS__, u8_permute_zxyw , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 2, 0, 1, 3) \ + MACRO(__VA_ARGS__, u8_permute_zyxw , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 2, 1, 0, 3) \ + MACRO(__VA_ARGS__, u8_permute_zywx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 2, 1, 3, 0) \ + MACRO(__VA_ARGS__, u8_permute_zwxy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 2, 3, 0, 1) \ + MACRO(__VA_ARGS__, u8_permute_zwyx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 2, 3, 1, 0) \ + MACRO(__VA_ARGS__, u8_permute_wxyz , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 3, 0, 1, 2) \ + MACRO(__VA_ARGS__, u8_permute_wxzy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 3, 0, 2, 1) \ + MACRO(__VA_ARGS__, u8_permute_wyxz , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 3, 1, 0, 2) \ + MACRO(__VA_ARGS__, u8_permute_wyzx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 3, 1, 2, 0) \ + MACRO(__VA_ARGS__, u8_permute_wzxy , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 3, 2, 0, 1) \ + MACRO(__VA_ARGS__, u8_permute_wzyx , SWS_PIXEL_U8 , SWS_UOP_PERMUTE , 0x0, 3, 2, 1, 0) #define SWS_FOR_STRUCT_U8_PERMUTE(MACRO, ...) 
\ - MACRO(__VA_ARGS__, u8_permute_x_y , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u8_permute_x_z , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {2, 1, 0, 3}) \ - MACRO(__VA_ARGS__, u8_permute_x_w , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {3, 1, 2, 0}) \ - MACRO(__VA_ARGS__, u8_permute_y_w , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x2, .par.swizzle.in = {0, 3, 2, 1}) \ - MACRO(__VA_ARGS__, u8_permute_y_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x2, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u8_permute_xy_yx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x3, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u8_permute_xy_yw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x3, .par.swizzle.in = {1, 3, 2, 0}) \ - MACRO(__VA_ARGS__, u8_permute_xy_zw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x3, .par.swizzle.in = {2, 3, 0, 1}) \ - MACRO(__VA_ARGS__, u8_permute_xy_wx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x3, .par.swizzle.in = {3, 0, 2, 1}) \ - MACRO(__VA_ARGS__, u8_permute_z_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x4, .par.swizzle.in = {1, 2, 0, 3}) \ - MACRO(__VA_ARGS__, u8_permute_xz_zx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x5, .par.swizzle.in = {2, 1, 0, 3}) \ - MACRO(__VA_ARGS__, u8_permute_yz_zy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x6, .par.swizzle.in = {0, 2, 1, 3}) \ - MACRO(__VA_ARGS__, u8_permute_xyz_yzx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {1, 2, 0, 3}) \ - MACRO(__VA_ARGS__, u8_permute_xyz_yzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {1, 2, 3, 0}) \ - MACRO(__VA_ARGS__, u8_permute_xyz_zxy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x7, 
.par.swizzle.in = {2, 0, 1, 3}) \ - MACRO(__VA_ARGS__, u8_permute_xyz_zwy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {2, 3, 1, 0}) \ - MACRO(__VA_ARGS__, u8_permute_xyz_wzy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {3, 2, 1, 0}) \ - MACRO(__VA_ARGS__, u8_permute_w_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x8, .par.swizzle.in = {3, 1, 2, 0}) \ - MACRO(__VA_ARGS__, u8_permute_yw_wy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xa, .par.swizzle.in = {0, 3, 2, 1}) \ - MACRO(__VA_ARGS__, u8_permute_zw_xz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xc, .par.swizzle.in = {3, 1, 0, 2}) \ - MACRO(__VA_ARGS__, u8_permute_xzw_zwx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xd, .par.swizzle.in = {2, 1, 3, 0}) \ - MACRO(__VA_ARGS__, u8_permute_xzw_wxz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xd, .par.swizzle.in = {3, 1, 0, 2}) \ - MACRO(__VA_ARGS__, u8_permute_yzw_zwy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {0, 2, 3, 1}) \ - MACRO(__VA_ARGS__, u8_permute_yzw_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {3, 0, 1, 2}) \ - MACRO(__VA_ARGS__, u8_permute_yzw_zxy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {3, 2, 0, 1}) \ - MACRO(__VA_ARGS__, u8_permute_yzw_zyx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {3, 2, 1, 0}) \ - MACRO(__VA_ARGS__, u8_permute_xyzw_yzwx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {1, 2, 3, 0}) \ - MACRO(__VA_ARGS__, u8_permute_xyzw_wxyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {3, 0, 1, 2}) \ - MACRO(__VA_ARGS__, u8_permute_xyzw_wzxy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {3, 2, 0, 1}) \ - MACRO(__VA_ARGS__, u8_permute_xyzw_wzyx , .type 
= SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {3, 2, 1, 0}) + MACRO(__VA_ARGS__, u8_permute_xzyw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {0, 2, 1, 3}) \ + MACRO(__VA_ARGS__, u8_permute_xzwy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {0, 2, 3, 1}) \ + MACRO(__VA_ARGS__, u8_permute_xwzy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {0, 3, 2, 1}) \ + MACRO(__VA_ARGS__, u8_permute_yxzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 0, 2, 3}) \ + MACRO(__VA_ARGS__, u8_permute_yzxw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 2, 0, 3}) \ + MACRO(__VA_ARGS__, u8_permute_yzwx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 2, 3, 0}) \ + MACRO(__VA_ARGS__, u8_permute_ywzx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 3, 2, 0}) \ + MACRO(__VA_ARGS__, u8_permute_zxyw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 0, 1, 3}) \ + MACRO(__VA_ARGS__, u8_permute_zyxw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 1, 0, 3}) \ + MACRO(__VA_ARGS__, u8_permute_zywx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 1, 3, 0}) \ + MACRO(__VA_ARGS__, u8_permute_zwxy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 3, 0, 1}) \ + MACRO(__VA_ARGS__, u8_permute_zwyx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 3, 1, 0}) \ + MACRO(__VA_ARGS__, u8_permute_wxyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 0, 1, 2}) \ + MACRO(__VA_ARGS__, u8_permute_wxzy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 0, 2, 1}) \ + MACRO(__VA_ARGS__, u8_permute_wyxz 
, .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 1, 0, 2}) \ + MACRO(__VA_ARGS__, u8_permute_wyzx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 1, 2, 0}) \ + MACRO(__VA_ARGS__, u8_permute_wzxy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 2, 0, 1}) \ + MACRO(__VA_ARGS__, u8_permute_wzyx , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 2, 1, 0}) #define SWS_FOR_U8_COPY(MACRO, ...) \ MACRO(__VA_ARGS__, u8_copy_yz_xx , SWS_PIXEL_U8 , SWS_UOP_COPY , 0x6, 0, 0, 0, 3) \ MACRO(__VA_ARGS__, u8_copy_yzw_xxx , SWS_PIXEL_U8 , SWS_UOP_COPY , 0xe, 0, 0, 0, 0) \ @@ -440,51 +416,35 @@ #define SWS_FOR_U16_WRITE_BIT(MACRO, ...) #define SWS_FOR_STRUCT_U16_WRITE_BIT(MACRO, ...) #define SWS_FOR_U16_PERMUTE(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_permute_x_y , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x1, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u16_permute_x_z , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x1, 2, 1, 0, 3) \ - MACRO(__VA_ARGS__, u16_permute_x_w , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x1, 3, 1, 2, 0) \ - MACRO(__VA_ARGS__, u16_permute_y_w , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x2, 0, 3, 2, 1) \ - MACRO(__VA_ARGS__, u16_permute_y_x , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x2, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u16_permute_xy_yx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x3, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u16_permute_xy_wx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x3, 3, 0, 2, 1) \ - MACRO(__VA_ARGS__, u16_permute_xz_zx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x5, 2, 1, 0, 3) \ - MACRO(__VA_ARGS__, u16_permute_xz_zw , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x5, 2, 1, 3, 0) \ - MACRO(__VA_ARGS__, u16_permute_xyz_yzx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x7, 1, 2, 0, 3) \ - MACRO(__VA_ARGS__, u16_permute_xyz_yzw , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x7, 1, 2, 3, 0) \ - MACRO(__VA_ARGS__, u16_permute_xyz_zxy , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x7, 2, 0, 1, 3) \ - MACRO(__VA_ARGS__, 
u16_permute_xyz_zwy , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x7, 2, 3, 1, 0) \ - MACRO(__VA_ARGS__, u16_permute_xyz_wzy , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x7, 3, 2, 1, 0) \ - MACRO(__VA_ARGS__, u16_permute_w_x , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x8, 3, 1, 2, 0) \ - MACRO(__VA_ARGS__, u16_permute_zw_xz , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0xc, 3, 1, 0, 2) \ - MACRO(__VA_ARGS__, u16_permute_yzw_zwy , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0xe, 0, 2, 3, 1) \ - MACRO(__VA_ARGS__, u16_permute_yzw_xyz , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0xe, 3, 0, 1, 2) \ - MACRO(__VA_ARGS__, u16_permute_yzw_zyx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0xe, 3, 2, 1, 0) \ - MACRO(__VA_ARGS__, u16_permute_xyzw_yzwx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0xf, 1, 2, 3, 0) \ - MACRO(__VA_ARGS__, u16_permute_xyzw_wxyz , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0xf, 3, 0, 1, 2) \ - MACRO(__VA_ARGS__, u16_permute_xyzw_wzyx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0xf, 3, 2, 1, 0) + MACRO(__VA_ARGS__, u16_permute_xzwy , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 0, 2, 3, 1) \ + MACRO(__VA_ARGS__, u16_permute_xwzy , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 0, 3, 2, 1) \ + MACRO(__VA_ARGS__, u16_permute_yxzw , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 1, 0, 2, 3) \ + MACRO(__VA_ARGS__, u16_permute_yzxw , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 1, 2, 0, 3) \ + MACRO(__VA_ARGS__, u16_permute_yzwx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 1, 2, 3, 0) \ + MACRO(__VA_ARGS__, u16_permute_zxyw , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 2, 0, 1, 3) \ + MACRO(__VA_ARGS__, u16_permute_zyxw , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 2, 1, 0, 3) \ + MACRO(__VA_ARGS__, u16_permute_zywx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 2, 1, 3, 0) \ + MACRO(__VA_ARGS__, u16_permute_zwyx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 2, 3, 1, 0) \ + MACRO(__VA_ARGS__, u16_permute_wxyz , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 3, 0, 1, 2) \ + MACRO(__VA_ARGS__, u16_permute_wxzy , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 3, 0, 2, 1) \ + MACRO(__VA_ARGS__, u16_permute_wyxz , SWS_PIXEL_U16, 
SWS_UOP_PERMUTE , 0x0, 3, 1, 0, 2) \ + MACRO(__VA_ARGS__, u16_permute_wyzx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 3, 1, 2, 0) \ + MACRO(__VA_ARGS__, u16_permute_wzyx , SWS_PIXEL_U16, SWS_UOP_PERMUTE , 0x0, 3, 2, 1, 0) #define SWS_FOR_STRUCT_U16_PERMUTE(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_permute_x_y , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u16_permute_x_z , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {2, 1, 0, 3}) \ - MACRO(__VA_ARGS__, u16_permute_x_w , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {3, 1, 2, 0}) \ - MACRO(__VA_ARGS__, u16_permute_y_w , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x2, .par.swizzle.in = {0, 3, 2, 1}) \ - MACRO(__VA_ARGS__, u16_permute_y_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x2, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u16_permute_xy_yx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x3, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u16_permute_xy_wx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x3, .par.swizzle.in = {3, 0, 2, 1}) \ - MACRO(__VA_ARGS__, u16_permute_xz_zx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x5, .par.swizzle.in = {2, 1, 0, 3}) \ - MACRO(__VA_ARGS__, u16_permute_xz_zw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x5, .par.swizzle.in = {2, 1, 3, 0}) \ - MACRO(__VA_ARGS__, u16_permute_xyz_yzx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {1, 2, 0, 3}) \ - MACRO(__VA_ARGS__, u16_permute_xyz_yzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {1, 2, 3, 0}) \ - MACRO(__VA_ARGS__, u16_permute_xyz_zxy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {2, 0, 1, 3}) \ - MACRO(__VA_ARGS__, u16_permute_xyz_zwy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x7, 
.par.swizzle.in = {2, 3, 1, 0}) \ - MACRO(__VA_ARGS__, u16_permute_xyz_wzy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {3, 2, 1, 0}) \ - MACRO(__VA_ARGS__, u16_permute_w_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x8, .par.swizzle.in = {3, 1, 2, 0}) \ - MACRO(__VA_ARGS__, u16_permute_zw_xz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0xc, .par.swizzle.in = {3, 1, 0, 2}) \ - MACRO(__VA_ARGS__, u16_permute_yzw_zwy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {0, 2, 3, 1}) \ - MACRO(__VA_ARGS__, u16_permute_yzw_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {3, 0, 1, 2}) \ - MACRO(__VA_ARGS__, u16_permute_yzw_zyx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {3, 2, 1, 0}) \ - MACRO(__VA_ARGS__, u16_permute_xyzw_yzwx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {1, 2, 3, 0}) \ - MACRO(__VA_ARGS__, u16_permute_xyzw_wxyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {3, 0, 1, 2}) \ - MACRO(__VA_ARGS__, u16_permute_xyzw_wzyx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {3, 2, 1, 0}) + MACRO(__VA_ARGS__, u16_permute_xzwy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {0, 2, 3, 1}) \ + MACRO(__VA_ARGS__, u16_permute_xwzy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {0, 3, 2, 1}) \ + MACRO(__VA_ARGS__, u16_permute_yxzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 0, 2, 3}) \ + MACRO(__VA_ARGS__, u16_permute_yzxw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 2, 0, 3}) \ + MACRO(__VA_ARGS__, u16_permute_yzwx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 2, 3, 0}) \ + MACRO(__VA_ARGS__, u16_permute_zxyw , .type = 
SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 0, 1, 3}) \ + MACRO(__VA_ARGS__, u16_permute_zyxw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 1, 0, 3}) \ + MACRO(__VA_ARGS__, u16_permute_zywx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 1, 3, 0}) \ + MACRO(__VA_ARGS__, u16_permute_zwyx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 3, 1, 0}) \ + MACRO(__VA_ARGS__, u16_permute_wxyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 0, 1, 2}) \ + MACRO(__VA_ARGS__, u16_permute_wxzy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 0, 2, 1}) \ + MACRO(__VA_ARGS__, u16_permute_wyxz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 1, 0, 2}) \ + MACRO(__VA_ARGS__, u16_permute_wyzx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 1, 2, 0}) \ + MACRO(__VA_ARGS__, u16_permute_wzyx , .type = SWS_PIXEL_U16, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 2, 1, 0}) #define SWS_FOR_U16_COPY(MACRO, ...) \ MACRO(__VA_ARGS__, u16_copy_yz_xx , SWS_PIXEL_U16, SWS_UOP_COPY , 0x6, 0, 0, 0, 3) \ MACRO(__VA_ARGS__, u16_copy_yzw_xxy , SWS_PIXEL_U16, SWS_UOP_COPY , 0xe, 0, 0, 0, 1) @@ -744,65 +704,39 @@ #define SWS_FOR_U32_WRITE_BIT(MACRO, ...) #define SWS_FOR_STRUCT_U32_WRITE_BIT(MACRO, ...) #define SWS_FOR_U32_PERMUTE(MACRO, ...) 
\ - MACRO(__VA_ARGS__, u32_permute_x_y , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x1, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u32_permute_x_z , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x1, 2, 1, 0, 3) \ - MACRO(__VA_ARGS__, u32_permute_x_w , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x1, 3, 1, 2, 0) \ - MACRO(__VA_ARGS__, u32_permute_y_w , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x2, 0, 3, 2, 1) \ - MACRO(__VA_ARGS__, u32_permute_y_x , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x2, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u32_permute_xy_yx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x3, 1, 0, 2, 3) \ - MACRO(__VA_ARGS__, u32_permute_z_x , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x4, 1, 2, 0, 3) \ - MACRO(__VA_ARGS__, u32_permute_xz_zx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x5, 2, 1, 0, 3) \ - MACRO(__VA_ARGS__, u32_permute_xz_zw , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x5, 2, 1, 3, 0) \ - MACRO(__VA_ARGS__, u32_permute_xz_wx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x5, 3, 1, 0, 2) \ - MACRO(__VA_ARGS__, u32_permute_yz_zy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x6, 0, 2, 1, 3) \ - MACRO(__VA_ARGS__, u32_permute_xyz_yzx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x7, 1, 2, 0, 3) \ - MACRO(__VA_ARGS__, u32_permute_xyz_yzw , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x7, 1, 2, 3, 0) \ - MACRO(__VA_ARGS__, u32_permute_xyz_zxy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x7, 2, 0, 1, 3) \ - MACRO(__VA_ARGS__, u32_permute_xyz_zwy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x7, 2, 3, 1, 0) \ - MACRO(__VA_ARGS__, u32_permute_xyz_wzy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x7, 3, 2, 1, 0) \ - MACRO(__VA_ARGS__, u32_permute_w_y , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x8, 0, 2, 3, 1) \ - MACRO(__VA_ARGS__, u32_permute_w_x , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x8, 3, 1, 2, 0) \ - MACRO(__VA_ARGS__, u32_permute_xw_yx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x9, 1, 2, 3, 0) \ - MACRO(__VA_ARGS__, u32_permute_yw_wy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xa, 0, 3, 2, 1) \ - MACRO(__VA_ARGS__, u32_permute_yw_xy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xa, 3, 0, 2, 1) \ - MACRO(__VA_ARGS__, u32_permute_zw_xz , 
SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xc, 3, 1, 0, 2) \ - MACRO(__VA_ARGS__, u32_permute_yzw_xyz , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xe, 3, 0, 1, 2) \ - MACRO(__VA_ARGS__, u32_permute_yzw_zxy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xe, 3, 2, 0, 1) \ - MACRO(__VA_ARGS__, u32_permute_yzw_zyx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xe, 3, 2, 1, 0) \ - MACRO(__VA_ARGS__, u32_permute_xyzw_yzwx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xf, 1, 2, 3, 0) \ - MACRO(__VA_ARGS__, u32_permute_xyzw_wxyz , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xf, 3, 0, 1, 2) \ - MACRO(__VA_ARGS__, u32_permute_xyzw_wzxy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xf, 3, 2, 0, 1) \ - MACRO(__VA_ARGS__, u32_permute_xyzw_wzyx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0xf, 3, 2, 1, 0) + MACRO(__VA_ARGS__, u32_permute_xzyw , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 0, 2, 1, 3) \ + MACRO(__VA_ARGS__, u32_permute_xzwy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 0, 2, 3, 1) \ + MACRO(__VA_ARGS__, u32_permute_xwzy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 0, 3, 2, 1) \ + MACRO(__VA_ARGS__, u32_permute_yxzw , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 1, 0, 2, 3) \ + MACRO(__VA_ARGS__, u32_permute_yzxw , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 1, 2, 0, 3) \ + MACRO(__VA_ARGS__, u32_permute_yzwx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 1, 2, 3, 0) \ + MACRO(__VA_ARGS__, u32_permute_zxyw , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 2, 0, 1, 3) \ + MACRO(__VA_ARGS__, u32_permute_zyxw , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 2, 1, 0, 3) \ + MACRO(__VA_ARGS__, u32_permute_zywx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 2, 1, 3, 0) \ + MACRO(__VA_ARGS__, u32_permute_zwyx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 2, 3, 1, 0) \ + MACRO(__VA_ARGS__, u32_permute_wxyz , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 3, 0, 1, 2) \ + MACRO(__VA_ARGS__, u32_permute_wxzy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 3, 0, 2, 1) \ + MACRO(__VA_ARGS__, u32_permute_wyxz , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 3, 1, 0, 2) \ + MACRO(__VA_ARGS__, u32_permute_wyzx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 3, 
1, 2, 0) \ + MACRO(__VA_ARGS__, u32_permute_wzxy , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 3, 2, 0, 1) \ + MACRO(__VA_ARGS__, u32_permute_wzyx , SWS_PIXEL_U32, SWS_UOP_PERMUTE , 0x0, 3, 2, 1, 0) #define SWS_FOR_STRUCT_U32_PERMUTE(MACRO, ...) \ - MACRO(__VA_ARGS__, u32_permute_x_y , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u32_permute_x_z , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {2, 1, 0, 3}) \ - MACRO(__VA_ARGS__, u32_permute_x_w , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x1, .par.swizzle.in = {3, 1, 2, 0}) \ - MACRO(__VA_ARGS__, u32_permute_y_w , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x2, .par.swizzle.in = {0, 3, 2, 1}) \ - MACRO(__VA_ARGS__, u32_permute_y_x , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x2, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u32_permute_xy_yx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x3, .par.swizzle.in = {1, 0, 2, 3}) \ - MACRO(__VA_ARGS__, u32_permute_z_x , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x4, .par.swizzle.in = {1, 2, 0, 3}) \ - MACRO(__VA_ARGS__, u32_permute_xz_zx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x5, .par.swizzle.in = {2, 1, 0, 3}) \ - MACRO(__VA_ARGS__, u32_permute_xz_zw , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x5, .par.swizzle.in = {2, 1, 3, 0}) \ - MACRO(__VA_ARGS__, u32_permute_xz_wx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x5, .par.swizzle.in = {3, 1, 0, 2}) \ - MACRO(__VA_ARGS__, u32_permute_yz_zy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x6, .par.swizzle.in = {0, 2, 1, 3}) \ - MACRO(__VA_ARGS__, u32_permute_xyz_yzx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {1, 2, 0, 3}) \ - MACRO(__VA_ARGS__, u32_permute_xyz_yzw , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {1, 2, 3, 0}) 
\ - MACRO(__VA_ARGS__, u32_permute_xyz_zxy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {2, 0, 1, 3}) \ - MACRO(__VA_ARGS__, u32_permute_xyz_zwy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {2, 3, 1, 0}) \ - MACRO(__VA_ARGS__, u32_permute_xyz_wzy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x7, .par.swizzle.in = {3, 2, 1, 0}) \ - MACRO(__VA_ARGS__, u32_permute_w_y , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x8, .par.swizzle.in = {0, 2, 3, 1}) \ - MACRO(__VA_ARGS__, u32_permute_w_x , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x8, .par.swizzle.in = {3, 1, 2, 0}) \ - MACRO(__VA_ARGS__, u32_permute_xw_yx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x9, .par.swizzle.in = {1, 2, 3, 0}) \ - MACRO(__VA_ARGS__, u32_permute_yw_wy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xa, .par.swizzle.in = {0, 3, 2, 1}) \ - MACRO(__VA_ARGS__, u32_permute_yw_xy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xa, .par.swizzle.in = {3, 0, 2, 1}) \ - MACRO(__VA_ARGS__, u32_permute_zw_xz , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xc, .par.swizzle.in = {3, 1, 0, 2}) \ - MACRO(__VA_ARGS__, u32_permute_yzw_xyz , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {3, 0, 1, 2}) \ - MACRO(__VA_ARGS__, u32_permute_yzw_zxy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {3, 2, 0, 1}) \ - MACRO(__VA_ARGS__, u32_permute_yzw_zyx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xe, .par.swizzle.in = {3, 2, 1, 0}) \ - MACRO(__VA_ARGS__, u32_permute_xyzw_yzwx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {1, 2, 3, 0}) \ - MACRO(__VA_ARGS__, u32_permute_xyzw_wxyz , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {3, 0, 1, 2}) \ - MACRO(__VA_ARGS__, u32_permute_xyzw_wzxy , .type = SWS_PIXEL_U32, .uop = 
SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {3, 2, 0, 1}) \ - MACRO(__VA_ARGS__, u32_permute_xyzw_wzyx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0xf, .par.swizzle.in = {3, 2, 1, 0}) + MACRO(__VA_ARGS__, u32_permute_xzyw , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {0, 2, 1, 3}) \ + MACRO(__VA_ARGS__, u32_permute_xzwy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {0, 2, 3, 1}) \ + MACRO(__VA_ARGS__, u32_permute_xwzy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {0, 3, 2, 1}) \ + MACRO(__VA_ARGS__, u32_permute_yxzw , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 0, 2, 3}) \ + MACRO(__VA_ARGS__, u32_permute_yzxw , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 2, 0, 3}) \ + MACRO(__VA_ARGS__, u32_permute_yzwx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {1, 2, 3, 0}) \ + MACRO(__VA_ARGS__, u32_permute_zxyw , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 0, 1, 3}) \ + MACRO(__VA_ARGS__, u32_permute_zyxw , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 1, 0, 3}) \ + MACRO(__VA_ARGS__, u32_permute_zywx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 1, 3, 0}) \ + MACRO(__VA_ARGS__, u32_permute_zwyx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {2, 3, 1, 0}) \ + MACRO(__VA_ARGS__, u32_permute_wxyz , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 0, 1, 2}) \ + MACRO(__VA_ARGS__, u32_permute_wxzy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 0, 2, 1}) \ + MACRO(__VA_ARGS__, u32_permute_wyxz , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 1, 0, 2}) \ + MACRO(__VA_ARGS__, u32_permute_wyzx , 
.type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 1, 2, 0}) \ + MACRO(__VA_ARGS__, u32_permute_wzxy , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 2, 0, 1}) \ + MACRO(__VA_ARGS__, u32_permute_wzyx , .type = SWS_PIXEL_U32, .uop = SWS_UOP_PERMUTE , .mask = 0x0, .par.swizzle.in = {3, 2, 1, 0}) #define SWS_FOR_U32_COPY(MACRO, ...) \ MACRO(__VA_ARGS__, u32_copy_yz_xx , SWS_PIXEL_U32, SWS_UOP_COPY , 0x6, 0, 0, 0, 3) \ MACRO(__VA_ARGS__, u32_copy_yzw_xxx , SWS_PIXEL_U32, SWS_UOP_COPY , 0xe, 0, 0, 0, 0) \ diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list index dcda011ccc..d47e674bec 100644 --- a/tests/ref/fate/sws-ops-list +++ b/tests/ref/fate/sws-ops-list @@ -1 +1 @@ -e490d908612d059c644e64b43247fb08 +a5779f7e6e5f6a56d8150261343369ac -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
