This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 4a8a1f5b8b70bfd563b83e88a9c20b5e55ecb39e Author: Niklas Haas <[email protected]> AuthorDate: Tue Jun 9 01:31:20 2026 +0200 Commit: Niklas Haas <[email protected]> CommitDate: Tue Jun 9 18:27:20 2026 +0200 swscale/uops: add SWS_UOP_READ_PLANAR_FV_FMA Analog of SWS_UOP_READ_PLANAR_FV for FMA-enabled backends. The logic for determining when we can safely use FMA is maybe a bit obtuse, given that a `return type == SWS_PIXEL_U8` would have just done the trick as well, but better to be safe than sorry, if we ever decide to tune this constant in the future. Signed-off-by: Niklas Haas <[email protected]> --- libswscale/uops.c | 41 +++++++++++++++++++++++++++++++++-------- libswscale/uops.h | 1 + libswscale/uops_macros.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/libswscale/uops.c b/libswscale/uops.c index 9d3c7a71f5..9fe4470d2f 100644 --- a/libswscale/uops.c +++ b/libswscale/uops.c @@ -41,15 +41,16 @@ int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b) } static const struct { - char full[24]; - char abbr[16]; - char macro[16]; + char full[32]; + char abbr[32]; + char macro[32]; } uop_names[SWS_UOP_TYPE_NB] = { #define UOP_NAME(OP, ABBR) [SWS_UOP_##OP] = { "SWS_UOP_" #OP, ABBR, #OP } UOP_NAME(INVALID, "invalid"), UOP_NAME(READ_PLANAR, "read_planar"), UOP_NAME(READ_PLANAR_FH, "read_planar_fh"), UOP_NAME(READ_PLANAR_FV, "read_planar_fv"), + UOP_NAME(READ_PLANAR_FV_FMA,"read_planar_fv_fma"), UOP_NAME(READ_PACKED, "read_packed"), UOP_NAME(READ_NIBBLE, "read_nibble"), UOP_NAME(READ_BIT, "read_bit"), @@ -314,6 +315,7 @@ static void uop_uninit(SwsUOp *uop) break; case SWS_UOP_READ_PLANAR_FH: case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: av_refstruct_unref(&uop->data.kernel); break; } @@ -403,7 +405,26 @@ static bool exact_prod(SwsPixelType type, SwsPixel coef, return false; } -static int translate_rw_op(SwsUOpList *ops, const SwsOp *op) +static bool check_filter_fma(SwsContext *ctx, SwsUOpFlags flags, const SwsOp *op) +{ + if (!(flags & SWS_UOP_FLAG_FMA)) + return false; + if (!(ctx->flags & SWS_BITEXACT)) + return true; + if (!ff_sws_pixel_type_is_int(op->type)) + return false; + + const int bits = ff_sws_pixel_type_size(op->type) * 8; + const uint64_t max_val = UINT64_MAX >> (64 - bits); + + /* Maximum value representable losslessly as float. Note that this is + * currently true only for U8, but that may change if we ever update the + * value of SWS_FILTER_SCALE. */ + return max_val * SWS_FILTER_SCALE <= (1 << 22); +} + +static int translate_rw_op(SwsContext *ctx, SwsUOpList *ops, SwsUOpFlags flags, + const SwsOp *op) { SwsUOp uop = { .type = op->type, @@ -419,10 +440,14 @@ static int translate_rw_op(SwsUOpList *ops, const SwsOp *op) if (op->rw.filter) { if (op->op == SWS_OP_WRITE || op->rw.frac || op->rw.packed) return AVERROR(ENOTSUP); - uop.uop = op->rw.filter == SWS_OP_FILTER_H - ? SWS_UOP_READ_PLANAR_FH - : SWS_UOP_READ_PLANAR_FV; uop.data.kernel = av_refstruct_ref(op->rw.kernel); + if (op->rw.filter == SWS_OP_FILTER_H) { + uop.uop = SWS_UOP_READ_PLANAR_FH; + } else if (check_filter_fma(ctx, flags, op)) { + uop.uop = SWS_UOP_READ_PLANAR_FV_FMA; + } else { + uop.uop = SWS_UOP_READ_PLANAR_FV; + } } else if (op->rw.packed && op->rw.elems > 1) { if (op->rw.frac) return AVERROR(ENOTSUP); @@ -602,7 +627,7 @@ static int translate_op(SwsContext *ctx, SwsUOpList *uops, SwsUOpFlags flags, return AVERROR(ENOTSUP); /* always handled by subpass splitting */ case SWS_OP_READ: case SWS_OP_WRITE: - return translate_rw_op(uops, op); + return translate_rw_op(ctx, uops, flags, op); case SWS_OP_SWIZZLE: return translate_swizzle(uops, op); case SWS_OP_DITHER: diff --git a/libswscale/uops.h b/libswscale/uops.h index 4983552eb7..dcfbf336b5 100644 --- a/libswscale/uops.h +++ b/libswscale/uops.h @@ -91,6 +91,7 @@ typedef enum SwsUOpType { SWS_UOP_READ_PLANAR, /* simple planar byte-aligned read */ SWS_UOP_READ_PLANAR_FH, /* planar read with horizontal filter */ SWS_UOP_READ_PLANAR_FV, /* planar read with vertical filter */ + SWS_UOP_READ_PLANAR_FV_FMA, SWS_UOP_READ_PACKED, /* simple packed byte-aligned read */ SWS_UOP_READ_NIBBLE, /* fractional read (4 bits) from single plane */ SWS_UOP_READ_BIT, /* fractional read (1 bit) from single plane */ diff --git a/libswscale/uops_macros.h b/libswscale/uops_macros.h index f3774243a4..f8cd63c828 100644 --- a/libswscale/uops_macros.h +++ b/libswscale/uops_macros.h @@ -49,6 +49,16 @@ MACRO(__VA_ARGS__, u8_read_planar_fv_xy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ MACRO(__VA_ARGS__, u8_read_planar_fv_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) +#define SWS_FOR_U8_READ_PLANAR_FV_FMA(MACRO, ...) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0xf) +#define SWS_FOR_STRUCT_U8_READ_PLANAR_FV_FMA(MACRO, ...) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) #define SWS_FOR_U8_READ_PACKED(MACRO, ...) \ MACRO(__VA_ARGS__, u8_read_packed_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PACKED , 0x3) \ MACRO(__VA_ARGS__, u8_read_packed_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PACKED , 0x7) \ @@ -315,6 +325,16 @@ MACRO(__VA_ARGS__, u16_read_planar_fv_xy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ MACRO(__VA_ARGS__, u16_read_planar_fv_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) +#define SWS_FOR_U16_READ_PLANAR_FV_FMA(MACRO, ...) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0xf) +#define SWS_FOR_STRUCT_U16_READ_PLANAR_FV_FMA(MACRO, ...) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) #define SWS_FOR_U16_READ_PACKED(MACRO, ...) \ MACRO(__VA_ARGS__, u16_read_packed_xy , SWS_PIXEL_U16, SWS_UOP_READ_PACKED , 0x3) \ MACRO(__VA_ARGS__, u16_read_packed_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PACKED , 0x7) \ @@ -567,6 +587,8 @@ #define SWS_FOR_STRUCT_U32_READ_PLANAR_FH(MACRO, ...) #define SWS_FOR_U32_READ_PLANAR_FV(MACRO, ...) #define SWS_FOR_STRUCT_U32_READ_PLANAR_FV(MACRO, ...) +#define SWS_FOR_U32_READ_PLANAR_FV_FMA(MACRO, ...) +#define SWS_FOR_STRUCT_U32_READ_PLANAR_FV_FMA(MACRO, ...) #define SWS_FOR_U32_READ_PACKED(MACRO, ...) \ MACRO(__VA_ARGS__, u32_read_packed_xy , SWS_PIXEL_U32, SWS_UOP_READ_PACKED , 0x3) \ MACRO(__VA_ARGS__, u32_read_packed_xyz , SWS_PIXEL_U32, SWS_UOP_READ_PACKED , 0x7) \ @@ -787,6 +809,16 @@ MACRO(__VA_ARGS__, f32_read_planar_fv_xy , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ MACRO(__VA_ARGS__, f32_read_planar_fv_xyz , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) +#define SWS_FOR_F32_READ_PLANAR_FV_FMA(MACRO, ...) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0xf) +#define SWS_FOR_STRUCT_F32_READ_PLANAR_FV_FMA(MACRO, ...) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) #define SWS_FOR_F32_READ_PACKED(MACRO, ...) #define SWS_FOR_STRUCT_F32_READ_PACKED(MACRO, ...) #define SWS_FOR_F32_READ_NIBBLE(MACRO, ...) _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
