This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 6057759ffc86406d5839fbd4d485a784f3ef3285 Author: Niklas Haas <[email protected]> AuthorDate: Tue Jun 9 01:35:51 2026 +0200 Commit: Niklas Haas <[email protected]> CommitDate: Tue Jun 9 18:27:20 2026 +0200 swscale/uops: parametrize filter op result type The ops.h infrastructure currently hard-codes this as SWS_PIXEL_F32, but I want to at least properly parametrize this in case we ever decide to revisit this decision in the future. In particular, it may become relevant for trivial kernels or kernels whose intermediates are bounded, exact integers (which could possibly be output directly as e.g. U16 or U32). The FATE change is just because the filter op names gained a suffix. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <[email protected]> --- libswscale/uops.c | 16 +++++ libswscale/uops.h | 5 ++ libswscale/uops_macros.h | 144 ++++++++++++++++++++++---------------------- libswscale/uops_tmpl.c | 12 +++- tests/ref/fate/sws-ops-list | 2 +- 5 files changed, 104 insertions(+), 75 deletions(-) diff --git a/libswscale/uops.c b/libswscale/uops.c index 9fe4470d2f..3f2b81c205 100644 --- a/libswscale/uops.c +++ b/libswscale/uops.c @@ -144,6 +144,11 @@ void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX]) const SwsUOpParams *par = &op->par; switch (op->uop) { + case SWS_UOP_READ_PLANAR_FH: + case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: + av_bprintf(&bp, "_%s", ff_sws_pixel_type_name(par->filter.type)); + break; case SWS_UOP_LSHIFT: case SWS_UOP_RSHIFT: av_bprintf(&bp, "_%u", par->shift.amount); @@ -219,6 +224,11 @@ static int generate_entry_struct(void *opaque, void *key) const SwsUOpParams *par = &uop->par; switch (uop->uop) { + case SWS_UOP_READ_PLANAR_FH: + case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: + av_bprintf(bp, ", .par.filter.type = %s", pixel_types[par->filter.type].full); + break; case SWS_UOP_LSHIFT: case SWS_UOP_RSHIFT: av_bprintf(bp, ", .par.shift.amount = %u", par->shift.amount); @@ -270,6 +280,11 @@ static int generate_entry_args(void *opaque, void *key) const SwsUOpParams *par = &uop->par; switch (uop->uop) { + case SWS_UOP_READ_PLANAR_FH: + case SWS_UOP_READ_PLANAR_FV: + case SWS_UOP_READ_PLANAR_FV_FMA: + av_bprintf(bp, ", %s", pixel_types[par->filter.type].full); + break; case SWS_UOP_LSHIFT: case SWS_UOP_RSHIFT: av_bprintf(bp, ", %u", par->shift.amount); @@ -440,6 +455,7 @@ static int translate_rw_op(SwsContext *ctx, SwsUOpList *ops, SwsUOpFlags flags, if (op->rw.filter) { if (op->op == SWS_OP_WRITE || op->rw.frac || op->rw.packed) return AVERROR(ENOTSUP); + uop.par.filter.type = SWS_PIXEL_F32; uop.data.kernel = av_refstruct_ref(op->rw.kernel); if (op->rw.filter == SWS_OP_FILTER_H) { uop.uop = SWS_UOP_READ_PLANAR_FH; diff --git a/libswscale/uops.h b/libswscale/uops.h index dcfbf336b5..d69c35053d 100644 --- a/libswscale/uops.h +++ b/libswscale/uops.h @@ -135,6 +135,10 @@ typedef enum SwsUOpType { SWS_UOP_TYPE_NB, } SwsUOpType; +typedef struct SwsFilterUOp { + SwsPixelType type; /* pixel type to store result as */ +} SwsFilterUOp; + typedef struct SwsShiftUOp { uint8_t amount; } SwsShiftUOp; @@ -172,6 +176,7 @@ typedef struct SwsDitherUOp { int ff_sws_dither_height(const SwsDitherUOp *dither); typedef union SwsUOpParams { + SwsFilterUOp filter; /* for SWS_UOP_READ_*_FV/FH */ SwsShiftUOp shift; SwsSwizzleUOp swizzle; SwsPackUOp pack; diff --git a/libswscale/uops_macros.h b/libswscale/uops_macros.h index f8cd63c828..9ab1858577 100644 --- a/libswscale/uops_macros.h +++ b/libswscale/uops_macros.h @@ -30,35 +30,35 @@ MACRO(__VA_ARGS__, u8_read_planar_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR , .mask = 0x7) \ MACRO(__VA_ARGS__, u8_read_planar_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR , .mask = 0xf) #define SWS_FOR_U8_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_x , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xyzw , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fh_x_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xy_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xyz_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xyzw_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FH , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U8_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fh_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fh_x_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xy_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xyz_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fh_xyzw_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U8_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_x , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fv_x_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xy_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xyz_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U8_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fv_x_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xy_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xyz_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_xyzw_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U8_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw_f32 , SWS_PIXEL_U8 , SWS_UOP_READ_PLANAR_FV_FMA, 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U8_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ - MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_x_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xy_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyz_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u8_read_planar_fv_fma_xyzw_f32 , .type = SWS_PIXEL_U8 , .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U8_READ_PACKED(MACRO, ...) \ MACRO(__VA_ARGS__, u8_read_packed_xy , SWS_PIXEL_U8 , SWS_UOP_READ_PACKED , 0x3) \ MACRO(__VA_ARGS__, u8_read_packed_xyz , SWS_PIXEL_U8 , SWS_UOP_READ_PACKED , 0x7) \ @@ -306,35 +306,35 @@ MACRO(__VA_ARGS__, u16_read_planar_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR , .mask = 0x7) \ MACRO(__VA_ARGS__, u16_read_planar_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR , .mask = 0xf) #define SWS_FOR_U16_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_x , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xy , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xyzw , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fh_x_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xy_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xyz_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xyzw_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FH , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U16_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fh_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fh_x_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xy_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xyz_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fh_xyzw_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U16_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_x , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xy , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fv_x_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xy_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xyz_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U16_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fv_x_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xy_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xyz_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_xyzw_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U16_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw_f32 , SWS_PIXEL_U16, SWS_UOP_READ_PLANAR_FV_FMA, 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_U16_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ - MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_x_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xy_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyz_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, u16_read_planar_fv_fma_xyzw_f32 , .type = SWS_PIXEL_U16, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_U16_READ_PACKED(MACRO, ...) \ MACRO(__VA_ARGS__, u16_read_packed_xy , SWS_PIXEL_U16, SWS_UOP_READ_PACKED , 0x3) \ MACRO(__VA_ARGS__, u16_read_packed_xyz , SWS_PIXEL_U16, SWS_UOP_READ_PACKED , 0x7) \ @@ -790,35 +790,35 @@ #define SWS_FOR_F32_READ_PLANAR(MACRO, ...) #define SWS_FOR_STRUCT_F32_READ_PLANAR(MACRO, ...) #define SWS_FOR_F32_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_x , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xy , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xyz , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xyzw , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fh_x_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xy_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xyz_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xyzw_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FH , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_F32_READ_PLANAR_FH(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xy , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xyz , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fh_xyzw , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fh_x_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xy_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xyz_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fh_xyzw_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FH , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_F32_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_x , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xy , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xyz , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fv_x_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xy_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xyz_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV , 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_F32_READ_PLANAR_FV(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xy , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xyz , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fv_x_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xy_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xyz_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_xyzw_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV , .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_F32_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x1, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x3, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0x7, SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw_f32 , SWS_PIXEL_F32, SWS_UOP_READ_PLANAR_FV_FMA, 0xf, SWS_PIXEL_F32) #define SWS_FOR_STRUCT_F32_READ_PLANAR_FV_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7) \ - MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf) + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_x_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x1, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xy_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x3, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyz_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0x7, .par.filter.type = SWS_PIXEL_F32) \ + MACRO(__VA_ARGS__, f32_read_planar_fv_fma_xyzw_f32 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_READ_PLANAR_FV_FMA, .mask = 0xf, .par.filter.type = SWS_PIXEL_F32) #define SWS_FOR_F32_READ_PACKED(MACRO, ...) #define SWS_FOR_STRUCT_F32_READ_PACKED(MACRO, ...) #define SWS_FOR_F32_READ_NIBBLE(MACRO, ...) diff --git a/libswscale/uops_tmpl.c b/libswscale/uops_tmpl.c index 9e0d35ea1f..44e8551083 100644 --- a/libswscale/uops_tmpl.c +++ b/libswscale/uops_tmpl.c @@ -219,6 +219,9 @@ SWS_FOR_STRUCT(PX, WRITE_BIT, DECL_ENTRY) DECL_SETUP(setup_filter_v, params, out) { + if (params->uop->par.filter.type != SWS_PIXEL_F32) + return AVERROR(ENOTSUP); + const SwsFilterWeights *filter = params->uop->data.kernel; static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]), ">8 byte pointers not supported"); @@ -238,8 +241,9 @@ DECL_SETUP(setup_filter_v, params, out) } /* Fully general vertical planar filter case */ -DECL_READ(read_planar_fv, const SwsCompMask mask) +DECL_READ(read_planar_fv, const SwsCompMask mask, const SwsPixelType type) { + av_assert2(type == SWS_PIXEL_F32); const SwsOpExec *exec = iter->exec; const float *restrict weights = impl->priv.ptr; const int filter_size = impl->priv.i32[2]; @@ -278,6 +282,9 @@ DECL_READ(read_planar_fv, const SwsCompMask mask) DECL_SETUP(setup_filter_h, params, out) { + if (params->uop->par.filter.type != SWS_PIXEL_F32) + return AVERROR(ENOTSUP); + SwsFilterWeights *filter = params->uop->data.kernel; out->priv.ptr = av_refstruct_ref(filter->weights); out->priv.i32[2] = filter->filter_size; @@ -286,8 +293,9 @@ DECL_SETUP(setup_filter_h, params, out) } /* Fully general horizontal planar filter case */ -DECL_READ(read_planar_fh, const SwsCompMask mask) +DECL_READ(read_planar_fh, const SwsCompMask mask, const SwsPixelType type) { + av_assert2(type == SWS_PIXEL_F32); const SwsOpExec *exec = iter->exec; const int *restrict weights = impl->priv.ptr; const int filter_size = impl->priv.i32[2]; diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list index 44b615a0c8..68a1fc1105 100644 --- a/tests/ref/fate/sws-ops-list +++ b/tests/ref/fate/sws-ops-list @@ -1 +1 @@ -900da2b2c6276da01ae3f158d02abf0b +e2f26cb6df5c11015e613016bb1a004a _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
