PR #23563 opened by Ramiro Polla (ramiro) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23563 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23563.patch
>From 38d40bfce58acebe676f287ad28af5b1ec449ff4 Mon Sep 17 00:00:00 2001 From: Ramiro Polla <[email protected]> Date: Tue, 23 Jun 2026 01:49:20 +0200 Subject: [PATCH] swscale/uops: mark unneeded rows as zero in translate_linear_op() --- libswscale/uops.c | 7 +- libswscale/uops_macros.h | 148 +++++++++++++++++++-------------------- 2 files changed, 79 insertions(+), 76 deletions(-) diff --git a/libswscale/uops.c b/libswscale/uops.c index 096621466c..64eb91e959 100644 --- a/libswscale/uops.c +++ b/libswscale/uops.c @@ -712,8 +712,11 @@ static int translate_linear_op(SwsContext *ctx, SwsUOpList *ops, uint32_t exact = 0; for (int i = 0; i < 4; i++) { - if (SWS_OP_NEEDED(op, i) && (op->lin.mask & SWS_MASK_ROW(i))) - uop.mask |= SWS_COMP(i); + if (!SWS_OP_NEEDED(op, i) || !(op->lin.mask & SWS_MASK_ROW(i))) { + uop.par.lin.zero |= SWS_MASK_ROW(i); + continue; + } + uop.mask |= SWS_COMP(i); bool nonzero = (op->lin.m[i][4].num != 0); for (int j = 0; j < 5; j++) { const AVRational k = op->lin.m[i][j]; diff --git a/libswscale/uops_macros.h b/libswscale/uops_macros.h index a382988361..ed8d439b17 100644 --- a/libswscale/uops_macros.h +++ b/libswscale/uops_macros.h @@ -1080,93 +1080,93 @@ #define SWS_FOR_F32_CLEAR(MACRO, ...) #define SWS_FOR_STRUCT_F32_CLEAR(MACRO, ...) #define SWS_FOR_F32_LINEAR(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_linear_x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x1, 0x41040, 0xbefa8) \ - MACRO(__VA_ARGS__, f32_linear_x_x000x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x1, 0x41040, 0xbefae) \ - MACRO(__VA_ARGS__, f32_linear_x_xxx00 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x1, 0x41040, 0xbefb8) \ - MACRO(__VA_ARGS__, f32_linear_y_0x000 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x2, 0x41001, 0xbefbe) \ - MACRO(__VA_ARGS__, f32_linear_xyz_xxx0x_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x40000, 0xba108) \ - MACRO(__VA_ARGS__, f32_linear_xyz_x0x0x_xxx0x_xx00x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x40000, 0xbb10a) \ - MACRO(__VA_ARGS__, f32_linear_xyz_xxx00_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x40000, 0xba118) \ - MACRO(__VA_ARGS__, f32_linear_xyz_x000x_0x00x_00x0x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x40000, 0xbadae) \ - MACRO(__VA_ARGS__, f32_linear_xyz_x0000_0x000_00x00 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x40000, 0xbefbe) \ - MACRO(__VA_ARGS__, f32_linear_xyz_10x0x_1xx0x_1x00x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x40421, 0xbb10a) \ - MACRO(__VA_ARGS__, f32_linear_w_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x8, 0x01041, 0xbefbe) \ - MACRO(__VA_ARGS__, f32_linear_xw_x000x_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x9, 0x01040, 0xbefae) \ - MACRO(__VA_ARGS__, f32_linear_xw_xxx00_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x9, 0x01040, 0xbefb8) \ + MACRO(__VA_ARGS__, f32_linear_x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x1, 0x00000, 0xfffe8) \ + MACRO(__VA_ARGS__, f32_linear_x_x000x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x1, 0x00000, 0xfffee) \ + MACRO(__VA_ARGS__, f32_linear_x_xxx00 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x1, 0x00000, 0xffff8) \ + MACRO(__VA_ARGS__, f32_linear_y_0x000 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x2, 0x00000, 0xfffbf) \ + MACRO(__VA_ARGS__, f32_linear_xyz_xxx0x_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x00000, 0xfa108) \ + MACRO(__VA_ARGS__, f32_linear_xyz_x0x0x_xxx0x_xx00x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x00000, 0xfb10a) \ + MACRO(__VA_ARGS__, f32_linear_xyz_xxx00_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x00000, 0xfa118) \ + MACRO(__VA_ARGS__, f32_linear_xyz_x000x_0x00x_00x0x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x00000, 0xfadae) \ + MACRO(__VA_ARGS__, f32_linear_xyz_x0000_0x000_00x00 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x00000, 0xfefbe) \ + MACRO(__VA_ARGS__, f32_linear_xyz_10x0x_1xx0x_1x00x , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x7, 0x00421, 0xfb10a) \ + MACRO(__VA_ARGS__, f32_linear_w_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x8, 0x00000, 0xbffff) \ + MACRO(__VA_ARGS__, f32_linear_xw_x000x_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x9, 0x00000, 0xbffee) \ + MACRO(__VA_ARGS__, f32_linear_xw_xxx00_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0x9, 0x00000, 0xbfff8) \ MACRO(__VA_ARGS__, f32_linear_xyzw_xxx0x_xxx0x_xxx0x_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0xf, 0x00000, 0xba108) \ MACRO(__VA_ARGS__, f32_linear_xyzw_x0x0x_xxx0x_xx00x_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0xf, 0x00000, 0xbb10a) \ MACRO(__VA_ARGS__, f32_linear_xyzw_x0000_0x000_00x00_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR , 0xf, 0x00000, 0xbefbe) #define SWS_FOR_STRUCT_F32_LINEAR(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_linear_x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefa8) \ - MACRO(__VA_ARGS__, f32_linear_x_x000x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefae) \ - MACRO(__VA_ARGS__, f32_linear_x_xxx00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefb8) \ - MACRO(__VA_ARGS__, f32_linear_y_0x000 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x2, .par.lin.one = 0x41001, .par.lin.zero = 0xbefbe) \ - MACRO(__VA_ARGS__, f32_linear_xyz_xxx0x_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108) \ - MACRO(__VA_ARGS__, f32_linear_xyz_x0x0x_xxx0x_xx00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbb10a) \ - MACRO(__VA_ARGS__, f32_linear_xyz_xxx00_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118) \ - MACRO(__VA_ARGS__, f32_linear_xyz_x000x_0x00x_00x0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbadae) \ - MACRO(__VA_ARGS__, f32_linear_xyz_x0000_0x000_00x00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbefbe) \ - MACRO(__VA_ARGS__, f32_linear_xyz_10x0x_1xx0x_1x00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x40421, .par.lin.zero = 0xbb10a) \ - MACRO(__VA_ARGS__, f32_linear_w_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x8, .par.lin.one = 0x1041, .par.lin.zero = 0xbefbe) \ - MACRO(__VA_ARGS__, f32_linear_xw_x000x_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x9, .par.lin.one = 0x1040, .par.lin.zero = 0xbefae) \ - MACRO(__VA_ARGS__, f32_linear_xw_xxx00_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x9, .par.lin.one = 0x1040, .par.lin.zero = 0xbefb8) \ + MACRO(__VA_ARGS__, f32_linear_x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xfffe8) \ + MACRO(__VA_ARGS__, f32_linear_x_x000x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xfffee) \ + MACRO(__VA_ARGS__, f32_linear_x_xxx00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xffff8) \ + MACRO(__VA_ARGS__, f32_linear_y_0x000 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x2, .par.lin.one = 0x0, .par.lin.zero = 0xfffbf) \ + MACRO(__VA_ARGS__, f32_linear_xyz_xxx0x_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfa108) \ + MACRO(__VA_ARGS__, f32_linear_xyz_x0x0x_xxx0x_xx00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfb10a) \ + MACRO(__VA_ARGS__, f32_linear_xyz_xxx00_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfa118) \ + MACRO(__VA_ARGS__, f32_linear_xyz_x000x_0x00x_00x0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfadae) \ + MACRO(__VA_ARGS__, f32_linear_xyz_x0000_0x000_00x00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfefbe) \ + MACRO(__VA_ARGS__, f32_linear_xyz_10x0x_1xx0x_1x00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x7, .par.lin.one = 0x421, .par.lin.zero = 0xfb10a) \ + MACRO(__VA_ARGS__, f32_linear_w_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x8, .par.lin.one = 0x0, .par.lin.zero = 0xbffff) \ + MACRO(__VA_ARGS__, f32_linear_xw_x000x_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x9, .par.lin.one = 0x0, .par.lin.zero = 0xbffee) \ + MACRO(__VA_ARGS__, f32_linear_xw_xxx00_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0x9, .par.lin.one = 0x0, .par.lin.zero = 0xbfff8) \ MACRO(__VA_ARGS__, f32_linear_xyzw_xxx0x_xxx0x_xxx0x_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0xf, .par.lin.one = 0x0, .par.lin.zero = 0xba108) \ MACRO(__VA_ARGS__, f32_linear_xyzw_x0x0x_xxx0x_xx00x_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0xf, .par.lin.one = 0x0, .par.lin.zero = 0xbb10a) \ MACRO(__VA_ARGS__, f32_linear_xyzw_x0000_0x000_00x00_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR , .mask = 0xf, .par.lin.one = 0x0, .par.lin.zero = 0xbefbe) #define SWS_FOR_F32_LINEAR_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefa8, 0xfffe8) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefa8, 0xfffef) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_x000x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefae, 0xfffee) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_X000x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefae, 0xfffef) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xxx00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefb8, 0xffff8) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xXx00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefb8, 0xffffa) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xxX00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefb8, 0xffffc) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x41040, 0xbefb8, 0xffffe) \ - MACRO(__VA_ARGS__, f32_linear_fma_y_0x000 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x2, 0x41001, 0xbefbe, 0xfffbf) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba108, 0xfa108) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba108, 0xfbdaf) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba108, 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xbb10a, 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba118, 0xfa118) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba118, 0xfbdbe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xba118, 0xfbdfe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xbadae, 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40000, 0xbefbe, 0xfefbe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x40421, 0xbb10a, 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_w_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x8, 0x01041, 0xbefbe, 0xbffff) \ - MACRO(__VA_ARGS__, f32_linear_fma_xw_x000x_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x9, 0x01040, 0xbefae, 0xbffee) \ - MACRO(__VA_ARGS__, f32_linear_fma_xw_X000x_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x9, 0x01040, 0xbefae, 0xbffef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xw_xxx00_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x9, 0x01040, 0xbefb8, 0xbfff8) \ - MACRO(__VA_ARGS__, f32_linear_fma_xw_xXX00_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x9, 0x01040, 0xbefb8, 0xbfffe) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x00000, 0xfffe8, 0xfffe8) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x00000, 0xfffe8, 0xfffef) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_x000x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x00000, 0xfffee, 0xfffee) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_X000x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x00000, 0xfffee, 0xfffef) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xxx00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x00000, 0xffff8, 0xffff8) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xXx00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x00000, 0xffff8, 0xffffa) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xxX00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x00000, 0xffff8, 0xffffc) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x1, 0x00000, 0xffff8, 0xffffe) \ + MACRO(__VA_ARGS__, f32_linear_fma_y_0x000 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x2, 0x00000, 0xfffbf, 0xfffbf) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfa108, 0xfa108) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfa108, 0xfbdaf) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfa108, 0xfbdef) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfb10a, 0xfbdef) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfa118, 0xfa118) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfa118, 0xfbdbe) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfa118, 0xfbdfe) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfadae, 0xfbdef) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00000, 0xfefbe, 0xfefbe) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x7, 0x00421, 0xfb10a, 0xfbdef) \ + MACRO(__VA_ARGS__, f32_linear_fma_w_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x8, 0x00000, 0xbffff, 0xbffff) \ + MACRO(__VA_ARGS__, f32_linear_fma_xw_x000x_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x9, 0x00000, 0xbffee, 0xbffee) \ + MACRO(__VA_ARGS__, f32_linear_fma_xw_X000x_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x9, 0x00000, 0xbffee, 0xbffef) \ + MACRO(__VA_ARGS__, f32_linear_fma_xw_xxx00_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x9, 0x00000, 0xbfff8, 0xbfff8) \ + MACRO(__VA_ARGS__, f32_linear_fma_xw_xXX00_000x0 , SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0x9, 0x00000, 0xbfff8, 0xbfffe) \ MACRO(__VA_ARGS__, f32_linear_fma_xyzw_xxx0x_xxx0x_xxx0x_000x0, SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0xf, 0x00000, 0xba108, 0xba108) \ MACRO(__VA_ARGS__, f32_linear_fma_xyzw_XXX0x_XXX0x_XXX0x_000x0, SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0xf, 0x00000, 0xba108, 0xbbdef) \ MACRO(__VA_ARGS__, f32_linear_fma_xyzw_X0X0x_XXX0x_XX00x_000x0, SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0xf, 0x00000, 0xbb10a, 0xbbdef) \ MACRO(__VA_ARGS__, f32_linear_fma_xyzw_x0000_0x000_00x00_000x0, SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA , 0xf, 0x00000, 0xbefbe, 0xbefbe) #define SWS_FOR_STRUCT_F32_LINEAR_FMA(MACRO, ...) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefa8, .par.lin.exact = 0xfffe8) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefa8, .par.lin.exact = 0xfffef) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_x000x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xfffee) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_X000x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xfffef) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xxx00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffff8) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xXx00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffffa) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xxX00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffffc) \ - MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffffe) \ - MACRO(__VA_ARGS__, f32_linear_fma_y_0x000 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x2, .par.lin.one = 0x41001, .par.lin.zero = 0xbefbe, .par.lin.exact = 0xfffbf) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfa108) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfbdaf) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfa118) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfbdbe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfbdfe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbadae, .par.lin.exact = 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40000, .par.lin.zero = 0xbefbe, .par.lin.exact = 0xfefbe) \ - MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x40421, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xfbdef) \ - MACRO(__VA_ARGS__, f32_linear_fma_w_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x8, .par.lin.one = 0x1041, .par.lin.zero = 0xbefbe, .par.lin.exact = 0xbffff) \ - MACRO(__VA_ARGS__, f32_linear_fma_xw_x000x_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x9, .par.lin.one = 0x1040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xbffee) \ - MACRO(__VA_ARGS__, f32_linear_fma_xw_X000x_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x9, .par.lin.one = 0x1040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xbffef) \ - MACRO(__VA_ARGS__, f32_linear_fma_xw_xxx00_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x9, .par.lin.one = 0x1040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xbfff8) \ - MACRO(__VA_ARGS__, f32_linear_fma_xw_xXX00_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x9, .par.lin.one = 0x1040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xbfffe) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xfffe8, .par.lin.exact = 0xfffe8) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xfffe8, .par.lin.exact = 0xfffef) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_x000x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xfffee, .par.lin.exact = 0xfffee) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_X000x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xfffee, .par.lin.exact = 0xfffef) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xxx00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xffff8, .par.lin.exact = 0xffff8) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xXx00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xffff8, .par.lin.exact = 0xffffa) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xxX00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xffff8, .par.lin.exact = 0xffffc) \ + MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x1, .par.lin.one = 0x0, .par.lin.zero = 0xffff8, .par.lin.exact = 0xffffe) \ + MACRO(__VA_ARGS__, f32_linear_fma_y_0x000 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x2, .par.lin.one = 0x0, .par.lin.zero = 0xfffbf, .par.lin.exact = 0xfffbf) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfa108, .par.lin.exact = 0xfa108) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfa108, .par.lin.exact = 0xfbdaf) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfa108, .par.lin.exact = 0xfbdef) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfb10a, .par.lin.exact = 0xfbdef) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfa118, .par.lin.exact = 0xfa118) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfa118, .par.lin.exact = 0xfbdbe) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfa118, .par.lin.exact = 0xfbdfe) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfadae, .par.lin.exact = 0xfbdef) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x0, .par.lin.zero = 0xfefbe, .par.lin.exact = 0xfefbe) \ + MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x7, .par.lin.one = 0x421, .par.lin.zero = 0xfb10a, .par.lin.exact = 0xfbdef) \ + MACRO(__VA_ARGS__, f32_linear_fma_w_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x8, .par.lin.one = 0x0, .par.lin.zero = 0xbffff, .par.lin.exact = 0xbffff) \ + MACRO(__VA_ARGS__, f32_linear_fma_xw_x000x_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x9, .par.lin.one = 0x0, .par.lin.zero = 0xbffee, .par.lin.exact = 0xbffee) \ + MACRO(__VA_ARGS__, f32_linear_fma_xw_X000x_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x9, .par.lin.one = 0x0, .par.lin.zero = 0xbffee, .par.lin.exact = 0xbffef) \ + MACRO(__VA_ARGS__, f32_linear_fma_xw_xxx00_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x9, .par.lin.one = 0x0, .par.lin.zero = 0xbfff8, .par.lin.exact = 0xbfff8) \ + MACRO(__VA_ARGS__, f32_linear_fma_xw_xXX00_000x0 , .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0x9, .par.lin.one = 0x0, .par.lin.zero = 0xbfff8, .par.lin.exact = 0xbfffe) \ MACRO(__VA_ARGS__, f32_linear_fma_xyzw_xxx0x_xxx0x_xxx0x_000x0, .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0xf, .par.lin.one = 0x0, .par.lin.zero = 0xba108, .par.lin.exact = 0xba108) \ MACRO(__VA_ARGS__, f32_linear_fma_xyzw_XXX0x_XXX0x_XXX0x_000x0, .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0xf, .par.lin.one = 0x0, .par.lin.zero = 0xba108, .par.lin.exact = 0xbbdef) \ MACRO(__VA_ARGS__, f32_linear_fma_xyzw_X0X0x_XXX0x_XX00x_000x0, .type = SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA , .mask = 0xf, .par.lin.one = 0x0, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xbbdef) \ -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
