PR #23563 opened by Ramiro Polla (ramiro)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23563
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23563.patch


From 38d40bfce58acebe676f287ad28af5b1ec449ff4 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Tue, 23 Jun 2026 01:49:20 +0200
Subject: [PATCH] swscale/uops: mark unneeded rows as zero in
 translate_linear_op()

---
 libswscale/uops.c        |   7 +-
 libswscale/uops_macros.h | 148 +++++++++++++++++++--------------------
 2 files changed, 79 insertions(+), 76 deletions(-)

diff --git a/libswscale/uops.c b/libswscale/uops.c
index 096621466c..64eb91e959 100644
--- a/libswscale/uops.c
+++ b/libswscale/uops.c
@@ -712,8 +712,11 @@ static int translate_linear_op(SwsContext *ctx, SwsUOpList *ops,
     uint32_t exact = 0;
 
     for (int i = 0; i < 4; i++) {
-        if (SWS_OP_NEEDED(op, i) && (op->lin.mask & SWS_MASK_ROW(i)))
-            uop.mask |= SWS_COMP(i);
+        if (!SWS_OP_NEEDED(op, i) || !(op->lin.mask & SWS_MASK_ROW(i))) {
+            uop.par.lin.zero |= SWS_MASK_ROW(i);
+            continue;
+        }
+        uop.mask |= SWS_COMP(i);
         bool nonzero = (op->lin.m[i][4].num != 0);
         for (int j = 0; j < 5; j++) {
             const AVRational k = op->lin.m[i][j];
diff --git a/libswscale/uops_macros.h b/libswscale/uops_macros.h
index a382988361..ed8d439b17 100644
--- a/libswscale/uops_macros.h
+++ b/libswscale/uops_macros.h
@@ -1080,93 +1080,93 @@
 #define SWS_FOR_F32_CLEAR(MACRO, ...)
 #define SWS_FOR_STRUCT_F32_CLEAR(MACRO, ...)
 #define SWS_FOR_F32_LINEAR(MACRO, ...) \
-    MACRO(__VA_ARGS__, f32_linear_x_xxx0x                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x1, 0x41040, 0xbefa8) \
-    MACRO(__VA_ARGS__, f32_linear_x_x000x                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x1, 0x41040, 0xbefae) \
-    MACRO(__VA_ARGS__, f32_linear_x_xxx00                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x1, 0x41040, 0xbefb8) \
-    MACRO(__VA_ARGS__, f32_linear_y_0x000                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x2, 0x41001, 0xbefbe) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_xxx0x_xxx0x_xxx0x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x40000, 0xba108) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_x0x0x_xxx0x_xx00x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x40000, 0xbb10a) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_xxx00_xxx0x_xxx0x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x40000, 0xba118) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_x000x_0x00x_00x0x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x40000, 0xbadae) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_x0000_0x000_00x00        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x40000, 0xbefbe) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_10x0x_1xx0x_1x00x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x40421, 0xbb10a) \
-    MACRO(__VA_ARGS__, f32_linear_w_000x0                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x8, 0x01041, 0xbefbe) \
-    MACRO(__VA_ARGS__, f32_linear_xw_x000x_000x0               , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x9, 0x01040, 0xbefae) \
-    MACRO(__VA_ARGS__, f32_linear_xw_xxx00_000x0               , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x9, 0x01040, 0xbefb8) \
+    MACRO(__VA_ARGS__, f32_linear_x_xxx0x                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x1, 0x00000, 0xfffe8) \
+    MACRO(__VA_ARGS__, f32_linear_x_x000x                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x1, 0x00000, 0xfffee) \
+    MACRO(__VA_ARGS__, f32_linear_x_xxx00                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x1, 0x00000, 0xffff8) \
+    MACRO(__VA_ARGS__, f32_linear_y_0x000                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x2, 0x00000, 0xfffbf) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_xxx0x_xxx0x_xxx0x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x00000, 0xfa108) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_x0x0x_xxx0x_xx00x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x00000, 0xfb10a) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_xxx00_xxx0x_xxx0x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x00000, 0xfa118) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_x000x_0x00x_00x0x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x00000, 0xfadae) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_x0000_0x000_00x00        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x00000, 0xfefbe) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_10x0x_1xx0x_1x00x        , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x7, 0x00421, 0xfb10a) \
+    MACRO(__VA_ARGS__, f32_linear_w_000x0                      , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x8, 0x00000, 0xbffff) \
+    MACRO(__VA_ARGS__, f32_linear_xw_x000x_000x0               , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x9, 0x00000, 0xbffee) \
+    MACRO(__VA_ARGS__, f32_linear_xw_xxx00_000x0               , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0x9, 0x00000, 0xbfff8) \
     MACRO(__VA_ARGS__, f32_linear_xyzw_xxx0x_xxx0x_xxx0x_000x0 , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0xf, 0x00000, 0xba108) \
     MACRO(__VA_ARGS__, f32_linear_xyzw_x0x0x_xxx0x_xx00x_000x0 , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0xf, 0x00000, 0xbb10a) \
     MACRO(__VA_ARGS__, f32_linear_xyzw_x0000_0x000_00x00_000x0 , 
SWS_PIXEL_F32, SWS_UOP_LINEAR          , 0xf, 0x00000, 0xbefbe)
 #define SWS_FOR_STRUCT_F32_LINEAR(MACRO, ...) \
-    MACRO(__VA_ARGS__, f32_linear_x_xxx0x                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefa8) \
-    MACRO(__VA_ARGS__, f32_linear_x_x000x                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefae) \
-    MACRO(__VA_ARGS__, f32_linear_x_xxx00                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefb8) \
-    MACRO(__VA_ARGS__, f32_linear_y_0x000                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x2, .par.lin.one = 
0x41001, .par.lin.zero = 0xbefbe) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_xxx0x_xxx0x_xxx0x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xba108) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_x0x0x_xxx0x_xx00x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xbb10a) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_xxx00_xxx0x_xxx0x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xba118) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_x000x_0x00x_00x0x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xbadae) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_x0000_0x000_00x00        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xbefbe) \
-    MACRO(__VA_ARGS__, f32_linear_xyz_10x0x_1xx0x_1x00x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x40421, .par.lin.zero = 0xbb10a) \
-    MACRO(__VA_ARGS__, f32_linear_w_000x0                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x8, .par.lin.one = 
0x1041, .par.lin.zero = 0xbefbe) \
-    MACRO(__VA_ARGS__, f32_linear_xw_x000x_000x0               , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x9, .par.lin.one = 
0x1040, .par.lin.zero = 0xbefae) \
-    MACRO(__VA_ARGS__, f32_linear_xw_xxx00_000x0               , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x9, .par.lin.one = 
0x1040, .par.lin.zero = 0xbefb8) \
+    MACRO(__VA_ARGS__, f32_linear_x_xxx0x                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xfffe8) \
+    MACRO(__VA_ARGS__, f32_linear_x_x000x                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xfffee) \
+    MACRO(__VA_ARGS__, f32_linear_x_xxx00                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xffff8) \
+    MACRO(__VA_ARGS__, f32_linear_y_0x000                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x2, .par.lin.one = 
0x0, .par.lin.zero = 0xfffbf) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_xxx0x_xxx0x_xxx0x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfa108) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_x0x0x_xxx0x_xx00x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfb10a) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_xxx00_xxx0x_xxx0x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfa118) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_x000x_0x00x_00x0x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfadae) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_x0000_0x000_00x00        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfefbe) \
+    MACRO(__VA_ARGS__, f32_linear_xyz_10x0x_1xx0x_1x00x        , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x7, .par.lin.one = 
0x421, .par.lin.zero = 0xfb10a) \
+    MACRO(__VA_ARGS__, f32_linear_w_000x0                      , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x8, .par.lin.one = 
0x0, .par.lin.zero = 0xbffff) \
+    MACRO(__VA_ARGS__, f32_linear_xw_x000x_000x0               , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x9, .par.lin.one = 
0x0, .par.lin.zero = 0xbffee) \
+    MACRO(__VA_ARGS__, f32_linear_xw_xxx00_000x0               , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0x9, .par.lin.one = 
0x0, .par.lin.zero = 0xbfff8) \
     MACRO(__VA_ARGS__, f32_linear_xyzw_xxx0x_xxx0x_xxx0x_000x0 , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0xf, .par.lin.one = 
0x0, .par.lin.zero = 0xba108) \
     MACRO(__VA_ARGS__, f32_linear_xyzw_x0x0x_xxx0x_xx00x_000x0 , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0xf, .par.lin.one = 
0x0, .par.lin.zero = 0xbb10a) \
     MACRO(__VA_ARGS__, f32_linear_xyzw_x0000_0x000_00x00_000x0 , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR          , .mask = 0xf, .par.lin.one = 
0x0, .par.lin.zero = 0xbefbe)
 #define SWS_FOR_F32_LINEAR_FMA(MACRO, ...) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x41040, 0xbefa8, 0xfffe8) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x41040, 0xbefa8, 0xfffef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_x000x                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x41040, 0xbefae, 0xfffee) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_X000x                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x41040, 0xbefae, 0xfffef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xxx00                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x41040, 0xbefb8, 0xffff8) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xXx00                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x41040, 0xbefb8, 0xffffa) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xxX00                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x41040, 0xbefb8, 0xffffc) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x41040, 0xbefb8, 0xffffe) \
-    MACRO(__VA_ARGS__, f32_linear_fma_y_0x000                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x2, 0x41001, 0xbefbe, 0xfffbf) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xba108, 0xfa108) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xba108, 0xfbdaf) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xba108, 0xfbdef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xbb10a, 0xfbdef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xba118, 0xfa118) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xba118, 0xfbdbe) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xba118, 0xfbdfe) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xbadae, 0xfbdef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40000, 0xbefbe, 0xfefbe) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x40421, 0xbb10a, 0xfbdef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_w_000x0                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x8, 0x01041, 0xbefbe, 0xbffff) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xw_x000x_000x0           , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x9, 0x01040, 0xbefae, 0xbffee) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xw_X000x_000x0           , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x9, 0x01040, 0xbefae, 0xbffef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xw_xxx00_000x0           , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x9, 0x01040, 0xbefb8, 0xbfff8) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xw_xXX00_000x0           , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x9, 0x01040, 0xbefb8, 0xbfffe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x00000, 0xfffe8, 0xfffe8) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x00000, 0xfffe8, 0xfffef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_x000x                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x00000, 0xfffee, 0xfffee) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_X000x                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x00000, 0xfffee, 0xfffef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xxx00                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x00000, 0xffff8, 0xffff8) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xXx00                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x00000, 0xffff8, 0xffffa) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xxX00                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x00000, 0xffff8, 0xffffc) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x1, 0x00000, 0xffff8, 0xffffe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_y_0x000                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x2, 0x00000, 0xfffbf, 0xfffbf) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfa108, 0xfa108) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfa108, 0xfbdaf) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfa108, 0xfbdef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfb10a, 0xfbdef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfa118, 0xfa118) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfa118, 0xfbdbe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfa118, 0xfbdfe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfadae, 0xfbdef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00000, 0xfefbe, 0xfefbe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x    , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x7, 0x00421, 0xfb10a, 0xfbdef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_w_000x0                  , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x8, 0x00000, 0xbffff, 0xbffff) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xw_x000x_000x0           , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x9, 0x00000, 0xbffee, 0xbffee) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xw_X000x_000x0           , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x9, 0x00000, 0xbffee, 0xbffef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xw_xxx00_000x0           , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x9, 0x00000, 0xbfff8, 0xbfff8) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xw_xXX00_000x0           , 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0x9, 0x00000, 0xbfff8, 0xbfffe) \
     MACRO(__VA_ARGS__, f32_linear_fma_xyzw_xxx0x_xxx0x_xxx0x_000x0, 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0xf, 0x00000, 0xba108, 0xba108) \
     MACRO(__VA_ARGS__, f32_linear_fma_xyzw_XXX0x_XXX0x_XXX0x_000x0, 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0xf, 0x00000, 0xba108, 0xbbdef) \
     MACRO(__VA_ARGS__, f32_linear_fma_xyzw_X0X0x_XXX0x_XX00x_000x0, 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0xf, 0x00000, 0xbb10a, 0xbbdef) \
     MACRO(__VA_ARGS__, f32_linear_fma_xyzw_x0000_0x000_00x00_000x0, 
SWS_PIXEL_F32, SWS_UOP_LINEAR_FMA      , 0xf, 0x00000, 0xbefbe, 0xbefbe)
 #define SWS_FOR_STRUCT_F32_LINEAR_FMA(MACRO, ...) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefa8, .par.lin.exact = 0xfffe8) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefa8, .par.lin.exact = 0xfffef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_x000x                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xfffee) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_X000x                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xfffef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xxx00                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffff8) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xXx00                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffffa) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xxX00                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffffc) \
-    MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x41040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xffffe) \
-    MACRO(__VA_ARGS__, f32_linear_fma_y_0x000                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x2, .par.lin.one = 
0x41001, .par.lin.zero = 0xbefbe, .par.lin.exact = 0xfffbf) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfa108) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfbdaf) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xba108, .par.lin.exact = 0xfbdef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xfbdef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfa118) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfbdbe) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xba118, .par.lin.exact = 0xfbdfe) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xbadae, .par.lin.exact = 0xfbdef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40000, .par.lin.zero = 0xbefbe, .par.lin.exact = 0xfefbe) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x40421, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xfbdef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_w_000x0                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x8, .par.lin.one = 
0x1041, .par.lin.zero = 0xbefbe, .par.lin.exact = 0xbffff) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xw_x000x_000x0           , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x9, .par.lin.one = 
0x1040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xbffee) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xw_X000x_000x0           , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x9, .par.lin.one = 
0x1040, .par.lin.zero = 0xbefae, .par.lin.exact = 0xbffef) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xw_xxx00_000x0           , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x9, .par.lin.one = 
0x1040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xbfff8) \
-    MACRO(__VA_ARGS__, f32_linear_fma_xw_xXX00_000x0           , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x9, .par.lin.one = 
0x1040, .par.lin.zero = 0xbefb8, .par.lin.exact = 0xbfffe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xxx0x                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xfffe8, .par.lin.exact = 0xfffe8) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_XXX0x                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xfffe8, .par.lin.exact = 0xfffef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_x000x                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xfffee, .par.lin.exact = 0xfffee) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_X000x                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xfffee, .par.lin.exact = 0xfffef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xxx00                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xffff8, .par.lin.exact = 0xffff8) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xXx00                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xffff8, .par.lin.exact = 0xffffa) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xxX00                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xffff8, .par.lin.exact = 0xffffc) \
+    MACRO(__VA_ARGS__, f32_linear_fma_x_xXX00                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x1, .par.lin.one = 
0x0, .par.lin.zero = 0xffff8, .par.lin.exact = 0xffffe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_y_0x000                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x2, .par.lin.one = 
0x0, .par.lin.zero = 0xfffbf, .par.lin.exact = 0xfffbf) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx0x_xxx0x_xxx0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfa108, .par.lin.exact = 0xfa108) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XxX0x_XXX0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfa108, .par.lin.exact = 0xfbdaf) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_XXX0x_XXX0x_XXX0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfa108, .par.lin.exact = 0xfbdef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_X0X0x_XXX0x_XX00x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfb10a, .par.lin.exact = 0xfbdef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xxx00_xxx0x_xxx0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfa118, .par.lin.exact = 0xfa118) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XxX0x_XXX0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfa118, .par.lin.exact = 0xfbdbe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_xXX00_XXX0x_XXX0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfa118, .par.lin.exact = 0xfbdfe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_X000x_0X00x_00X0x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfadae, .par.lin.exact = 0xfbdef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_x0000_0x000_00x00    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x0, .par.lin.zero = 0xfefbe, .par.lin.exact = 0xfefbe) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xyz_10X0x_1XX0x_1X00x    , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x7, .par.lin.one = 
0x421, .par.lin.zero = 0xfb10a, .par.lin.exact = 0xfbdef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_w_000x0                  , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x8, .par.lin.one = 
0x0, .par.lin.zero = 0xbffff, .par.lin.exact = 0xbffff) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xw_x000x_000x0           , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x9, .par.lin.one = 
0x0, .par.lin.zero = 0xbffee, .par.lin.exact = 0xbffee) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xw_X000x_000x0           , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x9, .par.lin.one = 
0x0, .par.lin.zero = 0xbffee, .par.lin.exact = 0xbffef) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xw_xxx00_000x0           , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x9, .par.lin.one = 
0x0, .par.lin.zero = 0xbfff8, .par.lin.exact = 0xbfff8) \
+    MACRO(__VA_ARGS__, f32_linear_fma_xw_xXX00_000x0           , .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0x9, .par.lin.one = 
0x0, .par.lin.zero = 0xbfff8, .par.lin.exact = 0xbfffe) \
     MACRO(__VA_ARGS__, f32_linear_fma_xyzw_xxx0x_xxx0x_xxx0x_000x0, .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0xf, .par.lin.one = 
0x0, .par.lin.zero = 0xba108, .par.lin.exact = 0xba108) \
     MACRO(__VA_ARGS__, f32_linear_fma_xyzw_XXX0x_XXX0x_XXX0x_000x0, .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0xf, .par.lin.one = 
0x0, .par.lin.zero = 0xba108, .par.lin.exact = 0xbbdef) \
     MACRO(__VA_ARGS__, f32_linear_fma_xyzw_X0X0x_XXX0x_XX00x_000x0, .type = 
SWS_PIXEL_F32, .uop = SWS_UOP_LINEAR_FMA      , .mask = 0xf, .par.lin.one = 
0x0, .par.lin.zero = 0xbb10a, .par.lin.exact = 0xbbdef) \
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to