PR #23527 opened by Ramiro Polla (ramiro)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23527
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23527.patch

Unused components weren't being taken into consideration when generating the unpack masks.

Note that this commit includes a hack to prevent checkasm from aborting:
checkasm runs a {read, unpack, write} sequence, which doesn't properly mark
the unpacked components as unused.


From 48c9b38087b91800d3f6aa94419209cb0400a750 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Fri, 12 Jun 2026 02:41:09 +0200
Subject: [PATCH] swscale/uops: generate more specific masks for unpack when
 translating

Unused components weren't being taken into consideration when generating the unpack masks.

Note that this commit includes a hack to prevent checkasm from aborting:
checkasm runs a {read, unpack, write} sequence, which doesn't properly mark
the unpacked components as unused.
---
 libswscale/uops.c           | 14 +++++++++++++-
 libswscale/uops_macros.h    |  8 ++++++++
 tests/ref/fate/sws-ops-list |  2 +-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/libswscale/uops.c b/libswscale/uops.c
index 1bd3e2f763..a9dd0d41cf 100644
--- a/libswscale/uops.c
+++ b/libswscale/uops.c
@@ -799,7 +799,8 @@ static int translate_op(SwsContext *ctx, SwsUOpList *uops, SwsUOpFlags flags,
         uop.mask = 0;
         for (int i = 0; i < 4 && op->pack.pattern[i]; i++) {
             uop.par.pack.pattern[i] = op->pack.pattern[i];
-            uop.mask |= SWS_COMP(i);
+            if (op->op == SWS_OP_PACK || SWS_OP_NEEDED(op, i))
+                uop.mask |= SWS_COMP(i);
         }
         break;
     case SWS_OP_LSHIFT:
@@ -1021,6 +1022,17 @@ int ff_sws_uops_macros_gen(char **out_str)
         }
     }
 
+    /* HACK: add some patterns for checkasm */
+    const SwsUOp extra_uops[] = {
+        { .type = SWS_PIXEL_U32, .uop = SWS_UOP_UNPACK, .mask = 0xf, .par.pack.pattern = {2, 10, 10, 10} },
+        { .type = SWS_PIXEL_U32, .uop = SWS_UOP_UNPACK, .mask = 0xf, .par.pack.pattern = {10, 10, 10, 2} },
+    };
+    for (int i = 0; i < FF_ARRAY_ELEMS(extra_uops); i++) {
+        ret = register_uop(&root, &extra_uops[i]);
+        if (ret < 0)
+            goto fail;
+    }
+
     #define BPRINT_STR(str) av_bprint_append_data(bp, str, strlen(str))
     BPRINT_STR(
 "/**\n"
diff --git a/libswscale/uops_macros.h b/libswscale/uops_macros.h
index 3c4d6b6a3e..62743a8359 100644
--- a/libswscale/uops_macros.h
+++ b/libswscale/uops_macros.h
@@ -934,9 +934,17 @@
 #define SWS_FOR_U32_MAX(MACRO, ...)
 #define SWS_FOR_STRUCT_U32_MAX(MACRO, ...)
 #define SWS_FOR_U32_UNPACK(MACRO, ...) \
+    MACRO(__VA_ARGS__, u32_unpack_y_aaa2                       , SWS_PIXEL_U32, SWS_UOP_UNPACK          , 0x2, 10, 10, 10, 2) \
+    MACRO(__VA_ARGS__, u32_unpack_z_2aaa                       , SWS_PIXEL_U32, SWS_UOP_UNPACK          , 0x4, 2, 10, 10, 10) \
+    MACRO(__VA_ARGS__, u32_unpack_xyz_aaa2                     , SWS_PIXEL_U32, SWS_UOP_UNPACK          , 0x7, 10, 10, 10, 2) \
+    MACRO(__VA_ARGS__, u32_unpack_yzw_2aaa                     , SWS_PIXEL_U32, SWS_UOP_UNPACK          , 0xe, 2, 10, 10, 10) \
     MACRO(__VA_ARGS__, u32_unpack_xyzw_2aaa                    , SWS_PIXEL_U32, SWS_UOP_UNPACK          , 0xf, 2, 10, 10, 10) \
     MACRO(__VA_ARGS__, u32_unpack_xyzw_aaa2                    , SWS_PIXEL_U32, SWS_UOP_UNPACK          , 0xf, 10, 10, 10, 2)
 #define SWS_FOR_STRUCT_U32_UNPACK(MACRO, ...) \
+    MACRO(__VA_ARGS__, u32_unpack_y_aaa2                       , .type = SWS_PIXEL_U32, .uop = SWS_UOP_UNPACK          , .mask = 0x2, .par.pack.pattern = {10, 10, 10, 2}) \
+    MACRO(__VA_ARGS__, u32_unpack_z_2aaa                       , .type = SWS_PIXEL_U32, .uop = SWS_UOP_UNPACK          , .mask = 0x4, .par.pack.pattern = {2, 10, 10, 10}) \
+    MACRO(__VA_ARGS__, u32_unpack_xyz_aaa2                     , .type = SWS_PIXEL_U32, .uop = SWS_UOP_UNPACK          , .mask = 0x7, .par.pack.pattern = {10, 10, 10, 2}) \
+    MACRO(__VA_ARGS__, u32_unpack_yzw_2aaa                     , .type = SWS_PIXEL_U32, .uop = SWS_UOP_UNPACK          , .mask = 0xe, .par.pack.pattern = {2, 10, 10, 10}) \
     MACRO(__VA_ARGS__, u32_unpack_xyzw_2aaa                    , .type = SWS_PIXEL_U32, .uop = SWS_UOP_UNPACK          , .mask = 0xf, .par.pack.pattern = {2, 10, 10, 10}) \
     MACRO(__VA_ARGS__, u32_unpack_xyzw_aaa2                    , .type = SWS_PIXEL_U32, .uop = SWS_UOP_UNPACK          , .mask = 0xf, .par.pack.pattern = {10, 10, 10, 2})
 #define SWS_FOR_U32_PACK(MACRO, ...) \
diff --git a/tests/ref/fate/sws-ops-list b/tests/ref/fate/sws-ops-list
index 6b4003121a..f3311b643f 100644
--- a/tests/ref/fate/sws-ops-list
+++ b/tests/ref/fate/sws-ops-list
@@ -1 +1 @@
-bbe27c8c324f08d933f6397f5fb96650
+2f427f7cbfaef38013a3fbefcb5e1e57
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to