From: Karl Meakin <[email protected]>

Comparing `arg_type` and `TREE_TYPE (b)` by pointer address causes an
assertion failure when one type is a `typedef` for the other (e.g.
`uint32x2_t` and `__Uint32x2_t`).  Fix by using a more relaxed
comparison that checks only the types' mode and signedness.

gcc/ChangeLog:

        * config/aarch64/aarch64-neon-builtins-base.cc
        (gimple_permute_pair::fold): Compare `arg_type` and
        `TREE_TYPE (b)` by their mode and signedness, rather than by
        pointer address.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/pr126064.c: New test.
---
 .../aarch64/aarch64-neon-builtins-base.cc     |  3 ++-
 gcc/testsuite/gcc.target/aarch64/pr126064.c   | 27 +++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr126064.c

diff --git a/gcc/config/aarch64/aarch64-neon-builtins-base.cc b/gcc/config/aarch64/aarch64-neon-builtins-base.cc
index 9e61ba7688ce..d7833bbcb7bc 100644
--- a/gcc/config/aarch64/aarch64-neon-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-neon-builtins-base.cc
@@ -562,7 +562,8 @@ public:
       std::swap (a, b);
 
     auto arg_type = TREE_TYPE (a);
-    gcc_assert (arg_type == TREE_TYPE (b));
+    gcc_assert (TYPE_MODE (arg_type) == TYPE_MODE (TREE_TYPE (b))
+               && TYPE_UNSIGNED (arg_type) == TYPE_UNSIGNED (TREE_TYPE (b)));
 
     auto tuple_type = TREE_TYPE (f.lhs);
     auto tuple = create_tmp_var (tuple_type);
diff --git a/gcc/testsuite/gcc.target/aarch64/pr126064.c b/gcc/testsuite/gcc.target/aarch64/pr126064.c
new file mode 100644
index 000000000000..b43601e3765a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr126064.c
@@ -0,0 +1,27 @@
+/* Check that `gimple_permute_pair::fold()` does not trip an assert when the LHS
+   type is `uint32x2_t` and the RHS type is `__Uint32x2_t`.  */
+
+#include <arm_neon.h>
+
+void
+repro (uint8_t *dst, uint8x8_t a, uint8x8_t b)
+{
+  uint32x2x2_t t;
+  uint32x2_t r;
+  uint8x8_t c;
+
+  r = vrev64_u32 (vreinterpret_u32_u8 (b));
+  t = vtrn_u32 (vreinterpret_u32_u8 (a), r);
+  c = vreinterpret_u8_u32 (t.val[1]);
+  t = vtrn_u32 (t.val[0], vreinterpret_u32_u8 (c));
+  t.val[1] = vrev64_u32 (t.val[1]);
+
+  vst1_u8 (dst, vreinterpret_u8_u32 (t.val[1]));
+}
+
+uint32x2_t
+minimized_repro (uint32x2_t a, uint32x2_t b)
+{
+  uint32x2x2_t c = vtrn_u32 (a, vrev64_u32 (b));
+  return c.val[1];
+}
-- 
2.54.0

Reply via email to