From: Karl Meakin <[email protected]>
Comparing `arg_type` and `TREE_TYPE (b)` by pointer address causes an
assertion failure when one type is a `typedef` for the other (e.g.
`uint32x2_t` and `__Uint32x2_t`). Fix by using a more relaxed comparison.
gcc/ChangeLog:
* config/aarch64/aarch64-neon-builtins-base.cc
(gimple_permute_pair::fold): Compare `arg_type` and `TREE_TYPE (b)`
by their mode and signedness, rather than by pointer address.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/pr126064.c: New test.
---
.../aarch64/aarch64-neon-builtins-base.cc | 3 ++-
gcc/testsuite/gcc.target/aarch64/pr126064.c | 27 +++++++++++++++++++
2 files changed, 29 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr126064.c
diff --git a/gcc/config/aarch64/aarch64-neon-builtins-base.cc b/gcc/config/aarch64/aarch64-neon-builtins-base.cc
index 9e61ba7688ce..d7833bbcb7bc 100644
--- a/gcc/config/aarch64/aarch64-neon-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-neon-builtins-base.cc
@@ -562,7 +562,8 @@ public:
std::swap (a, b);
auto arg_type = TREE_TYPE (a);
- gcc_assert (arg_type == TREE_TYPE (b));
+ gcc_assert (TYPE_MODE (arg_type) == TYPE_MODE (TREE_TYPE (b))
+ && TYPE_UNSIGNED (arg_type) == TYPE_UNSIGNED (TREE_TYPE (b)));
auto tuple_type = TREE_TYPE (f.lhs);
auto tuple = create_tmp_var (tuple_type);
diff --git a/gcc/testsuite/gcc.target/aarch64/pr126064.c b/gcc/testsuite/gcc.target/aarch64/pr126064.c
new file mode 100644
index 000000000000..b43601e3765a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr126064.c
@@ -0,0 +1,27 @@
+/* Check that `gimple_permute_pair::fold()` does not trip an assert when the LHS
+ type is `uint32x2_t` and the RHS type is `__Uint32x2_t`. */
+
+#include <arm_neon.h>
+
+void
+repro (uint8_t *dst, uint8x8_t a, uint8x8_t b)
+{
+ uint32x2x2_t t;
+ uint32x2_t r;
+ uint8x8_t c;
+
+ r = vrev64_u32 (vreinterpret_u32_u8 (b));
+ t = vtrn_u32 (vreinterpret_u32_u8 (a), r);
+ c = vreinterpret_u8_u32 (t.val[1]);
+ t = vtrn_u32 (t.val[0], vreinterpret_u32_u8 (c));
+ t.val[1] = vrev64_u32 (t.val[1]);
+
+ vst1_u8 (dst, vreinterpret_u8_u32 (t.val[1]));
+}
+
+uint32x2_t
+minimized_repro (uint32x2_t a, uint32x2_t b)
+{
+ uint32x2x2_t c = vtrn_u32 (a, vrev64_u32 (b));
+ return c.val[1];
+}
--
2.54.0