This adds the simplification of:
_1 = ~x_2(D);
t1_4 = _1 & y_3(D);
t2_5 = x_2(D) | y_3(D);
_6 = t1_4 ^ t2_5;
return _6;
to:
return x_2(D);
It also handles the ((~x) | y) ^ (x & y) variant:
_1 = ~x_2(D);
t1_4 = _1 | y_3(D);
t2_5 = x_2(D) & y_3(D);
_6 = t1_4 ^ t2_5;
return _6;
to:
int _1;
_1 = ~x_2(D);
return _1;
Bootstrapped and tested on aarch64-linux-gnu with
RUNTESTFLAGS="tree-ssa.exp".
Changes since v1:
* v2:
- Update the testcase to exercise GIMPLE folding.
- Add additional type coverage.
- Add vector and _Bool coverage.
- Move the new pattern earlier in match.pd.
PR tree-optimization/112095
gcc/ChangeLog:
* match.pd: Simplify ((~x) & y) ^ (x | y)
to x and ((~x) | y) ^ (x & y) to ~x.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr112095.c: New test.
Signed-off-by: Shivam Gupta <[email protected]>
---
gcc/match.pd | 9 ++
gcc/testsuite/gcc.dg/tree-ssa/pr112095.c | 127 +++++++++++++++++++++++
2 files changed, 136 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr112095.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 7b652afb43d..327598bfd63 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1606,6 +1606,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& TYPE_PRECISION (TREE_TYPE (@0)) == 1)
@2))
+/* ((~a) & b) ^ (a | b) --> a.  */
+/* ((~a) | b) ^ (a & b) --> ~a.  Either way the result is the IOR operand.  */
+(simplify
+ (bit_xor:c (bit_ior:c @0 @1) (bit_and:c @2 @1))
+ (with { bool wascmp; }
+ (if (bitwise_inverted_equal_p (@0, @2, wascmp)
+ && (!wascmp || element_precision (type) == 1)) /* wascmp set: 1-bit only.  */
+ @0)))
+
/* ~(~a & b) --> a | ~b */
(simplify
(bit_not (bit_and:cs (bit_not @0) @1))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr112095.c b/gcc/testsuite/gcc.dg/tree-ssa/pr112095.c
new file mode 100644
index 00000000000..35d14b21b5d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr112095.c
@@ -0,0 +1,127 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+typedef signed int s32;
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long u64; /* NOTE(review): only 64 bits wide on LP64 targets.  */
+typedef __attribute__((vector_size(4 * sizeof (unsigned int)))) unsigned int v4ui;
+
+s32
+sf2 (s32 a, s32 b)
+{
+ s32 t1 = (~a) & b; /* Separate statements; presumably so the fold happens on GIMPLE.  */
+ s32 t2 = a | b;
+ return t1 ^ t2; /* ((~a) & b) ^ (a | b), i.e. a.  */
+}
+
+s32
+sg2 (s32 a, s32 b)
+{
+ s32 t1 = (~a) | b;
+ s32 t2 = a & b;
+ return t1 ^ t2; /* ((~a) | b) ^ (a & b), i.e. ~a.  */
+}
+
+u8
+f1 (u8 a, u8 b)
+{
+ u8 t1 = (~a) & b; /* Computed in int after promotion, then truncated to u8.  */
+ u8 t2 = a | b;
+ return t1 ^ t2; /* ((~a) & b) ^ (a | b), i.e. a.  */
+}
+
+u8
+g1 (u8 a, u8 b)
+{
+ u8 t1 = (~a) | b; /* Computed in int after promotion, then truncated to u8.  */
+ u8 t2 = a & b;
+ return t1 ^ t2; /* ((~a) | b) ^ (a & b), i.e. ~a in u8.  */
+}
+
+u16
+f2 (u16 a, u16 b)
+{
+ u16 t1 = (~a) & b; /* Computed in int after promotion, then truncated to u16.  */
+ u16 t2 = a | b;
+ return t1 ^ t2; /* ((~a) & b) ^ (a | b), i.e. a.  */
+}
+
+u16
+g2 (u16 a, u16 b)
+{
+ u16 t1 = (~a) | b; /* Computed in int after promotion, then truncated to u16.  */
+ u16 t2 = a & b;
+ return t1 ^ t2; /* ((~a) | b) ^ (a & b), i.e. ~a in u16.  */
+}
+
+u32
+f3 (u32 a, u32 b)
+{
+ u32 t1 = (~a) & b;
+ u32 t2 = a | b;
+ return t1 ^ t2; /* ((~a) & b) ^ (a | b), i.e. a.  */
+}
+
+u32
+g3 (u32 a, u32 b)
+{
+ u32 t1 = (~a) | b;
+ u32 t2 = a & b;
+ return t1 ^ t2; /* ((~a) | b) ^ (a & b), i.e. ~a.  */
+}
+
+u64
+f4 (u64 a, u64 b)
+{
+ u64 t1 = (~a) & b;
+ u64 t2 = a | b;
+ return t1 ^ t2; /* ((~a) & b) ^ (a | b), i.e. a.  */
+}
+
+u64
+g4 (u64 a, u64 b)
+{
+ u64 t1 = (~a) | b;
+ u64 t2 = a & b;
+ return t1 ^ t2; /* ((~a) | b) ^ (a & b), i.e. ~a.  */
+}
+
+_Bool
+bf (_Bool a, _Bool b)
+{
+ _Bool t1 = (~a) & b; /* ~a is computed in int (-1 or -2), so t1 is !a & b.  */
+ _Bool t2 = a | b;
+ return t1 ^ t2; /* Still equals a; NOTE(review): confirm whether this form is expected to fold.  */
+}
+
+_Bool
+bg (_Bool a, _Bool b)
+{
+ _Bool t1 = (~a) | b; /* ~a is -1 or -2 in int, so this is nonzero and t1 is always 1.  */
+ _Bool t2 = a & b;
+ return t1 ^ t2; /* NOTE(review): this is !(a & b), not ~a; confirm the intended coverage.  */
+}
+
+v4ui
+vf (v4ui a, v4ui b)
+{
+ v4ui t1 = (~a) & b; /* Element-wise on the four unsigned int lanes.  */
+ v4ui t2 = a | b;
+ return t1 ^ t2; /* ((~a) & b) ^ (a | b), i.e. a.  */
+}
+
+v4ui
+vg (v4ui a, v4ui b)
+{
+ v4ui t1 = (~a) | b; /* Element-wise on the four unsigned int lanes.  */
+ v4ui t2 = a & b;
+ return t1 ^ t2; /* ((~a) | b) ^ (a & b), i.e. ~a.  */
+}
+
+/* f* and vf should simplify to return a. */
+/* { dg-final { scan-tree-dump-times "return a_" 6 "optimized" } } */
+
+/* g* and vg should simplify to ~a directly. */
+/* { dg-final { scan-tree-dump-times "= ~a" 6 "optimized" } } */
--
2.34.1