This adds the simplification of:
  _1 = ~x_2(D);
  t1_4 = _1 & y_3(D);
  t2_5 = x_2(D) | y_3(D);
  _6 = t1_4 ^ t2_5;
  return _6;

to:
  return x_1(D);

also for ((~x) | y) ^ (x & y) version
  _1 = ~x_2(D);
  t1_4 = _1 | y_3(D);
  t2_5 = x_2(D) & y_3(D);
  _6 = t1_4 ^ t2_5;
  return _6;
to:
   int _1;
   _1 = ~x_2(D);
   return _1;

Bootstrapped and tested on aarch64-linux-gnu with
RUNTESTFLAGS="tree-ssa.exp".

changes since v1:
* v3: Change sf2/sg2 to sf/sg in test case
* v2:
- Update testcase to exercise GIMPLE folding
- Add additional type coverage
- Add vector and _Bool coverage
- Move code above in the file

        PR tree-optimization/112095

gcc/ChangeLog:

        * match.pd: Simplify ((~x) & y) ^ (x | y)
        to x and ((~x) | y) ^ (x & y) to ~x.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/pr112095.c: New test.

Signed-off-by: Shivam Gupta <[email protected]>
---
 gcc/match.pd                             |   9 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr112095.c | 127 +++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr112095.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 7b652afb43d..327598bfd63 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1606,6 +1606,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       && TYPE_PRECISION (TREE_TYPE (@0)) == 1)
   @2))
 
+/* ((~a) & b) ^ (a | b) --> a  */
+/* ((~a) | b) ^ (a & b) --> ~a  */
+(simplify
+ (bit_xor:c (bit_ior:c @0 @1) (bit_and:c @2 @1))
+ (with { bool wascmp; }
+  (if (bitwise_inverted_equal_p (@0, @2, wascmp)
+       && (!wascmp || element_precision (type) == 1))
+   @0)))
+
 /* ~(~a & b)  -->  a | ~b  */
 (simplify
  (bit_not (bit_and:cs (bit_not @0) @1))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr112095.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr112095.c
new file mode 100644
index 00000000000..992d0e1f29d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr112095.c
@@ -0,0 +1,127 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+typedef signed int s32;
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long u64;
+typedef __attribute__((vector_size(4 * sizeof (unsigned int)))) unsigned int 
v4ui;
+
+s32
+sf (s32 a, s32 b)
+{
+  s32 t1 = (~a) & b;
+  s32 t2 = a | b;
+  return t1 ^ t2;
+}
+
+s32
+sg (s32 a, s32 b)
+{
+  s32 t1 = (~a) | b;
+  s32 t2 = a & b;
+  return t1 ^ t2;
+}
+
+u8
+f1 (u8 a, u8 b)
+{
+  u8 t1 = (~a) & b;
+  u8 t2 = a | b;
+  return t1 ^ t2;
+}
+
+u8
+g1 (u8 a, u8 b)
+{
+  u8 t1 = (~a) | b;
+  u8 t2 = a & b;
+  return t1 ^ t2;
+}
+
+u16
+f2 (u16 a, u16 b)
+{
+  u16 t1 = (~a) & b;
+  u16 t2 = a | b;
+  return t1 ^ t2;
+}
+
+u16
+g2 (u16 a, u16 b)
+{
+  u16 t1 = (~a) | b;
+  u16 t2 = a & b;
+  return t1 ^ t2;
+}
+
+u32
+f3 (u32 a, u32 b)
+{
+  u32 t1 = (~a) & b;
+  u32 t2 = a | b;
+  return t1 ^ t2;
+}
+
+u32
+g3 (u32 a, u32 b)
+{
+  u32 t1 = (~a) | b;
+  u32 t2 = a & b;
+  return t1 ^ t2;
+}
+
+u64
+f4 (u64 a, u64 b)
+{
+  u64 t1 = (~a) & b;
+  u64 t2 = a | b;
+  return t1 ^ t2;
+}
+
+u64
+g4 (u64 a, u64 b)
+{
+  u64 t1 = (~a) | b;
+  u64 t2 = a & b;
+  return t1 ^ t2;
+}
+
+_Bool
+bf (_Bool a, _Bool b)
+{
+  _Bool t1 = (~a) & b;
+  _Bool t2 = a | b;
+  return t1 ^ t2;
+}
+
+_Bool
+bg (_Bool a, _Bool b)
+{
+  _Bool t1 = (~a) | b;
+  _Bool t2 = a & b;
+  return t1 ^ t2;
+}
+
+v4ui
+vf (v4ui a, v4ui b)
+{
+  v4ui t1 = (~a) & b;
+  v4ui t2 = a | b;
+  return t1 ^ t2;
+}
+
+v4ui
+vg (v4ui a, v4ui b)
+{
+  v4ui t1 = (~a) | b;
+  v4ui t2 = a & b;
+  return t1 ^ t2;
+}
+
+/* f* and vf should simplify to return a.  */
+/* { dg-final { scan-tree-dump-times "return a_" 6 "optimized" } } */
+
+/* g* and vg should simplify to ~a directly.  */
+/* { dg-final { scan-tree-dump-times "= ~a" 6 "optimized" } } */
-- 
2.34.1

Reply via email to