From: Daniel Barboza <[email protected]>
Remove if mispredicts for bit_ior, lshift and rshift ops that follow
the following pattern:
if (cmp) SSA_NAME OP CST1 else SSA_NAME
By executing the OP every time, using the zero_one pattern 'cmp' with
a 'mult' to re-create CST1:
IMM = cmp * CST1; SSA_NAME OP IMM
This works as long as 'OP' is an operation that results in SSA_NAME if
IMM == 0.
A helper pattern was added to simplify the following related case:
if (SSA_NAME == 0) SSA_NAME OP CST1 else SSA_NAME
if OP happens to be an operation that matches the same criteria from
above, this whole pattern can be reduced to 'SSA_NAME'. Otherwise our main
pattern will overcomplicate it needlessly and we'll have VRP regressions.
This was detected by pr103281-1.c.
As for OPs supported, we do not support XOR as a valid OP for this
transformation because an XOR in the format we're handling here happens
to match a CRC pattern (see gimple-crc-optimization.cc and crc-10.c test
file). We do not support PLUS at this point because it will break a lot
of scanner tests - something to go after in a follow-up.
Two existing tests were changed as a result of this optimization.
Bootstrapped on x86, aarch64 and rv64.
Regression tested on x86 and aarch64.
PR tree-optimization/56110
gcc/ChangeLog:
* match.pd (`if A == 0 A OP CST1 else A`): New pattern.
(`if A != 0 A else A OP CST1`): New pattern.
(`if (cmp) SSA_NAME OP CST1 else SSA_NAME`): New pattern.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr107195-3.c: The code in 'foo3' is now being
optimized with -O2 after these changes. Other functions in this
test file weren't affected.
* gcc.target/aarch64/sve/cond_shift_1.c: Add a PLUS operand in the
template to avoid the 56110 pattern being applied, allowing
the cond_shifts to occur as expected by the test.
* gcc.dg/tree-ssa/pr56110-2.c: New test.
* gcc.dg/tree-ssa/pr56110-3.c: New test.
* gcc.dg/tree-ssa/pr56110.c: New test.
---
gcc/match.pd | 33 ++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr107195-3.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/pr56110-2.c | 51 +++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr56110-3.c | 34 +++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr56110.c | 27 ++++++++++
.../gcc.target/aarch64/sve/cond_shift_1.c | 3 +-
6 files changed, 147 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr56110-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr56110-3.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr56110.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 7f16fd4e081..a4aaf705780 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6685,6 +6685,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& INTEGRAL_TYPE_P (TREE_TYPE (@0)))
(cond @1 (convert @2) (convert @3))))
+/* PR56110: helper pattern to simplify this trivial case
+ that the main pattern below can overcomplicate, resulting
+ in VRP having problems optimizing away unneeded function
+ calls (see pr103281-1.c).
+
+ In theory we only need to handle @0==0 and shifts
+ but let's also handle mult, bit_and and the @0!=0
+ case since we're at it. */
+(for op (lshift rshift bit_and mult)
+ (simplify
+ (cond (eq @0 integer_zerop) (op @0 @1) @0)
+ @0)
+ (simplify
+ (cond (ne @0 integer_zerop) @0 (op @0 @1))
+ @0))
+
+/* PR56110: "if (cond) A OP CST1 else A" -> make OP
+ unconditional by using the cond bool value to re-create
+ CST1 via cond*CST1. This works as long as OP is an
+ operation that returns "A" when CST1 is zero.
+
+ We're deliberately not handling bit_xor because the XOR
+ pattern is used in CRC detection. */
+(for cmp (simple_comparison)
+ (for op (bit_ior lshift rshift)
+ (simplify
+ (cond (cmp@2 @3 @4) (op @0 INTEGER_CST@1) @0)
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && TYPE_PRECISION (type) <= BITS_PER_WORD
+ && (TYPE_UNSIGNED (TREE_TYPE (@1)) || tree_int_cst_sgn (@1) > 0))
+ (op @0 (mult (convert:type @2) (convert:type @1)))))))
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr107195-3.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr107195-3.c
index eba4218b3c9..c4b1b800b16 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr107195-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr107195-3.c
@@ -1,6 +1,6 @@
/* Inspired by 'libgomp.oacc-c-c++-common/nvptx-sese-1.c'. */
-/* { dg-additional-options -O1 } */
+/* { dg-additional-options -O2 } */
/* { dg-additional-options -fdump-tree-dom3-raw } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr56110-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr56110-2.c
new file mode 100644
index 00000000000..d3603c18bd3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr56110-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* Macro adapted from builtin-object-size-common.h */
+#define FAIL() \
+ do { \
+ __builtin_printf ("Failure at line: %d\n", __LINE__); \
+ abort(); \
+ } while (0)
+
+void abort(void);
+
+unsigned f1 (unsigned x, unsigned m, unsigned n)
+{
+ if (x & 1)
+ m >>= 2;
+ return m + n;
+}
+
+unsigned f2 (unsigned x, unsigned m, unsigned n)
+{
+ if (x & 1)
+ m <<= 2;
+ return m + n;
+}
+
+unsigned f3 (unsigned x, unsigned m, unsigned n)
+{
+ if (x & 1)
+ m |= 2;
+ return m + n;
+}
+
+int main (void) {
+ if (f1 (0, 4, 1) != 5)
+ FAIL ();
+ if (f1 (1, 4, 1) != 2)
+ FAIL ();
+
+ if (f2 (0, 2, 1) != 3)
+ FAIL ();
+ if (f2 (1, 2, 1) != 9)
+ FAIL ();
+
+ if (f3 (0, 4, 1) != 5)
+ FAIL ();
+ if (f3 (1, 4, 1) != 7)
+ FAIL ();
+
+ return 0;
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr56110-3.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr56110-3.c
new file mode 100644
index 00000000000..6530dc2f5a5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr56110-3.c
@@ -0,0 +1,34 @@
+/* { dg-additional-options -O2 } */
+/* { dg-additional-options -fdump-tree-phiopt3 } */
+
+#define EQ_ZERO(opname, OP) \
+__attribute__((noinline,noclone)) \
+int eqzero_##opname(int m) { \
+ if (m == 0) \
+ m = m OP 2; \
+ return m; \
+}
+
+#define NE_ZERO(opname, OP) \
+__attribute__((noinline,noclone)) \
+int nezero_##opname(int m) { \
+ if (m != 0) \
+ return m; \
+ else \
+ m = m OP 2; \
+ return m; \
+}
+
+EQ_ZERO(lshift, <<)
+EQ_ZERO(rshift, >>)
+EQ_ZERO(bit_and, &)
+EQ_ZERO(mult, *)
+
+NE_ZERO(lshift, <<)
+NE_ZERO(rshift, >>)
+NE_ZERO(bit_and, &)
+NE_ZERO(mult, *)
+
+/* { dg-final { scan-tree-dump-times "PHI" 0 phiopt3 } } */
+/* { dg-final { scan-tree-dump-times " == " 0 phiopt3 } } */
+/* { dg-final { scan-tree-dump-times " != " 0 phiopt3 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr56110.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr56110.c
new file mode 100644
index 00000000000..b8134f9116f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr56110.c
@@ -0,0 +1,27 @@
+/* { dg-additional-options -O2 } */
+/* { dg-additional-options -fdump-tree-phiopt3 } */
+
+unsigned f1 (unsigned x, unsigned m)
+{
+ if (m & 0x008080)
+ x >>= 8;
+
+ return x;
+}
+
+unsigned f2 (unsigned x, unsigned m)
+{
+ if (m & 0x008080)
+ x <<= 8;
+
+ return x;
+}
+
+unsigned f3 (unsigned x, unsigned m)
+{
+ if (m & 0x008080)
+ x |= 8;
+
+ return x;
+}
+/* { dg-final { scan-tree-dump-times "PHI" 0 phiopt3 } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c
b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c
index f2c51b291b2..15d3ef9b4af 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c
@@ -9,7 +9,7 @@
TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
- r[i] = a[i] > 20 ? b[i] OP 3 : b[i]; \
+ r[i] = a[i] > 20 ? b[i] OP 3 : b[i] + 1; \
}
#define TEST_TYPE(T, TYPE) \
@@ -44,5 +44,4 @@ TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */
-/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */