And finally, w.r.t mod operations, we want to always convert them to
something lighter (like a lshift or bit_and). For zero comparisons with
a integer_pow2 operand we want to do a bit_and instead, and the frontend
will convert it for us regardless of SSA_NAME sign. But the C frontend
won't convert a mod operand wrapped into a if, e.g. this C code:
return y % (x ? 16 : 4) == 0;
We're adding a pattern that handles this case, turning this into:
return ((xD.4590 != 0 ? 15 : 3) & yD.4589) == 0;
With all these patterns we will optimize and standardize all code
examples in PR101179.
Bootstrapped on x86, aarch64 and rv64.
Regression tested on x86 and aarch64.
PR tree-optimization/101179
gcc/ChangeLog:
* match.pd(`A OP ((CST1 shift (zero_one * CST2)))`): New
pattern.
(`A OP ((CST1 shift (zero_one*CST2)) + CST3)`): New pattern.
(`A OP ((zero_one shift CST1) + CST2)`): New pattern.
(`A OP (CST2 - (zero_one lshift CST1))`): New pattern.
(`if cond (cmp (OP A B) C) (cmp (OP A D) C)`): New pattern.
(`A % (cond X pow2A pow2B) EQ|NE 0`)`: New pattern.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr101179-2.c: New test.
* gcc.dg/tree-ssa/pr101179-3.c: New test.
* gcc.dg/tree-ssa/pr101179.c: New test.
---
gcc/match.pd | 133 ++++++++++++++++++++-
gcc/testsuite/gcc.dg/tree-ssa/pr101179-2.c | 36 ++++++
gcc/testsuite/gcc.dg/tree-ssa/pr101179-3.c | 11 ++
gcc/testsuite/gcc.dg/tree-ssa/pr101179.c | 95 +++++++++++++++
4 files changed, 274 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr101179-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr101179-3.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr101179.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 7f16fd4e081..885e4413e08 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8642,6 +8642,137 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
replace if (x == 0) with tem = ~x; if (tem != 0) which is
clearly less optimal and which we'll transform again in forwprop. */
+/* PR101179: decompose the following zero_one patterns than creates
+ two CSTs into a cond based format. E.g:
+
+ A OP (4 << (zero_one * 2)) -> A OP (zero_one ? 16 : 4)
+
+ This will give optimizations like PR122608 the opportunity to
+ turn this further into:
+
+ "zero_one ? A OP CST1 : A OP CST2". */
+(for op (plus minus mult bit_and bit_ior bit_xor lshift rshift
+ rdiv trunc_div ceil_div floor_div round_div exact_div
+ trunc_mod ceil_mod floor_mod round_mod)
+ (for shift (lshift rshift)
+
+ /* A OP ((CST1 shift (zero_one * CST2))) ->
+ A OP (zero_one ? CST1 shift CST2 : CST1) */
+ (simplify
+ (op @0 (shift INTEGER_CST@1
+ (mult:c (convert? zero_one_valued_p@2) INTEGER_CST@3)))
+ (op @0 (cond @2 (shift @1 @3) @1)))
+ (simplify
+ (op (shift INTEGER_CST@1
+ (mult:c (convert? zero_one_valued_p@2) INTEGER_CST@3))
+ @0)
+ (op (cond @2 (shift @1 @3) @1) @0))
+
+ /* A OP ((CST1 shift (zero_one*CST2)) + CST3) ->
+ A OP (zero_one ? (CST1 shift CST2) + CST3 : CST1 + CST3) */
+ (simplify
+ (op @0 (plus:c
+ (shift INTEGER_CST@1
+ (mult:c (convert? zero_one_valued_p@2) INTEGER_CST@3))
+ INTEGER_CST@4))
+ (op @0 (cond @2 (plus (shift @1 @3) @4) (plus @1 @4))))
+ (simplify
+ (op (plus:c
+ (shift INTEGER_CST@1
+ (mult:c (convert? zero_one_valued_p@2) INTEGER_CST@3))
+ INTEGER_CST@4)
+ @0)
+ (op (cond @2 (plus (shift @1 @3) @4) (plus @1 @4)) @0))
+
+ /* A OP ((zero_one shift CST1) + CST2) ->
+ A OP (zero_one ? (1 shift CST1) + CST2 : CST2) */
+ (simplify
+ (op @0 (plus:c (shift zero_one_valued_p@2 INTEGER_CST@1) INTEGER_CST@3))
+ (op @0 (cond @2 (plus (shift { build_one_cst (type); } @1) @3)
+ @3)))
+ (simplify
+ (op (plus:c (shift zero_one_valued_p@2 INTEGER_CST@1) INTEGER_CST@3) @0)
+ (op (cond @2 (plus (shift { build_one_cst (type); } @1) @3) @3)
+ @0))
+
+ /* A OP (CST2 - (zero_one lshift CST1)) ->
+ A OP (zero_one ? CST2 - (1 lshift CST1) : CST2) */
+ (simplify
+ (op @0 (minus INTEGER_CST@3
+ (lshift zero_one_valued_p@2 INTEGER_CST@1)))
+ (op @0 (cond @2 (minus @3 (lshift { build_one_cst (type); } @1)) @3)))
+ (simplify
+ (op (minus INTEGER_CST@3
+ (lshift zero_one_valued_p@2 INTEGER_CST@1)) @0)
+ (op (cond @2 (minus @3 (lshift { build_one_cst (type); } @1)) @3) @0))
+))
+
+/* PR101179: if cond (cmp (OP A B) C) (cmp (OP A D) C)
+
+ The idea here is to detect patterns like this C code:
+
+ (x ? (y%16) == 0 : (y%4) == 0)
+
+ That will generate phiopt dumps such as:
+
+ bb3:
+ _14 = y_9 & 15;
+ _3 = _14 == 0;
+ goto <bb 5>;
+
+ bb4:
+ _13 = y_9 & 3;
+ _6 = _13 == 0;
+
+ bb5:
+ # _5 = PHI <_6 (4), _3 (3)>
+ _7 = (intD.7) _5;
+
+ We want to turn that into a pattern more friendly to other
+ optimizations (like PR122608) by moving the comparison to
+ the merge block:
+
+ bb3:
+ _14 = y_9 & 15;
+ goto <bb 5>;
+
+ bb4:
+ _13 = y_9 & 3;
+
+ bb5:
+ # _5 = PHI <_6 (4), _3 (3)>
+ _6 = _5 == 0;
+ _7 = (intD.7) _5;
+
+ Note that several opcodes (plus/minus, div, min/max) do
+ not generate this pattern due to their own specific
+ comparison optimizations. */
+(for op (mult bit_and bit_ior bit_xor lshift rshift
+ trunc_mod ceil_mod floor_mod round_mod)
+ (for cmp (simple_comparison)
+ (simplify
+ (cond @0 (cmp (op @1 @2) @4) (cmp (op @1 @3) @4))
+ (cmp (op @1 (cond @0 @2 @3)) @4)))
+)
+
+/* PR101179: A % (cond X pow2A pow2B) EQ|NE 0 ->
+ A & (cond X pow2A-1 pow2B-1) EQ|NE 0
+
+ Turning a comparison between a mod with a pow2 operand and zero
+ into a bit_and is usually done by the frontend. However the
+ frontend (from C, at least) is not able to do this conversion
+ if the mod is using the result of a cond with two pow2 operands. */
+(for op (trunc_mod ceil_mod floor_mod round_mod)
+ (for cmp (eq ne)
+ (simplify
+ (cmp (op @0 (cond@4 @3 integer_pow2p@1 integer_pow2p@2)) integer_zerop@5)
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && tree_int_cst_sgn (@1) > 0
+ && tree_int_cst_sgn (@2) > 0)
+ (cmp (bit_and @0 (minus @4 { build_one_cst (TREE_TYPE (@4)); }))
+ { build_zero_cst (TREE_TYPE (@0)); })))))
+
#if GIMPLE
/* From fold_binary_op_with_conditional_arg handle the case of
rewriting (a ? b : c) > d to a ? (b > d) : (c > d) when the
@@ -8658,7 +8789,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| !operation_could_trap_p (cmp, true, false, @3))
(cond @0 (cmp!:type @1 @3) (cmp!:type @2 @3)))))
-/* Similar to above:
+/* PR 122608 - Similar to above:
(c ? a : b) op d -> c ? (a op d) : (b op d)
But with non-vector binary ops, most of them non-commutative. */
(for op (plus minus mult bit_and bit_ior bit_xor
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr101179-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr101179-2.c
new file mode 100644
index 00000000000..f8e9be2d404
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr101179-2.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt1" } */
+
+/* Tests for the pattern
+ if cond X (cmp (OP A B) C) (cmp (OP A D) C) ->
+ (cmp (op A (cond X B D) C))). */
+
+#define F(OP,NAME) \
+ int NAME##_test (int y, _Bool x) { \
+ return x ? (y OP 16) == 0 : (y OP 4) == 0; \
+}
+
+F (%, mod)
+F (<<, lshift)
+F (>>, rshift)
+
+int bit_and_test (int y, _Bool x) {
+ return x ? (y & 63) != 1 : (y & 16) != 1;
+}
+
+int bit_ior_test (int y, _Bool x) {
+ return x ? (y | 3) < 0xF : (y | 8) < 0XF;
+}
+
+int bit_xor_test (int y, _Bool x) {
+ return x ? (y ^ 3) <= 13 : (y ^ 8) <= 13;
+}
+
+int mult_test (int y, _Bool x) {
+ return x ? (y * 3) > 0xF : (y * 2) > 0XF;
+}
+/* { dg-final { scan-tree-dump-times " == 0" 3 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times " != 1" 1 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times " <= 14" 1 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times " <= 13" 1 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times " > 15" 1 "phiopt1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr101179-3.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr101179-3.c
new file mode 100644
index 00000000000..9ca0a6a03e4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr101179-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt1" } */
+
+int test (int y, _Bool x) {
+ return y % (x ? 16 : 4) == 0;
+}
+
+/* { dg-final { scan-tree-dump-times " PHI <15" 1 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times " PHI <16" 0 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times " & " 1 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times " % " 0 "phiopt1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr101179.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr101179.c
new file mode 100644
index 00000000000..f448e7a73fa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr101179.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt1" } */
+
+#define F(OP,NAME) \
+ int NAME##_test (int y, _Bool x) { \
+ return y OP (4 << (x * 3)); \
+}
+
+#define F2(OP,NAME) \
+ int NAME##_test2 (int y, _Bool x) { \
+ return y OP ((4 << (x * 3)) + 1); \
+}
+
+#define F3(OP,NAME) \
+ int NAME##_test3 (int y, _Bool x) { \
+ return y OP (32 >> (x * 3)); \
+}
+
+#define F4(OP,NAME) \
+ int NAME##_test4 (int y, _Bool x) { \
+ return y OP ((32 >> (x * 3)) + 1); \
+}
+
+#define F5(OP,NAME) \
+ int NAME##_test5 (int y, _Bool x) { \
+ return y OP ((x << 2) + 3); \
+}
+
+#define F6(OP,NAME) \
+ int NAME##_test6 (int y, _Bool x) { \
+ return y OP (7 - (x << 2)); \
+}
+
+/* Same patterns from above but switching operands */
+#define F_alt(OP,NAME) \
+ int NAME##_test_alt (int y, _Bool x) { \
+ return (4 << (x * 3)) OP y; \
+}
+
+#define F_alt2(OP,NAME) \
+ int NAME##_test_alt2 (int y, _Bool x) { \
+ return ((4 << (x * 3)) + 1) OP y; \
+}
+
+#define F_alt3(OP,NAME) \
+ int NAME##_test_alt3 (int y, _Bool x) { \
+ return (32 >> (x * 3)) OP y; \
+}
+
+#define F_alt4(OP,NAME) \
+ int NAME##_test_alt4 (int y, _Bool x) { \
+ return ((32 >> (x * 3)) + 1) OP y; \
+}
+
+#define F_alt5(OP,NAME) \
+ int NAME##_test_alt5 (int y, _Bool x) { \
+ return ((x << 2) + 3) OP y; \
+}
+
+#define F_alt6(OP,NAME) \
+ int NAME##_test_alt6 (int y, _Bool x) { \
+ return (7 - (x << 2)) OP y; \
+}
+
+#define T(OP,NAME) \
+ F (OP, NAME) \
+ F2 (OP, NAME) \
+ F3 (OP, NAME) \
+ F4 (OP, NAME) \
+ F5 (OP, NAME) \
+ F6 (OP, NAME) \
+ F_alt (OP, NAME) \
+ F_alt2 (OP, NAME) \
+ F_alt3 (OP, NAME) \
+ F_alt4 (OP, NAME) \
+ F_alt5 (OP, NAME) \
+ F_alt6 (OP, NAME)
+
+T (+, plus)
+T (-, minus)
+T (*, mult)
+T (|, bit_and)
+T (|, bit_ior)
+T (^, bit_xor)
+T (/, div)
+T (%, mod)
+T (<<, lshift)
+T (>>, rshift)
+
+/* { dg-final { scan-tree-dump-times "PHI <32" 20 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "PHI <33" 20 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "PHI <4" 20 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "PHI <5" 20 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "PHI <7" 20 "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "PHI <3\\(" 20 "phiopt1" } } */