In forward_propagate_into_comparison_1, the invariant_only_p
restriction prevents folding comparisons when the defining SSA value
has multiple uses and the folded result is not a constant.  This
blocks the simplification of patterns like (++*a == 1) into (*a == 0),
where comparing the pre-increment value against zero is cheaper on
most targets (e.g., beqz on RISC-V, cbz on AArch64).

Relax invariant_only_p when the defining statement is a PLUS_EXPR
with a constant operand, the comparison is an equality test against a
non-zero constant, and the folded constant would be zero.  GIMPLE
canonicalizes (X - C) to (X + -C), so only PLUS_EXPR needs handling.
This ensures we only fold toward zero comparisons, never away from
them (e.g., --*a == 0 must not fold to *a == 1).

For example, given:
  _1 = *a;
  _2 = _1 + 1;
  *a = _2;
  if (_2 == 1)

forwprop now produces:
  if (_1 == 0)

which generates beqz/cbz instead of li+beq/cmp+b.eq.

gcc/ChangeLog:

        * tree-ssa-forwprop.cc (forward_propagate_into_comparison_1):
        Relax invariant_only_p for PLUS_EXPR with constant operand
        when the fold produces an equality comparison against zero.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/forwprop-pre-incr-cmp.c: New test.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-pre-incr-cmp.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-pre-incr-cmp.c
new file mode 100644
index 000000000000..77e74700b9ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-pre-incr-cmp.c
@@ -0,0 +1,93 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2" } */
+
+/* Verify that forwprop folds (++*a == 1) into (*a == 0), comparing the
+   pre-increment value against zero instead of comparing the incremented
+   value against 1.  Only fold when the result is a comparison against
+   zero (which is cheaper on most architectures).  */
+
+void g ();
+
+/* Unsigned EQ: ++*a == 1 -> *a == 0.  */
+void f1 (unsigned int *a)
+{
+  if (++*a == 1)
+    g ();
+}
+
+/* Unsigned NE: ++*a != 1 -> *a != 0.  */
+void f2 (unsigned int *a)
+{
+  if (++*a != 1)
+    g ();
+}
+
+/* Unsigned EQ with addend > 1: (*a += 3) == 3 -> *a == 0.  */
+void f3 (unsigned int *a)
+{
+  if ((*a += 3) == 3)
+    g ();
+}
+
+/* Unsigned EQ with non-zero result: (*a += 3) == 10 does NOT fold
+   (result would be 7, not zero).  */
+void f4 (unsigned int *a)
+{
+  if ((*a += 3) == 10)
+    g ();
+}
+
+/* Unsigned EQ already comparing against zero: --*a == 0 must NOT
+   fold to *a == 1 (regression away from zero).  */
+void f5 (unsigned int *a)
+{
+  if (--*a == 0)
+    g ();
+}
+
+/* Signed EQ: ++*a == 1 -> *a == 0.  */
+void f6 (int *a)
+{
+  if (++*a == 1)
+    g ();
+}
+
+/* Signed NE: ++*a != 1 -> *a != 0.  */
+void f7 (int *a)
+{
+  if (++*a != 1)
+    g ();
+}
+
+/* Signed EQ already comparing against zero: --*a == 0 must NOT
+   fold to *a == 1 (regression away from zero).  */
+void f8 (int *a)
+{
+  if (--*a == 0)
+    g ();
+}
+
+/* Ordering comparison: (++*a > 1) must NOT fold, even though the
+   folded constant would be zero -- the relaxation is restricted
+   to EQ_EXPR and NE_EXPR.  */
+void f9 (int *a)
+{
+  if (++*a > 1)
+    g ();
+}
+
+/* Positive: unsigned and signed EQ/NE fold to zero.
+   Use scan-tree-dump-times to independently verify that both unsigned
+   (f1/f2) and signed (f6/f7) variants fold.  */
+/* { dg-final { scan-tree-dump-times "Replaced '_\[0-9\]+ == 1' with '_\[0-9\]+ == 0'" 2 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "Replaced '_\[0-9\]+ != 1' with '_\[0-9\]+ != 0'" 2 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "Replaced '_\[0-9\]+ == 3' with '_\[0-9\]+ == 0'" 1 "forwprop2" } } */
+
+/* Negative: non-zero result must not fold.  */
+/* { dg-final { scan-tree-dump-not "Replaced '_\[0-9\]+ == 10' with '_\[0-9\]+ == 7'" "forwprop2" } } */
+
+/* Negative: already-zero comparison must not fold away from zero.  */
+/* { dg-final { scan-tree-dump-not "Replaced '_\[0-9\]+ == 0' with '_\[0-9\]+ ==" "forwprop2" } } */
+
+/* Negative: ordering comparison must not fold via this path.  */
+/* { dg-final { scan-tree-dump-not "Replaced '_\[0-9\]+ > 1' with '_\[0-9\]+ > 0'" "forwprop2" } } */
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index b5544414ca6e..0e1637d4782a 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -467,6 +467,32 @@ forward_propagate_into_comparison_1 (gimple *stmt,
                  || TREE_CODE_CLASS (def_code) == tcc_comparison))
            invariant_only_p = false;
 
+         /* Allow combining when the defining statement is an addition
+            with a constant, and the fold will produce a comparison
+            against zero.  On most architectures, comparing against
+            zero is cheaper than comparing against a non-zero constant.
+            Only relax invariant_only_p when the original comparison
+            is non-zero and the folded result would be zero -- otherwise
+            we could regress by moving a comparison away from zero.
+            Note: GIMPLE canonicalizes (X - C) to (X + -C), so only
+            PLUS_EXPR needs to be handled here.  */
+         if (invariant_only_p
+             && (code == EQ_EXPR || code == NE_EXPR)
+             && TREE_CODE (op1) == INTEGER_CST
+             && !integer_zerop (op1)
+             && def_code == PLUS_EXPR
+             && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == INTEGER_CST)
+           {
+             tree rhs2 = gimple_assign_rhs2 (def_stmt);
+             /* op1 and rhs2 may have different types due to implicit
+                promotions; int_const_binop handles this by converting
+                rhs2 to op1's precision.  We only check integer_zerop
+                on the result, which is type-insensitive.  */
+             tree folded_cst = int_const_binop (MINUS_EXPR, op1, rhs2);
+             if (folded_cst && integer_zerop (folded_cst))
+               invariant_only_p = false;
+           }
+
          tmp = combine_cond_expr_cond (stmt, code, type,
                                        rhs0, op1, invariant_only_p);
          if (tmp)
-- 
2.34.1

Reply via email to