In forward_propagate_into_comparison_1, the invariant_only_p
restriction prevents folding comparisons when the defining SSA value
has multiple uses and the folded result is not a constant. This
blocks the simplification of patterns like (++*a == 1) into (*a == 0),
where comparing the pre-increment value against zero is cheaper on
most targets (e.g., beqz on RISC-V, cbz on AArch64).
Relax invariant_only_p when the defining statement is a PLUS_EXPR
with a constant operand, the comparison is an equality test against a
non-zero constant, and the folded constant would be zero. GIMPLE
canonicalizes (X - C) to (X + -C), so only PLUS_EXPR needs handling.
This ensures we only fold toward zero comparisons, never away from
them (e.g., --*a == 0 must not fold to *a == 1).
For example, given:
_1 = *a;
_2 = _1 + 1;
*a = _2;
if (_2 == 1)
forwprop now produces:
if (_1 == 0)
which generates beqz/cbz instead of li+beq/cmp+b.eq.
gcc/ChangeLog:
* tree-ssa-forwprop.cc (forward_propagate_into_comparison_1):
Relax invariant_only_p for PLUS_EXPR with constant operand
when the fold produces an equality comparison against zero.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/forwprop-pre-incr-cmp.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-pre-incr-cmp.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-pre-incr-cmp.c
new file mode 100644
index 000000000000..77e74700b9ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-pre-incr-cmp.c
@@ -0,0 +1,93 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2" } */
+
+/* Verify that forwprop folds (++*a == 1) into (*a == 0), comparing the
+ pre-increment value against zero instead of comparing the incremented
+ value against 1. Only fold when the result is a comparison against
+ zero (which is cheaper on most architectures). */
+
+void g ();
+
+/* Unsigned EQ: ++*a == 1 -> *a == 0. */
+void f1 (unsigned int *a)
+{
+ if (++*a == 1)
+ g ();
+}
+
+/* Unsigned NE: ++*a != 1 -> *a != 0. */
+void f2 (unsigned int *a)
+{
+ if (++*a != 1)
+ g ();
+}
+
+/* Unsigned EQ with addend > 1: (*a += 3) == 3 -> *a == 0. */
+void f3 (unsigned int *a)
+{
+ if ((*a += 3) == 3)
+ g ();
+}
+
+/* Unsigned EQ with non-zero result: (*a += 3) == 10 does NOT fold
+ (result would be 7, not zero). */
+void f4 (unsigned int *a)
+{
+ if ((*a += 3) == 10)
+ g ();
+}
+
+/* Unsigned EQ already comparing against zero: --*a == 0 must NOT
+ fold to *a == 1 (regression away from zero). */
+void f5 (unsigned int *a)
+{
+ if (--*a == 0)
+ g ();
+}
+
+/* Signed EQ: ++*a == 1 -> *a == 0. */
+void f6 (int *a)
+{
+ if (++*a == 1)
+ g ();
+}
+
+/* Signed NE: ++*a != 1 -> *a != 0. */
+void f7 (int *a)
+{
+ if (++*a != 1)
+ g ();
+}
+
+/* Signed EQ already comparing against zero: --*a == 0 must NOT
+ fold to *a == 1 (regression away from zero). */
+void f8 (int *a)
+{
+ if (--*a == 0)
+ g ();
+}
+
+/* Ordering comparison: (++*a > 1) must NOT fold, even though the
+ folded constant would be zero -- the relaxation is restricted
+ to EQ_EXPR and NE_EXPR. */
+void f9 (int *a)
+{
+ if (++*a > 1)
+ g ();
+}
+
+/* Positive: unsigned and signed EQ/NE fold to zero.
+ Use scan-tree-dump-times to independently verify that both unsigned
+ (f1/f2) and signed (f6/f7) variants fold. */
+/* { dg-final { scan-tree-dump-times "Replaced '_\[0-9\]+ == 1' with '_\[0-9\]+ == 0'" 2 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "Replaced '_\[0-9\]+ != 1' with '_\[0-9\]+ != 0'" 2 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "Replaced '_\[0-9\]+ == 3' with '_\[0-9\]+ == 0'" 1 "forwprop2" } } */
+
+/* Negative: non-zero result must not fold. */
+/* { dg-final { scan-tree-dump-not "Replaced '_\[0-9\]+ == 10' with '_\[0-9\]+ == 7'" "forwprop2" } } */
+
+/* Negative: already-zero comparison must not fold away from zero. */
+/* { dg-final { scan-tree-dump-not "Replaced '_\[0-9\]+ == 0' with '_\[0-9\]+ ==" "forwprop2" } } */
+
+/* Negative: ordering comparison must not fold via this path. */
+/* { dg-final { scan-tree-dump-not "Replaced '_\[0-9\]+ > 1' with '_\[0-9\]+ > 0'" "forwprop2" } } */
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index b5544414ca6e..0e1637d4782a 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -467,6 +467,32 @@ forward_propagate_into_comparison_1 (gimple *stmt,
|| TREE_CODE_CLASS (def_code) == tcc_comparison))
invariant_only_p = false;
+ /* Allow combining when the defining statement is an addition
+ with a constant, and the fold will produce a comparison
+ against zero. On most architectures, comparing against
+ zero is cheaper than comparing against a non-zero constant.
+ Only relax invariant_only_p when the original comparison
+ is non-zero and the folded result would be zero -- otherwise
+ we could regress by moving a comparison away from zero.
+ Note: GIMPLE canonicalizes (X - C) to (X + -C), so only
+ PLUS_EXPR needs to be handled here. */
+ if (invariant_only_p
+ && (code == EQ_EXPR || code == NE_EXPR)
+ && TREE_CODE (op1) == INTEGER_CST
+ && !integer_zerop (op1)
+ && def_code == PLUS_EXPR
+ && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == INTEGER_CST)
+ {
+ tree rhs2 = gimple_assign_rhs2 (def_stmt);
+ /* op1 and rhs2 may have different types due to implicit
+ promotions; int_const_binop handles this by converting
+ rhs2 to op1's precision. We only check integer_zerop
+ on the result, which is type-insensitive. */
+ tree folded_cst = int_const_binop (MINUS_EXPR, op1, rhs2);
+ if (folded_cst && integer_zerop (folded_cst))
+ invariant_only_p = false;
+ }
+
tmp = combine_cond_expr_cond (stmt, code, type,
rhs0, op1, invariant_only_p);
if (tmp)
--
2.34.1