visit_nary_op canonicalises (T)(A + C) into (T)A + (T)C for its VN
lookup, but not the reverse -- so whether VN discovers (T)A + C ==
(T)(A + C) depends on which form it sees first. Add a match.pd rule
that rewrites (T)A +- CST into (T)(A +- CST') using the op! qualifier,
so the fold only fires when the narrow expression already has a value
number -- i.e. only inside VN via mprts_hook.
Restrict to TYPE_OVERFLOW_UNDEFINED inner types: for an unsigned inner
type the narrow op wraps mod 2^prec (which is defined behaviour) while the
widened outer op does not, so the rewrite would change the observed value
(bitfld-5.c is the concrete miscompile when the guard is loosened).
Use wi::min_precision (CST, SIGNED) rather than int_fits_type_p for the
fits-check, so sign-encoded small negatives (e.g. -1 as sizetype's
0xFFFF...FFFF) qualify.
PR tree-optimization/124545
gcc/ChangeLog:
* match.pd: Add (T)A +- CST -> (T)(A +- CST') for widening
conversions from a signed inner type with undefined overflow.
gcc/testsuite/ChangeLog:
* gcc.dg/pr124545.c: New test.
* gcc.dg/pr124545-2.c: New test.
Signed-off-by: Philipp Tomsich <[email protected]>
---
gcc/match.pd | 32 ++++++++++++++++++
gcc/testsuite/gcc.dg/pr124545-2.c | 55 +++++++++++++++++++++++++++++++
gcc/testsuite/gcc.dg/pr124545.c | 29 ++++++++++++++++
3 files changed, 116 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/pr124545-2.c
create mode 100644 gcc/testsuite/gcc.dg/pr124545.c
diff --git a/gcc/match.pd b/gcc/match.pd
index ddf3b61638ce..817a52499128 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4067,6 +4067,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(plus (convert @0) (op @2 (convert @1))))))
#endif
+/* Inverse of the above: (T)(A) +- CST -> (T)(A +- CST') when T is a
+ widening conversion from a type with undefined overflow and the outer
+ type wraps. This allows VN to discover that (T)A + (T)C == (T)(A + C)
+ regardless of which form appears first in program order. PR124545.
+ The rewrite is unsound for unsigned inner types: the narrow op wraps
+ mod 2^prec (defined) while the widened op does not, changing the
+ observed value. Cover the unsigned case separately once ranger can
+ prove no wrap. */
+#if GIMPLE
+ (for op (plus minus)
+ (simplify
+ (op (convert @0) INTEGER_CST@1)
+ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+ && TREE_CODE (type) == INTEGER_TYPE
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_WRAPS (type)
+ /* CST must be the sign-extension of its low inner-precision bits,
+ otherwise narrowing changes the value. Use min_precision (..,
+ SIGNED) rather than int_fits_type_p so that small negative offsets
+ encoded as large unsigned constants (e.g. -1 as sizetype) still
+ qualify. */
+ && wi::min_precision (wi::to_wide (@1), SIGNED)
+ <= TYPE_PRECISION (TREE_TYPE (@0)))
+ (with {
+ wide_int c1 = wi::to_wide (@1);
+ tree inner_cst = wide_int_to_tree (TREE_TYPE (@0),
+ wi::sext (c1, TYPE_PRECISION (TREE_TYPE (@0)))); }
+ (convert (op! @0 { inner_cst; }))))))
+#endif
+
/* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
to a simple value. */
(for op (plus minus)
diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c b/gcc/testsuite/gcc.dg/pr124545-2.c
new file mode 100644
index 000000000000..b4806567acce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124545-2.c
@@ -0,0 +1,55 @@
+/* PR tree-optimization/124545 */
+/* Runtime correctness for the inverse-widening VN rewrite
+ (T)A +- CST -> (T)(A +- CST'). The rewrite must never change the
+ computed value. In particular it must NOT fire when CST is not
+ representable in the inner type (which would silently drop the bits
+ above the inner precision), and it must stay correct for unsigned
+ inner types where the narrow operation wraps. */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* CST = 2^32 does not fit in int: the value must be preserved.
+ Before the fix this comparison folded to a constant 1. */
+__attribute__((noipa)) int
+oor_eq (int a)
+{
+ return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a;
+}
+
+__attribute__((noipa)) unsigned long long
+oor_val (int a)
+{
+ return (unsigned long long) a + 0x100000000ULL;
+}
+
+/* Unsigned inner: narrow add wraps mod 2^32; the widened add does not.
+ The result must match true 64-bit arithmetic, hence unsigned long long. */
+__attribute__((noipa)) int
+uns_carry (unsigned int a)
+{
+ unsigned int t = a + 100u;
+ unsigned long long w = (unsigned long long) a + 100;
+ return w == (unsigned long long) t;
+}
+
+/* Legitimate in-range case (matches the PR): k == j - 1, so the two
+ loads are the same address and the rewrite may fire. */
+__attribute__((noipa)) int
+inrange_eq (int *p, int j)
+{
+ int k = j - 1;
+ return p[j - 1] == p[k];
+}
+
+int
+main (void)
+{
+ if (oor_eq (5) != 0) __builtin_abort ();
+ if (oor_eq (-1) != 0) __builtin_abort ();
+ if (oor_val (5) != 5ULL + 0x100000000ULL) __builtin_abort ();
+ if (uns_carry (0xfffffff0u) != 0) __builtin_abort ();
+ if (uns_carry (10) != 1) __builtin_abort ();
+ int arr[4] = { 7, 7, 7, 7 };
+ if (inrange_eq (arr, 2) != 1) __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr124545.c b/gcc/testsuite/gcc.dg/pr124545.c
new file mode 100644
index 000000000000..a21346b179c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124545.c
@@ -0,0 +1,29 @@
+/* PR tree-optimization/124545 */
+/* Verify that VN recognizes (T)A + C == (T)(A + C') regardless of
+ operand order in the equality comparison. */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int func1(int *a, int j) {
+ int k = j - 1;
+ return a[j - 1] == a[k];
+}
+
+int func2(int *a, int j) {
+ int k = j - 1;
+ return a[k] == a[j - 1];
+}
+
+int func3(int *a, int j) {
+ int k = j - 3;
+ return a[k] == a[j - 3];
+}
+
+int func4(int *a, int j) {
+ int k = j + 2;
+ return a[k] == a[j + 2];
+}
+
+/* All four functions should fold to return 1 after FRE. */
+/* The pattern is not applied on ilp32 targets (PR116845). */
+/* { dg-final { scan-tree-dump-times "return 1;" 4 "fre1" { xfail { ilp32 } } } } */
--
2.34.1
base-commit: d3cd3ff57b6ad4eb434fd75e4e54b2884ab4a44f
branch: ptomsich/pr124545-vn-inverse-v2