visit_nary_op canonicalises (T)(A + C) into (T)A + (T)C for its VN
lookup, but not the reverse -- so whether VN discovers (T)A + C ==
(T)(A + C) depends on which form it sees first.  Add a match.pd rule
that rewrites (T)A +- CST into (T)(A +- CST') using the op! qualifier,
so the fold only fires when the narrow expression already has a value
number -- i.e. only inside VN via mprts_hook.

Restrict to TYPE_OVERFLOW_UNDEFINED inner types: for unsigned inner the
narrow op wraps mod 2^prec (defined) while the widened outer op does
not, changing the observed value (bitfld-5.c is the concrete miscompile
when the guard is loosened).

Use wi::min_precision (CST, SIGNED) rather than int_fits_type_p for the
fits-check, so sign-encoded small negatives (e.g. -1 as sizetype's
0xFFFF...FFFF) qualify.

        PR tree-optimization/124545

gcc/ChangeLog:

        * match.pd: Add (T)A +- CST -> (T)(A +- CST') for widening
        conversions from a signed inner type with undefined overflow.

gcc/testsuite/ChangeLog:

        * gcc.dg/pr124545.c: New test.
        * gcc.dg/pr124545-2.c: New test.

Signed-off-by: Philipp Tomsich <[email protected]>

---

 gcc/match.pd                      | 32 ++++++++++++++++++
 gcc/testsuite/gcc.dg/pr124545-2.c | 55 +++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr124545.c   | 29 ++++++++++++++++
 3 files changed, 116 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr124545-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr124545.c

diff --git a/gcc/match.pd b/gcc/match.pd
index ddf3b61638ce..817a52499128 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4067,6 +4067,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        (plus (convert @0) (op @2 (convert @1))))))
 #endif
 
+/* Inverse of the above: (T)(A) +- CST -> (T)(A +- CST') when T is a
+   widening conversion from a type with undefined overflow and the outer
+   type wraps.  This allows VN to discover that (T)A + (T)C == (T)(A + C)
+   regardless of which form appears first in program order.  PR124545.
+   The rewrite is unsound for unsigned inner types: the narrow op wraps
+   mod 2^prec (defined) while the widened op does not, changing the
+   observed value.  Cover the unsigned case separately once ranger can
+   prove no wrap.  */
+#if GIMPLE
+  (for op (plus minus)
+   (simplify
+    (op (convert @0) INTEGER_CST@1)
+     (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+         && TREE_CODE (type) == INTEGER_TYPE
+         && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+         && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+         && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+         && TYPE_OVERFLOW_WRAPS (type)
+         /* CST must be the sign-extension of its low inner-precision bits,
+            otherwise narrowing changes the value.  Use min_precision (..,
+            SIGNED) rather than int_fits_type_p so that small negative offsets
+            encoded as large unsigned constants (e.g. -1 as sizetype) still
+            qualify.  */
+         && wi::min_precision (wi::to_wide (@1), SIGNED)
+            <= TYPE_PRECISION (TREE_TYPE (@0)))
+       (with {
+         wide_int c1 = wi::to_wide (@1);
+         tree inner_cst = wide_int_to_tree (TREE_TYPE (@0),
+                            wi::sext (c1, TYPE_PRECISION (TREE_TYPE (@0)))); }
+       (convert (op! @0 { inner_cst; }))))))
+#endif
+
 /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
    to a simple value.  */
   (for op (plus minus)
diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c b/gcc/testsuite/gcc.dg/pr124545-2.c
new file mode 100644
index 000000000000..b4806567acce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124545-2.c
@@ -0,0 +1,55 @@
+/* PR tree-optimization/124545 */
+/* Runtime correctness for the inverse-widening VN rewrite
+   (T)A +- CST -> (T)(A +- CST').  The rewrite must never change the
+   computed value.  In particular it must NOT fire when CST is not
+   representable in the inner type (which would silently drop the bits
+   above the inner precision), and it must stay correct for unsigned
+   inner types where the narrow operation wraps.  */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* CST = 2^32 does not fit in int: the value must be preserved.
+   Before the fix this comparison folded to a constant 1.  */
+__attribute__((noipa)) int
+oor_eq (int a)
+{
+  return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a;
+}
+
+__attribute__((noipa)) unsigned long long
+oor_val (int a)
+{
+  return (unsigned long long) a + 0x100000000ULL;
+}
+
+/* Unsigned inner: the narrow add wraps mod 2^32, the widened add must not.
+   unsigned long long keeps the wide type 64-bit on ilp32/llp64 targets.  */
+__attribute__((noipa)) int
+uns_carry (unsigned int a)
+{
+  unsigned int t = a + 100u;
+  unsigned long long w = (unsigned long long) a + 100;
+  return w == (unsigned long long) t;
+}
+
+/* Legitimate in-range case (matches the PR): k == j - 1, so the two
+   loads are the same address and the rewrite may fire.  */
+__attribute__((noipa)) int
+inrange_eq (int *p, int j)
+{
+  int k = j - 1;
+  return p[j - 1] == p[k];
+}
+
+int
+main (void)
+{
+  if (oor_eq (5) != 0) __builtin_abort ();
+  if (oor_eq (-1) != 0) __builtin_abort ();
+  if (oor_val (5) != 5ULL + 0x100000000ULL) __builtin_abort ();
+  if (uns_carry (0xfffffff0u) != 0) __builtin_abort ();
+  if (uns_carry (10) != 1) __builtin_abort ();
+  int arr[4] = { 7, 7, 7, 7 };
+  if (inrange_eq (arr, 2) != 1) __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr124545.c b/gcc/testsuite/gcc.dg/pr124545.c
new file mode 100644
index 000000000000..a21346b179c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124545.c
@@ -0,0 +1,29 @@
+/* PR tree-optimization/124545 */
+/* Verify that VN recognizes (T)A + C == (T)(A + C') regardless of
+   operand order in the equality comparison.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int func1(int *a, int j) {
+  int k = j - 1;
+  return a[j - 1] == a[k];
+}
+
+int func2(int *a, int j) {
+  int k = j - 1;
+  return a[k] == a[j - 1];
+}
+
+int func3(int *a, int j) {
+  int k = j - 3;
+  return a[k] == a[j - 3];
+}
+
+int func4(int *a, int j) {
+  int k = j + 2;
+  return a[k] == a[j + 2];
+}
+
+/* All four functions should fold to return 1 after FRE.  */
+/* The pattern is not applied on ilp32 targets (PR116845).  */
+/* { dg-final { scan-tree-dump-times "return 1;" 4 "fre1" { xfail { ilp32 } } } } */
-- 
2.34.1

base-commit: d3cd3ff57b6ad4eb434fd75e4e54b2884ab4a44f
branch: ptomsich/pr124545-vn-inverse-v2

Reply via email to