Hi Philipp,

Thanks for the quick turnaround on this.


On 2026-07-03 20:21, Philipp Tomsich wrote:
Torbjörn,

I reproduced this on x86-64 (against -m32) and it is just a type mismatch.
Could you confirm that the following fully addresses this for you as well:

Thanks,
Philipp.

diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c
b/gcc/testsuite/gcc.dg/pr124545-2.c
index b4806567acce..990f509d3490 100644
--- a/gcc/testsuite/gcc.dg/pr124545-2.c
+++ b/gcc/testsuite/gcc.dg/pr124545-2.c
@@ -4,7 +4,9 @@
     computed value.  In particular it must NOT fire when CST is not
     representable in the inner type (which would silently drop the bits
     above the inner precision), and it must stay correct for unsigned
-   inner types where the narrow operation wraps.  */
+   inner types where the narrow operation wraps.  Uses __UINT{32,64}_TYPE__
+   rather than unsigned {int,long} so that the narrow-vs-wide contrast is
+   independent of ILP32 vs LP64.  */
  /* { dg-do run } */
  /* { dg-options "-O2" } */

@@ -13,23 +15,23 @@
  __attribute__((noipa)) int
  oor_eq (int a)
  {
-  return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a;
+  return ((__UINT64_TYPE__) a + 0x100000000ULL) == (__UINT64_TYPE__) a;
  }

-__attribute__((noipa)) unsigned long long
+__attribute__((noipa)) __UINT64_TYPE__
  oor_val (int a)
  {
-  return (unsigned long long) a + 0x100000000ULL;
+  return (__UINT64_TYPE__) a + 0x100000000ULL;
  }

  /* Unsigned inner: narrow add wraps mod 2^32; the widened add does not.
     The result must match the wide arithmetic for every input.  */
  __attribute__((noipa)) int
-uns_carry (unsigned int a)
+uns_carry (__UINT32_TYPE__ a)
  {
-  unsigned int t = a + 100u;
-  unsigned long w = (unsigned long) a + 100;
-  return w == (unsigned long) t;
+  __UINT32_TYPE__ t = a + 100u;
+  __UINT64_TYPE__ w = (__UINT64_TYPE__) a + 100;
+  return w == (__UINT64_TYPE__) t;
  }

I can confirm that the above fixes the failure for arm-none-eabi.

Kind regards,
Torbjörn



On Fri, 3 Jul 2026 at 20:12, Philipp Tomsich <[email protected]> wrote:

Torbjörn,

The test (as written today) doesn't really make sense on ILP32 (where
sizeof(int) == sizeof(long)).
We'll look into whether to disable (gate on LP64) or to explicitly use
unsigned long long.

Thanks for the report,
Philipp.


On Fri, 3 Jul 2026 at 20:00, Torbjorn SVENSSON
<[email protected]> wrote:

Hi,

The gcc.dg/pr124545-2.c test does not work for arm-none-eabi.
Is this supposed to work or is it missing some dg-require-effective-target?

Testing gcc.dg/pr124545-2.c
doing compile
Executing on host: /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc  
/build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c  -mthumb -march=armv7ve+neon 
-mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto   -dumpbase "" 
-fdiagnostics-plain-output   -O2      --specs=rdimon.specs -Wl,--start-group -lc -lm 
-Wl,--end-group --specs=nosys.specs -Wl,--allow-multiple-definition 
-Wl,-u,_isatty,-u,_fstat  -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main -Wl,-wrap,abort 
-Wl,gcc_tg.o -lm -o ./pr124545-2.exe    (timeout = 800)
spawn -ignore SIGHUP /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc 
/build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c -mthumb -march=armv7ve+neon 
-mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -dumpbase  
-fdiagnostics-plain-output -O2 --specs=rdimon.specs -Wl,--start-group -lc -lm 
-Wl,--end-group --specs=nosys.specs -Wl,--allow-multiple-definition 
-Wl,-u,_isatty,-u,_fstat -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main 
-Wl,-wrap,abort -Wl,gcc_tg.o -lm -o ./pr124545-2.exe
pid is 165557 -165557
pid is -1
output is  status 0
PASS: gcc.dg/pr124545-2.c (test for excess errors)
spawning command  qemu-system-arm -nographic -machine virt -cpu cortex-a7 -m 
256 -semihosting -monitor /dev/null -kernel ./pr124545-2.exe
spawn qemu-system-arm -nographic -machine virt -cpu cortex-a7 -m 256 
-semihosting -monitor /dev/null -kernel ./pr124545-2.exe

*** EXIT code 4242

*** EXIT code 1
pid is -1
Shell closed.
Output is
*** EXIT code 4242

*** EXIT code 1

FAIL: gcc.dg/pr124545-2.c execution test



This is the assembly:
$ /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc  
/build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c  -mthumb -march=armv7ve+neon 
-mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -O2 -dp -S -o -
          .arch armv7-a
          .arch_extension virt
          .arch_extension idiv
          .arch_extension sec
          .arch_extension mp
          .fpu neon
          .eabi_attribute 28, 1
          .eabi_attribute 20, 1
          .eabi_attribute 21, 1
          .eabi_attribute 23, 3
          .eabi_attribute 24, 1
          .eabi_attribute 25, 1
          .eabi_attribute 26, 1
          .eabi_attribute 30, 2
          .eabi_attribute 34, 1
          .eabi_attribute 18, 4
          .file   "pr124545-2.c"
          .text
          .align  1
          .p2align 2,,3
          .global oor_eq
          .syntax unified
          .thumb
          .thumb_func
          .type   oor_eq, %function
oor_eq:
          @ args = 0, pretend = 0, frame = 0
          @ frame_needed = 0, uses_anonymous_args = 0
          @ link register save eliminated.
          movs    r0, #0  @ 10    [c=4 l=2]  *thumb2_movsi_shortim
          bx      lr      @ 17    [c=8 l=4]  *thumb2_return
          .size   oor_eq, .-oor_eq
          .align  1
          .p2align 2,,3
          .global oor_val
          .syntax unified
          .thumb
          .thumb_func
          .type   oor_val, %function
oor_val:
          @ args = 0, pretend = 0, frame = 0
          @ frame_needed = 0, uses_anonymous_args = 0
          @ link register save eliminated.
          asrs    r1, r0, #31     @ 6     [c=4 l=2]  *thumb2_shiftsi3_short/1
          adds    r1, r1, #1      @ 21    [c=4 l=2]  *thumb2_addsi_short/0
          bx      lr      @ 27    [c=8 l=4]  *thumb2_return
          .size   oor_val, .-oor_val
          .align  1
          .p2align 2,,3
          .global uns_carry
          .syntax unified
          .thumb
          .thumb_func
          .type   uns_carry, %function
uns_carry:
          @ args = 0, pretend = 0, frame = 0
          @ frame_needed = 0, uses_anonymous_args = 0
          @ link register save eliminated.
          movs    r0, #1  @ 10    [c=4 l=2]  *thumb2_movsi_shortim
          bx      lr      @ 17    [c=8 l=4]  *thumb2_return
          .size   uns_carry, .-uns_carry
          .align  1
          .p2align 2,,3
          .global inrange_eq
          .syntax unified
          .thumb
          .thumb_func
          .type   inrange_eq, %function
inrange_eq:
          @ args = 0, pretend = 0, frame = 0
          @ frame_needed = 0, uses_anonymous_args = 0
          @ link register save eliminated.
          movs    r0, #1  @ 11    [c=4 l=2]  *thumb2_movsi_shortim
          bx      lr      @ 18    [c=8 l=4]  *thumb2_return
          .size   inrange_eq, .-inrange_eq
          .section        .text.startup,"ax",%progbits
          .align  1
          .p2align 2,,3
          .global main
          .syntax unified
          .thumb
          .thumb_func
          .type   main, %function
main:
          @ args = 0, pretend = 0, frame = 16
          @ frame_needed = 0, uses_anonymous_args = 0
          push    {r4, lr}        @ 108   [c=8 l=2]  *push_multi
          movs    r0, #5  @ 5     [c=4 l=2]  *thumb2_movsi_shortim
          sub     sp, sp, #16     @ 109   [c=4 l=4]  *arm_addsi3/11
          bl      oor_eq          @ 6     [c=4 l=4]  *call_value_symbol
          cbnz    r0, .L8 @ 9     [c=16 l=2]  *thumb2_cbnz/0
          mov     r0, #-1 @ 15    [c=4 l=4]  *thumb2_movsi_vfp/1
          bl      oor_eq          @ 16    [c=4 l=4]  *call_value_symbol
          cbnz    r0, .L8 @ 20    [c=16 l=2]  *thumb2_cbnz/0
          movs    r0, #5  @ 22    [c=4 l=2]  *thumb2_movsi_shortim
          bl      oor_val         @ 23    [c=4 l=4]  *call_value_symbol
          cmp     r1, #1  @ 26    [c=20 l=6]  *cmp_ior/0
          it      eq
          cmpeq   r0, #5
          bne     .L8             @ 27    [c=16 l=2]  arm_cond_branch
          mvn     r0, #15 @ 29    [c=4 l=4]  *thumb2_movsi_vfp/3
          bl      uns_carry               @ 30    [c=4 l=4]  *call_value_symbol
          mov     r4, r0  @ 93    [c=4 l=2]  *thumb2_movsi_vfp/0
          cbnz    r0, .L8 @ 33    [c=16 l=2]  *thumb2_cbnz/0
          movs    r0, #10 @ 35    [c=4 l=2]  *thumb2_movsi_shortim
          bl      uns_carry               @ 36    [c=4 l=4]  *call_value_symbol
          cmp     r0, #1  @ 38    [c=4 l=2]  *arm_cmpsi_insn/0
          bne     .L8             @ 39    [c=16 l=2]  arm_cond_branch
          movw    r3, #:lower16:.LANCHOR0 @ 106   [c=4 l=4]  *thumb2_movsi_vfp/4
          movt    r3, #:upper16:.LANCHOR0 @ 107   [c=4 l=4]  *arm_movt/0
          ldm     r3, {r0, r1, r2, r3}    @ 44    [c=8 l=4]  *ldm4_
          stm     sp, {r0, r1, r2, r3}    @ 45    [c=8 l=4]  *stm4_
          movs    r1, #2  @ 47    [c=4 l=2]  *thumb2_movsi_shortim
          mov     r0, sp  @ 48    [c=4 l=2]  *thumb2_movsi_vfp/0
          bl      inrange_eq              @ 49    [c=4 l=4]  *call_value_symbol
          cmp     r0, #1  @ 51    [c=4 l=2]  *arm_cmpsi_insn/0
          bne     .L8             @ 52    [c=16 l=2]  arm_cond_branch
          mov     r0, r4  @ 58    [c=4 l=2]  *thumb2_movsi_vfp/0
          add     sp, sp, #16     @ 113   [c=4 l=4]  *arm_addsi3/5
          @ sp needed     @ 114   [c=8 l=0]  force_register_use
          pop     {r4, pc}        @ 115   [c=8 l=2]  
*pop_multiple_with_writeback_and_return
.L8:
          bl      abort           @ 11    [c=8 l=4]  *call_symbol
          .size   main, .-main
          .section        .rodata
          .align  2
          .set    .LANCHOR0,. + 0
.LC0:
          .word   7
          .word   7
          .word   7
          .word   7
          .ident  "GCC: (r17-2109-g2b8f4671103159) 17.0.0 20260703 
(experimental)"


Let me know if you need anything else or want me to test some potential fix.

Kind regards,
Torbjörn

On 2026-07-02 08:56, Richard Biener wrote:
On Wed, 1 Jul 2026, Philipp Tomsich wrote:

visit_nary_op canonicalises (T)(A + C) into (T)A + (T)C for its VN
lookup, but not the reverse -- so whether VN discovers (T)A + C ==
(T)(A + C) depends on which form it sees first.  Add a match.pd rule
that rewrites (T)A +- CST into (T)(A +- CST') using the op! qualifier,
so the fold only fires when the narrow expression already has a value
number -- i.e. only inside VN via mprts_hook.

Restrict to TYPE_OVERFLOW_UNDEFINED inner types: for unsigned inner the
narrow op wraps mod 2^prec (defined) while the widened outer op does
not, changing the observed value (bitfld-5.c is the concrete miscompile
when the guard is loosened).

Use wi::min_precision (CST, SIGNED) rather than int_fits_type_p for the
fits-check, so sign-encoded small negatives (e.g. -1 as sizetype's
0xFFFF...FFFF) qualify.

OK.

Thanks,
Richard.

      PR tree-optimization/124545

gcc/ChangeLog:

      * match.pd: Add (T)A +- CST -> (T)(A +- CST') for widening
      conversions from a signed inner type with undefined overflow.

gcc/testsuite/ChangeLog:

      * gcc.dg/pr124545.c: New test.
      * gcc.dg/pr124545-2.c: New test.

Signed-off-by: Philipp Tomsich <[email protected]>

---

   gcc/match.pd                      | 32 ++++++++++++++++++
   gcc/testsuite/gcc.dg/pr124545-2.c | 55 +++++++++++++++++++++++++++++++
   gcc/testsuite/gcc.dg/pr124545.c   | 29 ++++++++++++++++
   3 files changed, 116 insertions(+)
   create mode 100644 gcc/testsuite/gcc.dg/pr124545-2.c
   create mode 100644 gcc/testsuite/gcc.dg/pr124545.c

diff --git a/gcc/match.pd b/gcc/match.pd
index ddf3b61638ce..817a52499128 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4067,6 +4067,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
          (plus (convert @0) (op @2 (convert @1))))))
   #endif

+/* Inverse of the above: (T)(A) +- CST -> (T)(A +- CST') when T is a
+   widening conversion from a type with undefined overflow and the outer
+   type wraps.  This allows VN to discover that (T)A + (T)C == (T)(A + C)
+   regardless of which form appears first in program order.  PR124545.
+   The rewrite is unsound for unsigned inner types: the narrow op wraps
+   mod 2^prec (defined) while the widened op does not, changing the
+   observed value.  Cover the unsigned case separately once ranger can
+   prove no wrap.  */
+#if GIMPLE
+  (for op (plus minus)
+   (simplify
+    (op (convert @0) INTEGER_CST@1)
+     (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+      && TREE_CODE (type) == INTEGER_TYPE
+      && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+      && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+      && TYPE_OVERFLOW_WRAPS (type)
+      /* CST must be the sign-extension of its low inner-precision bits,
+         otherwise narrowing changes the value.  Use min_precision (..,
+         SIGNED) rather than int_fits_type_p so that small negative offsets
+         encoded as large unsigned constants (e.g. -1 as sizetype) still
+         qualify.  */
+      && wi::min_precision (wi::to_wide (@1), SIGNED)
+         <= TYPE_PRECISION (TREE_TYPE (@0)))
+       (with {
+      wide_int c1 = wi::to_wide (@1);
+      tree inner_cst = wide_int_to_tree (TREE_TYPE (@0),
+                         wi::sext (c1, TYPE_PRECISION (TREE_TYPE (@0)))); }
+    (convert (op! @0 { inner_cst; }))))))
+#endif
+
   /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
      to a simple value.  */
     (for op (plus minus)
diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c b/gcc/testsuite/gcc.dg/pr124545-2.c
new file mode 100644
index 000000000000..b4806567acce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124545-2.c
@@ -0,0 +1,55 @@
+/* PR tree-optimization/124545 */
+/* Runtime correctness for the inverse-widening VN rewrite
+   (T)A +- CST -> (T)(A +- CST').  The rewrite must never change the
+   computed value.  In particular it must NOT fire when CST is not
+   representable in the inner type (which would silently drop the bits
+   above the inner precision), and it must stay correct for unsigned
+   inner types where the narrow operation wraps.  */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* CST = 2^32 does not fit in int: the value must be preserved.
+   Before the fix this comparison folded to a constant 1.  */
+__attribute__((noipa)) int
+oor_eq (int a)
+{
+  return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a;
+}
+
+__attribute__((noipa)) unsigned long long
+oor_val (int a)
+{
+  return (unsigned long long) a + 0x100000000ULL;
+}
+
+/* Unsigned inner: narrow add wraps mod 2^32; the widened add does not.
+   The result must match the wide arithmetic for every input.  */
+__attribute__((noipa)) int
+uns_carry (unsigned int a)
+{
+  unsigned int t = a + 100u;
+  unsigned long w = (unsigned long) a + 100;
+  return w == (unsigned long) t;
+}
+
+/* Legitimate in-range case (matches the PR): k == j - 1, so the two
+   loads are the same address and the rewrite may fire.  */
+__attribute__((noipa)) int
+inrange_eq (int *p, int j)
+{
+  int k = j - 1;
+  return p[j - 1] == p[k];
+}
+
+int
+main (void)
+{
+  if (oor_eq (5) != 0) __builtin_abort ();
+  if (oor_eq (-1) != 0) __builtin_abort ();
+  if (oor_val (5) != 5ULL + 0x100000000ULL) __builtin_abort ();
+  if (uns_carry (0xfffffff0u) != 0) __builtin_abort ();
+  if (uns_carry (10) != 1) __builtin_abort ();
+  int arr[4] = { 7, 7, 7, 7 };
+  if (inrange_eq (arr, 2) != 1) __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr124545.c b/gcc/testsuite/gcc.dg/pr124545.c
new file mode 100644
index 000000000000..a21346b179c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124545.c
@@ -0,0 +1,29 @@
+/* PR tree-optimization/124545 */
+/* Verify that VN recognizes (T)A + C == (T)(A + C') regardless of
+   operand order in the equality comparison.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int func1(int *a, int j) {
+  int k = j - 1;
+  return a[j - 1] == a[k];
+}
+
+int func2(int *a, int j) {
+  int k = j - 1;
+  return a[k] == a[j - 1];
+}
+
+int func3(int *a, int j) {
+  int k = j - 3;
+  return a[k] == a[j - 3];
+}
+
+int func4(int *a, int j) {
+  int k = j + 2;
+  return a[k] == a[j + 2];
+}
+
+/* All four functions should fold to return 1 after FRE.  */
+/* The pattern is not applied on ilp32 targets (PR116845).  */
+/* { dg-final { scan-tree-dump-times "return 1;" 4 "fre1" { xfail { ilp32 } } } } */




Reply via email to