Torbjörn,

I reproduced this on x86-64 (with -m32); it is just a type mismatch: the
test uses unsigned long as the "wide" type, which is no wider than
unsigned int on ILP32 targets.
Could you confirm that the following fully addresses this for you as well?

Thanks,
Philipp.

diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c b/gcc/testsuite/gcc.dg/pr124545-2.c
index b4806567acce..990f509d3490 100644
--- a/gcc/testsuite/gcc.dg/pr124545-2.c
+++ b/gcc/testsuite/gcc.dg/pr124545-2.c
@@ -4,7 +4,9 @@
    computed value.  In particular it must NOT fire when CST is not
    representable in the inner type (which would silently drop the bits
    above the inner precision), and it must stay correct for unsigned
-   inner types where the narrow operation wraps.  */
+   inner types where the narrow operation wraps.  Uses __UINT{32,64}_TYPE__
+   rather than unsigned {int,long} so that the narrow-vs-wide contrast is
+   independent of ILP32 vs LP64.  */
 /* { dg-do run } */
 /* { dg-options "-O2" } */

@@ -13,23 +15,23 @@
 __attribute__((noipa)) int
 oor_eq (int a)
 {
-  return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a;
+  return ((__UINT64_TYPE__) a + 0x100000000ULL) == (__UINT64_TYPE__) a;
 }

-__attribute__((noipa)) unsigned long long
+__attribute__((noipa)) __UINT64_TYPE__
 oor_val (int a)
 {
-  return (unsigned long long) a + 0x100000000ULL;
+  return (__UINT64_TYPE__) a + 0x100000000ULL;
 }

 /* Unsigned inner: narrow add wraps mod 2^32; the widened add does not.
    The result must match the wide arithmetic for every input.  */
 __attribute__((noipa)) int
-uns_carry (unsigned int a)
+uns_carry (__UINT32_TYPE__ a)
 {
-  unsigned int t = a + 100u;
-  unsigned long w = (unsigned long) a + 100;
-  return w == (unsigned long) t;
+  __UINT32_TYPE__ t = a + 100u;
+  __UINT64_TYPE__ w = (__UINT64_TYPE__) a + 100;
+  return w == (__UINT64_TYPE__) t;
 }

On Fri, 3 Jul 2026 at 20:12, Philipp Tomsich <[email protected]> wrote:
>
> Torbjörn,
>
> The test (as written today) doesn't really make sense on ILP32 (where
> sizeof(int) == sizeof(long)).
> We'll look into whether to disable the test there (gate it on LP64) or
> to explicitly use unsigned long long.
>
> Thanks for the report,
> Philipp.
>
>
> On Fri, 3 Jul 2026 at 20:00, Torbjorn SVENSSON
> <[email protected]> wrote:
> >
> > Hi,
> >
> > The gcc.dg/pr124545-2.c test does not work for arm-none-eabi.
> > Is this supposed to work or is it missing some dg-require-effective-target?
> >
> > Testing gcc.dg/pr124545-2.c
> > doing compile
> > Executing on host: /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc  
> > /build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c  -mthumb 
> > -march=armv7ve+neon -mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto   
> > -dumpbase "" -fdiagnostics-plain-output   -O2      --specs=rdimon.specs 
> > -Wl,--start-group -lc -lm -Wl,--end-group --specs=nosys.specs 
> > -Wl,--allow-multiple-definition -Wl,-u,_isatty,-u,_fstat  -Wl,-wrap,exit 
> > -Wl,-wrap,_exit -Wl,-wrap,main -Wl,-wrap,abort -Wl,gcc_tg.o -lm -o 
> > ./pr124545-2.exe    (timeout = 800)
> > spawn -ignore SIGHUP /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc 
> > /build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c -mthumb 
> > -march=armv7ve+neon -mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -dumpbase  
> > -fdiagnostics-plain-output -O2 --specs=rdimon.specs -Wl,--start-group -lc 
> > -lm -Wl,--end-group --specs=nosys.specs -Wl,--allow-multiple-definition 
> > -Wl,-u,_isatty,-u,_fstat -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main 
> > -Wl,-wrap,abort -Wl,gcc_tg.o -lm -o ./pr124545-2.exe
> > pid is 165557 -165557
> > pid is -1
> > output is  status 0
> > PASS: gcc.dg/pr124545-2.c (test for excess errors)
> > spawning command  qemu-system-arm -nographic -machine virt -cpu cortex-a7 
> > -m 256 -semihosting -monitor /dev/null -kernel ./pr124545-2.exe
> > spawn qemu-system-arm -nographic -machine virt -cpu cortex-a7 -m 256 
> > -semihosting -monitor /dev/null -kernel ./pr124545-2.exe
> >
> > *** EXIT code 4242
> >
> > *** EXIT code 1
> > pid is -1
> > Shell closed.
> > Output is
> > *** EXIT code 4242
> >
> > *** EXIT code 1
> >
> > FAIL: gcc.dg/pr124545-2.c execution test
> >
> >
> >
> > This is the assembly:
> > $ /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc  
> > /build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c  -mthumb 
> > -march=armv7ve+neon -mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -O2 -dp -S 
> > -o -
> >          .arch armv7-a
> >          .arch_extension virt
> >          .arch_extension idiv
> >          .arch_extension sec
> >          .arch_extension mp
> >          .fpu neon
> >          .eabi_attribute 28, 1
> >          .eabi_attribute 20, 1
> >          .eabi_attribute 21, 1
> >          .eabi_attribute 23, 3
> >          .eabi_attribute 24, 1
> >          .eabi_attribute 25, 1
> >          .eabi_attribute 26, 1
> >          .eabi_attribute 30, 2
> >          .eabi_attribute 34, 1
> >          .eabi_attribute 18, 4
> >          .file   "pr124545-2.c"
> >          .text
> >          .align  1
> >          .p2align 2,,3
> >          .global oor_eq
> >          .syntax unified
> >          .thumb
> >          .thumb_func
> >          .type   oor_eq, %function
> > oor_eq:
> >          @ args = 0, pretend = 0, frame = 0
> >          @ frame_needed = 0, uses_anonymous_args = 0
> >          @ link register save eliminated.
> >          movs    r0, #0  @ 10    [c=4 l=2]  *thumb2_movsi_shortim
> >          bx      lr      @ 17    [c=8 l=4]  *thumb2_return
> >          .size   oor_eq, .-oor_eq
> >          .align  1
> >          .p2align 2,,3
> >          .global oor_val
> >          .syntax unified
> >          .thumb
> >          .thumb_func
> >          .type   oor_val, %function
> > oor_val:
> >          @ args = 0, pretend = 0, frame = 0
> >          @ frame_needed = 0, uses_anonymous_args = 0
> >          @ link register save eliminated.
> >          asrs    r1, r0, #31     @ 6     [c=4 l=2]  *thumb2_shiftsi3_short/1
> >          adds    r1, r1, #1      @ 21    [c=4 l=2]  *thumb2_addsi_short/0
> >          bx      lr      @ 27    [c=8 l=4]  *thumb2_return
> >          .size   oor_val, .-oor_val
> >          .align  1
> >          .p2align 2,,3
> >          .global uns_carry
> >          .syntax unified
> >          .thumb
> >          .thumb_func
> >          .type   uns_carry, %function
> > uns_carry:
> >          @ args = 0, pretend = 0, frame = 0
> >          @ frame_needed = 0, uses_anonymous_args = 0
> >          @ link register save eliminated.
> >          movs    r0, #1  @ 10    [c=4 l=2]  *thumb2_movsi_shortim
> >          bx      lr      @ 17    [c=8 l=4]  *thumb2_return
> >          .size   uns_carry, .-uns_carry
> >          .align  1
> >          .p2align 2,,3
> >          .global inrange_eq
> >          .syntax unified
> >          .thumb
> >          .thumb_func
> >          .type   inrange_eq, %function
> > inrange_eq:
> >          @ args = 0, pretend = 0, frame = 0
> >          @ frame_needed = 0, uses_anonymous_args = 0
> >          @ link register save eliminated.
> >          movs    r0, #1  @ 11    [c=4 l=2]  *thumb2_movsi_shortim
> >          bx      lr      @ 18    [c=8 l=4]  *thumb2_return
> >          .size   inrange_eq, .-inrange_eq
> >          .section        .text.startup,"ax",%progbits
> >          .align  1
> >          .p2align 2,,3
> >          .global main
> >          .syntax unified
> >          .thumb
> >          .thumb_func
> >          .type   main, %function
> > main:
> >          @ args = 0, pretend = 0, frame = 16
> >          @ frame_needed = 0, uses_anonymous_args = 0
> >          push    {r4, lr}        @ 108   [c=8 l=2]  *push_multi
> >          movs    r0, #5  @ 5     [c=4 l=2]  *thumb2_movsi_shortim
> >          sub     sp, sp, #16     @ 109   [c=4 l=4]  *arm_addsi3/11
> >          bl      oor_eq          @ 6     [c=4 l=4]  *call_value_symbol
> >          cbnz    r0, .L8 @ 9     [c=16 l=2]  *thumb2_cbnz/0
> >          mov     r0, #-1 @ 15    [c=4 l=4]  *thumb2_movsi_vfp/1
> >          bl      oor_eq          @ 16    [c=4 l=4]  *call_value_symbol
> >          cbnz    r0, .L8 @ 20    [c=16 l=2]  *thumb2_cbnz/0
> >          movs    r0, #5  @ 22    [c=4 l=2]  *thumb2_movsi_shortim
> >          bl      oor_val         @ 23    [c=4 l=4]  *call_value_symbol
> >          cmp     r1, #1  @ 26    [c=20 l=6]  *cmp_ior/0
> >          it      eq
> >          cmpeq   r0, #5
> >          bne     .L8             @ 27    [c=16 l=2]  arm_cond_branch
> >          mvn     r0, #15 @ 29    [c=4 l=4]  *thumb2_movsi_vfp/3
> >          bl      uns_carry               @ 30    [c=4 l=4]  
> > *call_value_symbol
> >          mov     r4, r0  @ 93    [c=4 l=2]  *thumb2_movsi_vfp/0
> >          cbnz    r0, .L8 @ 33    [c=16 l=2]  *thumb2_cbnz/0
> >          movs    r0, #10 @ 35    [c=4 l=2]  *thumb2_movsi_shortim
> >          bl      uns_carry               @ 36    [c=4 l=4]  
> > *call_value_symbol
> >          cmp     r0, #1  @ 38    [c=4 l=2]  *arm_cmpsi_insn/0
> >          bne     .L8             @ 39    [c=16 l=2]  arm_cond_branch
> >          movw    r3, #:lower16:.LANCHOR0 @ 106   [c=4 l=4]  
> > *thumb2_movsi_vfp/4
> >          movt    r3, #:upper16:.LANCHOR0 @ 107   [c=4 l=4]  *arm_movt/0
> >          ldm     r3, {r0, r1, r2, r3}    @ 44    [c=8 l=4]  *ldm4_
> >          stm     sp, {r0, r1, r2, r3}    @ 45    [c=8 l=4]  *stm4_
> >          movs    r1, #2  @ 47    [c=4 l=2]  *thumb2_movsi_shortim
> >          mov     r0, sp  @ 48    [c=4 l=2]  *thumb2_movsi_vfp/0
> >          bl      inrange_eq              @ 49    [c=4 l=4]  
> > *call_value_symbol
> >          cmp     r0, #1  @ 51    [c=4 l=2]  *arm_cmpsi_insn/0
> >          bne     .L8             @ 52    [c=16 l=2]  arm_cond_branch
> >          mov     r0, r4  @ 58    [c=4 l=2]  *thumb2_movsi_vfp/0
> >          add     sp, sp, #16     @ 113   [c=4 l=4]  *arm_addsi3/5
> >          @ sp needed     @ 114   [c=8 l=0]  force_register_use
> >          pop     {r4, pc}        @ 115   [c=8 l=2]  
> > *pop_multiple_with_writeback_and_return
> > .L8:
> >          bl      abort           @ 11    [c=8 l=4]  *call_symbol
> >          .size   main, .-main
> >          .section        .rodata
> >          .align  2
> >          .set    .LANCHOR0,. + 0
> > .LC0:
> >          .word   7
> >          .word   7
> >          .word   7
> >          .word   7
> >          .ident  "GCC: (r17-2109-g2b8f4671103159) 17.0.0 20260703 
> > (experimental)"
> >
> >
> > Let me know if you need anything else or want me to test some potential fix.
> >
> > Kind regards,
> > Torbjörn
> >
> > On 2026-07-02 08:56, Richard Biener wrote:
> > > On Wed, 1 Jul 2026, Philipp Tomsich wrote:
> > >
> > >> visit_nary_op canonicalises (T)(A + C) into (T)A + (T)C for its VN
> > >> lookup, but not the reverse -- so whether VN discovers (T)A + C ==
> > >> (T)(A + C) depends on which form it sees first.  Add a match.pd rule
> > >> that rewrites (T)A +- CST into (T)(A +- CST') using the op! qualifier,
> > >> so the fold only fires when the narrow expression already has a value
> > >> number -- i.e. only inside VN via mprts_hook.
> > >>
> > >> Restrict to TYPE_OVERFLOW_UNDEFINED inner types: for unsigned inner the
> > >> narrow op wraps mod 2^prec (defined) while the widened outer op does
> > >> not, changing the observed value (bitfld-5.c is the concrete miscompile
> > >> when the guard is loosened).
> > >>
> > >> Use wi::min_precision (CST, SIGNED) rather than int_fits_type_p for the
> > >> fits-check, so sign-encoded small negatives (e.g. -1 as sizetype's
> > >> 0xFFFF...FFFF) qualify.
> > >
> > > OK.
> > >
> > > Thanks,
> > > Richard.
> > >
> > >>      PR tree-optimization/124545
> > >>
> > >> gcc/ChangeLog:
> > >>
> > >>      * match.pd: Add (T)A +- CST -> (T)(A +- CST') for widening
> > >>      conversions from a signed inner type with undefined overflow.
> > >>
> > >> gcc/testsuite/ChangeLog:
> > >>
> > >>      * gcc.dg/pr124545.c: New test.
> > >>      * gcc.dg/pr124545-2.c: New test.
> > >>
> > >> Signed-off-by: Philipp Tomsich <[email protected]>
> > >>
> > >> ---
> > >>
> > >>   gcc/match.pd                      | 32 ++++++++++++++++++
> > >>   gcc/testsuite/gcc.dg/pr124545-2.c | 55 +++++++++++++++++++++++++++++++
> > >>   gcc/testsuite/gcc.dg/pr124545.c   | 29 ++++++++++++++++
> > >>   3 files changed, 116 insertions(+)
> > >>   create mode 100644 gcc/testsuite/gcc.dg/pr124545-2.c
> > >>   create mode 100644 gcc/testsuite/gcc.dg/pr124545.c
> > >>
> > >> diff --git a/gcc/match.pd b/gcc/match.pd
> > >> index ddf3b61638ce..817a52499128 100644
> > >> --- a/gcc/match.pd
> > >> +++ b/gcc/match.pd
> > >> @@ -4067,6 +4067,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >>          (plus (convert @0) (op @2 (convert @1))))))
> > >>   #endif
> > >>
> > >> +/* Inverse of the above: (T)(A) +- CST -> (T)(A +- CST') when T is a
> > >> +   widening conversion from a type with undefined overflow and the outer
> > >> +   type wraps.  This allows VN to discover that (T)A + (T)C == (T)(A + 
> > >> C)
> > >> +   regardless of which form appears first in program order.  PR124545.
> > >> +   The rewrite is unsound for unsigned inner types: the narrow op wraps
> > >> +   mod 2^prec (defined) while the widened op does not, changing the
> > >> +   observed value.  Cover the unsigned case separately once ranger can
> > >> +   prove no wrap.  */
> > >> +#if GIMPLE
> > >> +  (for op (plus minus)
> > >> +   (simplify
> > >> +    (op (convert @0) INTEGER_CST@1)
> > >> +     (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
> > >> +      && TREE_CODE (type) == INTEGER_TYPE
> > >> +      && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> > >> +      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> > >> +      && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> > >> +      && TYPE_OVERFLOW_WRAPS (type)
> > >> +      /* CST must be the sign-extension of its low inner-precision bits,
> > >> +         otherwise narrowing changes the value.  Use min_precision (..,
> > >> +         SIGNED) rather than int_fits_type_p so that small negative 
> > >> offsets
> > >> +         encoded as large unsigned constants (e.g. -1 as sizetype) still
> > >> +         qualify.  */
> > >> +      && wi::min_precision (wi::to_wide (@1), SIGNED)
> > >> +         <= TYPE_PRECISION (TREE_TYPE (@0)))
> > >> +       (with {
> > >> +      wide_int c1 = wi::to_wide (@1);
> > >> +      tree inner_cst = wide_int_to_tree (TREE_TYPE (@0),
> > >> +                         wi::sext (c1, TYPE_PRECISION (TREE_TYPE 
> > >> (@0)))); }
> > >> +    (convert (op! @0 { inner_cst; }))))))
> > >> +#endif
> > >> +
> > >>   /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be 
> > >> simplified
> > >>      to a simple value.  */
> > >>     (for op (plus minus)
> > >> diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c 
> > >> b/gcc/testsuite/gcc.dg/pr124545-2.c
> > >> new file mode 100644
> > >> index 000000000000..b4806567acce
> > >> --- /dev/null
> > >> +++ b/gcc/testsuite/gcc.dg/pr124545-2.c
> > >> @@ -0,0 +1,55 @@
> > >> +/* PR tree-optimization/124545 */
> > >> +/* Runtime correctness for the inverse-widening VN rewrite
> > >> +   (T)A +- CST -> (T)(A +- CST').  The rewrite must never change the
> > >> +   computed value.  In particular it must NOT fire when CST is not
> > >> +   representable in the inner type (which would silently drop the bits
> > >> +   above the inner precision), and it must stay correct for unsigned
> > >> +   inner types where the narrow operation wraps.  */
> > >> +/* { dg-do run } */
> > >> +/* { dg-options "-O2" } */
> > >> +
> > >> +/* CST = 2^32 does not fit in int: the value must be preserved.
> > >> +   Before the fix this comparison folded to a constant 1.  */
> > >> +__attribute__((noipa)) int
> > >> +oor_eq (int a)
> > >> +{
> > >> +  return ((unsigned long long) a + 0x100000000ULL) == (unsigned long 
> > >> long) a;
> > >> +}
> > >> +
> > >> +__attribute__((noipa)) unsigned long long
> > >> +oor_val (int a)
> > >> +{
> > >> +  return (unsigned long long) a + 0x100000000ULL;
> > >> +}
> > >> +
> > >> +/* Unsigned inner: narrow add wraps mod 2^32; the widened add does not.
> > >> +   The result must match the wide arithmetic for every input.  */
> > >> +__attribute__((noipa)) int
> > >> +uns_carry (unsigned int a)
> > >> +{
> > >> +  unsigned int t = a + 100u;
> > >> +  unsigned long w = (unsigned long) a + 100;
> > >> +  return w == (unsigned long) t;
> > >> +}
> > >> +
> > >> +/* Legitimate in-range case (matches the PR): k == j - 1, so the two
> > >> +   loads are the same address and the rewrite may fire.  */
> > >> +__attribute__((noipa)) int
> > >> +inrange_eq (int *p, int j)
> > >> +{
> > >> +  int k = j - 1;
> > >> +  return p[j - 1] == p[k];
> > >> +}
> > >> +
> > >> +int
> > >> +main (void)
> > >> +{
> > >> +  if (oor_eq (5) != 0) __builtin_abort ();
> > >> +  if (oor_eq (-1) != 0) __builtin_abort ();
> > >> +  if (oor_val (5) != 5ULL + 0x100000000ULL) __builtin_abort ();
> > >> +  if (uns_carry (0xfffffff0u) != 0) __builtin_abort ();
> > >> +  if (uns_carry (10) != 1) __builtin_abort ();
> > >> +  int arr[4] = { 7, 7, 7, 7 };
> > >> +  if (inrange_eq (arr, 2) != 1) __builtin_abort ();
> > >> +  return 0;
> > >> +}
> > >> diff --git a/gcc/testsuite/gcc.dg/pr124545.c 
> > >> b/gcc/testsuite/gcc.dg/pr124545.c
> > >> new file mode 100644
> > >> index 000000000000..a21346b179c7
> > >> --- /dev/null
> > >> +++ b/gcc/testsuite/gcc.dg/pr124545.c
> > >> @@ -0,0 +1,29 @@
> > >> +/* PR tree-optimization/124545 */
> > >> +/* Verify that VN recognizes (T)A + C == (T)(A + C') regardless of
> > >> +   operand order in the equality comparison.  */
> > >> +/* { dg-do compile } */
> > >> +/* { dg-options "-O2 -fdump-tree-fre1" } */
> > >> +
> > >> +int func1(int *a, int j) {
> > >> +  int k = j - 1;
> > >> +  return a[j - 1] == a[k];
> > >> +}
> > >> +
> > >> +int func2(int *a, int j) {
> > >> +  int k = j - 1;
> > >> +  return a[k] == a[j - 1];
> > >> +}
> > >> +
> > >> +int func3(int *a, int j) {
> > >> +  int k = j - 3;
> > >> +  return a[k] == a[j - 3];
> > >> +}
> > >> +
> > >> +int func4(int *a, int j) {
> > >> +  int k = j + 2;
> > >> +  return a[k] == a[j + 2];
> > >> +}
> > >> +
> > >> +/* All four functions should fold to return 1 after FRE.  */
> > >> +/* The pattern is not applied on ilp32 targets (PR116845).  */
> > >> +/* { dg-final { scan-tree-dump-times "return 1;" 4 "fre1" { xfail { 
> > >> ilp32 } } } } */
> > >>
> > >
> >

Reply via email to