MINUS

Philipp Tomsich Fri, 03 Jul 2026 11:12:43 -0700

Torbjörn,

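The test (as written today) doesn't really make sense on ILP32 (where
sizeof(int) == sizeof(long)): uns_carry widens to unsigned long, which is
no wider than unsigned int there, so the "wide" add wraps as well.
We'll look into whether to disable it on such targets (gate on LP64) or to
explicitly use unsigned long long.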


Thanks for the report,
Philipp.


On Fri, 3 Jul 2026 at 20:00, Torbjorn SVENSSON
<[email protected]> wrote:
>
> Hi,
>
> The gcc.dg/pr124545-2.c test does not work for arm-none-eabi.
> Is this supposed to work or is it missing some dg-require-effective-target?
>
> Testing gcc.dg/pr124545-2.c
> doing compile
> Executing on host: /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc  
> /build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c  -mthumb -march=armv7ve+neon 
> -mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto   -dumpbase "" 
> -fdiagnostics-plain-output   -O2      --specs=rdimon.specs -Wl,--start-group 
> -lc -lm -Wl,--end-group --specs=nosys.specs -Wl,--allow-multiple-definition 
> -Wl,-u,_isatty,-u,_fstat  -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main 
> -Wl,-wrap,abort -Wl,gcc_tg.o -lm -o ./pr124545-2.exe    (timeout = 800)
> spawn -ignore SIGHUP /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc 
> /build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c -mthumb -march=armv7ve+neon 
> -mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -dumpbase  
> -fdiagnostics-plain-output -O2 --specs=rdimon.specs -Wl,--start-group -lc -lm 
> -Wl,--end-group --specs=nosys.specs -Wl,--allow-multiple-definition 
> -Wl,-u,_isatty,-u,_fstat -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main 
> -Wl,-wrap,abort -Wl,gcc_tg.o -lm -o ./pr124545-2.exe
> pid is 165557 -165557
> pid is -1
> output is  status 0
> PASS: gcc.dg/pr124545-2.c (test for excess errors)
> spawning command  qemu-system-arm -nographic -machine virt -cpu cortex-a7 -m 
> 256 -semihosting -monitor /dev/null -kernel ./pr124545-2.exe
> spawn qemu-system-arm -nographic -machine virt -cpu cortex-a7 -m 256 
> -semihosting -monitor /dev/null -kernel ./pr124545-2.exe
>
> *** EXIT code 4242
>
> *** EXIT code 1
> pid is -1
> Shell closed.
> Output is
> *** EXIT code 4242
>
> *** EXIT code 1
>
> FAIL: gcc.dg/pr124545-2.c execution test
>
>
>
> This is the assembly:
> $ /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc  
> /build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c  -mthumb -march=armv7ve+neon 
> -mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -O2 -dp -S -o -
>          .arch armv7-a
>          .arch_extension virt
>          .arch_extension idiv
>          .arch_extension sec
>          .arch_extension mp
>          .fpu neon
>          .eabi_attribute 28, 1
>          .eabi_attribute 20, 1
>          .eabi_attribute 21, 1
>          .eabi_attribute 23, 3
>          .eabi_attribute 24, 1
>          .eabi_attribute 25, 1
>          .eabi_attribute 26, 1
>          .eabi_attribute 30, 2
>          .eabi_attribute 34, 1
>          .eabi_attribute 18, 4
>          .file   "pr124545-2.c"
>          .text
>          .align  1
>          .p2align 2,,3
>          .global oor_eq
>          .syntax unified
>          .thumb
>          .thumb_func
>          .type   oor_eq, %function
> oor_eq:
>          @ args = 0, pretend = 0, frame = 0
>          @ frame_needed = 0, uses_anonymous_args = 0
>          @ link register save eliminated.
>          movs    r0, #0  @ 10    [c=4 l=2]  *thumb2_movsi_shortim
>          bx      lr      @ 17    [c=8 l=4]  *thumb2_return
>          .size   oor_eq, .-oor_eq
>          .align  1
>          .p2align 2,,3
>          .global oor_val
>          .syntax unified
>          .thumb
>          .thumb_func
>          .type   oor_val, %function
> oor_val:
>          @ args = 0, pretend = 0, frame = 0
>          @ frame_needed = 0, uses_anonymous_args = 0
>          @ link register save eliminated.
>          asrs    r1, r0, #31     @ 6     [c=4 l=2]  *thumb2_shiftsi3_short/1
>          adds    r1, r1, #1      @ 21    [c=4 l=2]  *thumb2_addsi_short/0
>          bx      lr      @ 27    [c=8 l=4]  *thumb2_return
>          .size   oor_val, .-oor_val
>          .align  1
>          .p2align 2,,3
>          .global uns_carry
>          .syntax unified
>          .thumb
>          .thumb_func
>          .type   uns_carry, %function
> uns_carry:
>          @ args = 0, pretend = 0, frame = 0
>          @ frame_needed = 0, uses_anonymous_args = 0
>          @ link register save eliminated.
>          movs    r0, #1  @ 10    [c=4 l=2]  *thumb2_movsi_shortim
>          bx      lr      @ 17    [c=8 l=4]  *thumb2_return
>          .size   uns_carry, .-uns_carry
>          .align  1
>          .p2align 2,,3
>          .global inrange_eq
>          .syntax unified
>          .thumb
>          .thumb_func
>          .type   inrange_eq, %function
> inrange_eq:
>          @ args = 0, pretend = 0, frame = 0
>          @ frame_needed = 0, uses_anonymous_args = 0
>          @ link register save eliminated.
>          movs    r0, #1  @ 11    [c=4 l=2]  *thumb2_movsi_shortim
>          bx      lr      @ 18    [c=8 l=4]  *thumb2_return
>          .size   inrange_eq, .-inrange_eq
>          .section        .text.startup,"ax",%progbits
>          .align  1
>          .p2align 2,,3
>          .global main
>          .syntax unified
>          .thumb
>          .thumb_func
>          .type   main, %function
> main:
>          @ args = 0, pretend = 0, frame = 16
>          @ frame_needed = 0, uses_anonymous_args = 0
>          push    {r4, lr}        @ 108   [c=8 l=2]  *push_multi
>          movs    r0, #5  @ 5     [c=4 l=2]  *thumb2_movsi_shortim
>          sub     sp, sp, #16     @ 109   [c=4 l=4]  *arm_addsi3/11
>          bl      oor_eq          @ 6     [c=4 l=4]  *call_value_symbol
>          cbnz    r0, .L8 @ 9     [c=16 l=2]  *thumb2_cbnz/0
>          mov     r0, #-1 @ 15    [c=4 l=4]  *thumb2_movsi_vfp/1
>          bl      oor_eq          @ 16    [c=4 l=4]  *call_value_symbol
>          cbnz    r0, .L8 @ 20    [c=16 l=2]  *thumb2_cbnz/0
>          movs    r0, #5  @ 22    [c=4 l=2]  *thumb2_movsi_shortim
>          bl      oor_val         @ 23    [c=4 l=4]  *call_value_symbol
>          cmp     r1, #1  @ 26    [c=20 l=6]  *cmp_ior/0
>          it      eq
>          cmpeq   r0, #5
>          bne     .L8             @ 27    [c=16 l=2]  arm_cond_branch
>          mvn     r0, #15 @ 29    [c=4 l=4]  *thumb2_movsi_vfp/3
>          bl      uns_carry               @ 30    [c=4 l=4]  *call_value_symbol
>          mov     r4, r0  @ 93    [c=4 l=2]  *thumb2_movsi_vfp/0
>          cbnz    r0, .L8 @ 33    [c=16 l=2]  *thumb2_cbnz/0
>          movs    r0, #10 @ 35    [c=4 l=2]  *thumb2_movsi_shortim
>          bl      uns_carry               @ 36    [c=4 l=4]  *call_value_symbol
>          cmp     r0, #1  @ 38    [c=4 l=2]  *arm_cmpsi_insn/0
>          bne     .L8             @ 39    [c=16 l=2]  arm_cond_branch
>          movw    r3, #:lower16:.LANCHOR0 @ 106   [c=4 l=4]  *thumb2_movsi_vfp/4
>          movt    r3, #:upper16:.LANCHOR0 @ 107   [c=4 l=4]  *arm_movt/0
>          ldm     r3, {r0, r1, r2, r3}    @ 44    [c=8 l=4]  *ldm4_
>          stm     sp, {r0, r1, r2, r3}    @ 45    [c=8 l=4]  *stm4_
>          movs    r1, #2  @ 47    [c=4 l=2]  *thumb2_movsi_shortim
>          mov     r0, sp  @ 48    [c=4 l=2]  *thumb2_movsi_vfp/0
>          bl      inrange_eq              @ 49    [c=4 l=4]  *call_value_symbol
>          cmp     r0, #1  @ 51    [c=4 l=2]  *arm_cmpsi_insn/0
>          bne     .L8             @ 52    [c=16 l=2]  arm_cond_branch
>          mov     r0, r4  @ 58    [c=4 l=2]  *thumb2_movsi_vfp/0
>          add     sp, sp, #16     @ 113   [c=4 l=4]  *arm_addsi3/5
>          @ sp needed     @ 114   [c=8 l=0]  force_register_use
>          pop     {r4, pc}        @ 115   [c=8 l=2]  *pop_multiple_with_writeback_and_return
> .L8:
>          bl      abort           @ 11    [c=8 l=4]  *call_symbol
>          .size   main, .-main
>          .section        .rodata
>          .align  2
>          .set    .LANCHOR0,. + 0
> .LC0:
>          .word   7
>          .word   7
>          .word   7
>          .word   7
>          .ident  "GCC: (r17-2109-g2b8f4671103159) 17.0.0 20260703 (experimental)"
>
>
> Let me know if you need anything else or want me to test some potential fix.
>
> Kind regards,
> Torbjörn
>
> On 2026-07-02 08:56, Richard Biener wrote:
> > On Wed, 1 Jul 2026, Philipp Tomsich wrote:
> >
> >> visit_nary_op canonicalises (T)(A + C) into (T)A + (T)C for its VN
> >> lookup, but not the reverse -- so whether VN discovers (T)A + C ==
> >> (T)(A + C) depends on which form it sees first.  Add a match.pd rule
> >> that rewrites (T)A +- CST into (T)(A +- CST') using the op! qualifier,
> >> so the fold only fires when the narrow expression already has a value
> >> number -- i.e. only inside VN via mprts_hook.
> >>
> >> Restrict to TYPE_OVERFLOW_UNDEFINED inner types: for unsigned inner the
> >> narrow op wraps mod 2^prec (defined) while the widened outer op does
> >> not, changing the observed value (bitfld-5.c is the concrete miscompile
> >> when the guard is loosened).
> >>
> >> Use wi::min_precision (CST, SIGNED) rather than int_fits_type_p for the
> >> fits-check, so sign-encoded small negatives (e.g. -1 as sizetype's
> >> 0xFFFF...FFFF) qualify.
> >
> > OK.
> >
> > Thanks,
> > Richard.
> >
> >>      PR tree-optimization/124545
> >>
> >> gcc/ChangeLog:
> >>
> >>      * match.pd: Add (T)A +- CST -> (T)(A +- CST') for widening
> >>      conversions from a signed inner type with undefined overflow.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>      * gcc.dg/pr124545.c: New test.
> >>      * gcc.dg/pr124545-2.c: New test.
> >>
> >> Signed-off-by: Philipp Tomsich <[email protected]>
> >>
> >> ---
> >>
> >>   gcc/match.pd                      | 32 ++++++++++++++++++
> >>   gcc/testsuite/gcc.dg/pr124545-2.c | 55 +++++++++++++++++++++++++++++++
> >>   gcc/testsuite/gcc.dg/pr124545.c   | 29 ++++++++++++++++
> >>   3 files changed, 116 insertions(+)
> >>   create mode 100644 gcc/testsuite/gcc.dg/pr124545-2.c
> >>   create mode 100644 gcc/testsuite/gcc.dg/pr124545.c
> >>
> >> diff --git a/gcc/match.pd b/gcc/match.pd
> >> index ddf3b61638ce..817a52499128 100644
> >> --- a/gcc/match.pd
> >> +++ b/gcc/match.pd
> >> @@ -4067,6 +4067,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >>          (plus (convert @0) (op @2 (convert @1))))))
> >>   #endif
> >>
> >> +/* Inverse of the above: (T)(A) +- CST -> (T)(A +- CST') when T is a
> >> +   widening conversion from a type with undefined overflow and the outer
> >> +   type wraps.  This allows VN to discover that (T)A + (T)C == (T)(A + C)
> >> +   regardless of which form appears first in program order.  PR124545.
> >> +   The rewrite is unsound for unsigned inner types: the narrow op wraps
> >> +   mod 2^prec (defined) while the widened op does not, changing the
> >> +   observed value.  Cover the unsigned case separately once ranger can
> >> +   prove no wrap.  */
> >> +#if GIMPLE
> >> +  (for op (plus minus)
> >> +   (simplify
> >> +    (op (convert @0) INTEGER_CST@1)
> >> +     (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
> >> +      && TREE_CODE (type) == INTEGER_TYPE
> >> +      && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> >> +      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> >> +      && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> >> +      && TYPE_OVERFLOW_WRAPS (type)
> >> +      /* CST must be the sign-extension of its low inner-precision bits,
> >> +         otherwise narrowing changes the value.  Use min_precision (..,
> >> +         SIGNED) rather than int_fits_type_p so that small negative offsets
> >> +         encoded as large unsigned constants (e.g. -1 as sizetype) still
> >> +         qualify.  */
> >> +      && wi::min_precision (wi::to_wide (@1), SIGNED)
> >> +         <= TYPE_PRECISION (TREE_TYPE (@0)))
> >> +       (with {
> >> +      wide_int c1 = wi::to_wide (@1);
> >> +      tree inner_cst = wide_int_to_tree (TREE_TYPE (@0),
> >> +                         wi::sext (c1, TYPE_PRECISION (TREE_TYPE (@0)))); }
> >> +    (convert (op! @0 { inner_cst; }))))))
> >> +#endif
> >> +
> >>   /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
> >>      to a simple value.  */
> >>     (for op (plus minus)
> >> diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c b/gcc/testsuite/gcc.dg/pr124545-2.c
> >> new file mode 100644
> >> index 000000000000..b4806567acce
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/pr124545-2.c
> >> @@ -0,0 +1,55 @@
> >> +/* PR tree-optimization/124545 */
> >> +/* Runtime correctness for the inverse-widening VN rewrite
> >> +   (T)A +- CST -> (T)(A +- CST').  The rewrite must never change the
> >> +   computed value.  In particular it must NOT fire when CST is not
> >> +   representable in the inner type (which would silently drop the bits
> >> +   above the inner precision), and it must stay correct for unsigned
> >> +   inner types where the narrow operation wraps.  */
> >> +/* { dg-do run } */
> >> +/* { dg-options "-O2" } */
> >> +
> >> +/* CST = 2^32 does not fit in int: the value must be preserved.
> >> +   Before the fix this comparison folded to a constant 1.  */
> >> +__attribute__((noipa)) int
> >> +oor_eq (int a)
> >> +{
> >> +  return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a;
> >> +}
> >> +
> >> +__attribute__((noipa)) unsigned long long
> >> +oor_val (int a)
> >> +{
> >> +  return (unsigned long long) a + 0x100000000ULL;
> >> +}
> >> +
> >> +/* Unsigned inner: narrow add wraps mod 2^32; the widened add does not.
> >> +   The result must match the wide arithmetic for every input.  */
> >> +__attribute__((noipa)) int
> >> +uns_carry (unsigned int a)
> >> +{
> >> +  unsigned int t = a + 100u;
> >> +  unsigned long w = (unsigned long) a + 100;
> >> +  return w == (unsigned long) t;
> >> +}
> >> +
> >> +/* Legitimate in-range case (matches the PR): k == j - 1, so the two
> >> +   loads are the same address and the rewrite may fire.  */
> >> +__attribute__((noipa)) int
> >> +inrange_eq (int *p, int j)
> >> +{
> >> +  int k = j - 1;
> >> +  return p[j - 1] == p[k];
> >> +}
> >> +
> >> +int
> >> +main (void)
> >> +{
> >> +  if (oor_eq (5) != 0) __builtin_abort ();
> >> +  if (oor_eq (-1) != 0) __builtin_abort ();
> >> +  if (oor_val (5) != 5ULL + 0x100000000ULL) __builtin_abort ();
> >> +  if (uns_carry (0xfffffff0u) != 0) __builtin_abort ();
> >> +  if (uns_carry (10) != 1) __builtin_abort ();
> >> +  int arr[4] = { 7, 7, 7, 7 };
> >> +  if (inrange_eq (arr, 2) != 1) __builtin_abort ();
> >> +  return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.dg/pr124545.c b/gcc/testsuite/gcc.dg/pr124545.c
> >> new file mode 100644
> >> index 000000000000..a21346b179c7
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/pr124545.c
> >> @@ -0,0 +1,29 @@
> >> +/* PR tree-optimization/124545 */
> >> +/* Verify that VN recognizes (T)A + C == (T)(A + C') regardless of
> >> +   operand order in the equality comparison.  */
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fdump-tree-fre1" } */
> >> +
> >> +int func1(int *a, int j) {
> >> +  int k = j - 1;
> >> +  return a[j - 1] == a[k];
> >> +}
> >> +
> >> +int func2(int *a, int j) {
> >> +  int k = j - 1;
> >> +  return a[k] == a[j - 1];
> >> +}
> >> +
> >> +int func3(int *a, int j) {
> >> +  int k = j - 3;
> >> +  return a[k] == a[j - 3];
> >> +}
> >> +
> >> +int func4(int *a, int j) {
> >> +  int k = j + 2;
> >> +  return a[k] == a[j + 2];
> >> +}
> >> +
> >> +/* All four functions should fold to return 1 after FRE.  */
> >> +/* The pattern is not applied on ilp32 targets (PR116845).  */
> >> +/* { dg-final { scan-tree-dump-times "return 1;" 4 "fre1" { xfail { ilp32 } } } } */
> >>
> >
>

Re: [PATCH] tree-optimization/124545 - VN: add inverse widening lookup for PLUS/MINUS

Reply via email to