Hi,
The gcc.dg/pr124545-2.c test does not work for arm-none-eabi.
Is this supposed to work or is it missing some dg-require-effective-target?
Testing gcc.dg/pr124545-2.c
doing compile
Executing on host: /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc
/build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c -mthumb -march=armv7ve+neon
-mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -dumpbase ""
-fdiagnostics-plain-output -O2 --specs=rdimon.specs -Wl,--start-group -lc -lm
-Wl,--end-group --specs=nosys.specs -Wl,--allow-multiple-definition
-Wl,-u,_isatty,-u,_fstat -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main -Wl,-wrap,abort
-Wl,gcc_tg.o -lm -o ./pr124545-2.exe (timeout = 800)
spawn -ignore SIGHUP /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc
/build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c -mthumb -march=armv7ve+neon
-mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -dumpbase
-fdiagnostics-plain-output -O2 --specs=rdimon.specs -Wl,--start-group -lc -lm
-Wl,--end-group --specs=nosys.specs -Wl,--allow-multiple-definition
-Wl,-u,_isatty,-u,_fstat -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main
-Wl,-wrap,abort -Wl,gcc_tg.o -lm -o ./pr124545-2.exe
pid is 165557 -165557
pid is -1
output is status 0
PASS: gcc.dg/pr124545-2.c (test for excess errors)
spawning command qemu-system-arm -nographic -machine virt -cpu cortex-a7 -m
256 -semihosting -monitor /dev/null -kernel ./pr124545-2.exe
spawn qemu-system-arm -nographic -machine virt -cpu cortex-a7 -m 256
-semihosting -monitor /dev/null -kernel ./pr124545-2.exe
*** EXIT code 4242
*** EXIT code 1
pid is -1
Shell closed.
Output is
*** EXIT code 4242
*** EXIT code 1
FAIL: gcc.dg/pr124545-2.c execution test
This is the assembly:
$ /build/r17-2109-g2b8f4671103159/bin/arm-none-eabi-gcc
/build/gcc_src/gcc/testsuite/gcc.dg/pr124545-2.c -mthumb -march=armv7ve+neon
-mtune=cortex-a7 -mfloat-abi=hard -mfpu=auto -O2 -dp -S -o -
.arch armv7-a
.arch_extension virt
.arch_extension idiv
.arch_extension sec
.arch_extension mp
.fpu neon
.eabi_attribute 28, 1
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "pr124545-2.c"
.text
.align 1
.p2align 2,,3
.global oor_eq
.syntax unified
.thumb
.thumb_func
.type oor_eq, %function
oor_eq:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
movs r0, #0 @ 10 [c=4 l=2] *thumb2_movsi_shortim
bx lr @ 17 [c=8 l=4] *thumb2_return
.size oor_eq, .-oor_eq
.align 1
.p2align 2,,3
.global oor_val
.syntax unified
.thumb
.thumb_func
.type oor_val, %function
oor_val:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
asrs r1, r0, #31 @ 6 [c=4 l=2] *thumb2_shiftsi3_short/1
adds r1, r1, #1 @ 21 [c=4 l=2] *thumb2_addsi_short/0
bx lr @ 27 [c=8 l=4] *thumb2_return
.size oor_val, .-oor_val
.align 1
.p2align 2,,3
.global uns_carry
.syntax unified
.thumb
.thumb_func
.type uns_carry, %function
uns_carry:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
movs r0, #1 @ 10 [c=4 l=2] *thumb2_movsi_shortim
bx lr @ 17 [c=8 l=4] *thumb2_return
.size uns_carry, .-uns_carry
.align 1
.p2align 2,,3
.global inrange_eq
.syntax unified
.thumb
.thumb_func
.type inrange_eq, %function
inrange_eq:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
movs r0, #1 @ 11 [c=4 l=2] *thumb2_movsi_shortim
bx lr @ 18 [c=8 l=4] *thumb2_return
.size inrange_eq, .-inrange_eq
.section .text.startup,"ax",%progbits
.align 1
.p2align 2,,3
.global main
.syntax unified
.thumb
.thumb_func
.type main, %function
main:
@ args = 0, pretend = 0, frame = 16
@ frame_needed = 0, uses_anonymous_args = 0
push {r4, lr} @ 108 [c=8 l=2] *push_multi
movs r0, #5 @ 5 [c=4 l=2] *thumb2_movsi_shortim
sub sp, sp, #16 @ 109 [c=4 l=4] *arm_addsi3/11
bl oor_eq @ 6 [c=4 l=4] *call_value_symbol
cbnz r0, .L8 @ 9 [c=16 l=2] *thumb2_cbnz/0
mov r0, #-1 @ 15 [c=4 l=4] *thumb2_movsi_vfp/1
bl oor_eq @ 16 [c=4 l=4] *call_value_symbol
cbnz r0, .L8 @ 20 [c=16 l=2] *thumb2_cbnz/0
movs r0, #5 @ 22 [c=4 l=2] *thumb2_movsi_shortim
bl oor_val @ 23 [c=4 l=4] *call_value_symbol
cmp r1, #1 @ 26 [c=20 l=6] *cmp_ior/0
it eq
cmpeq r0, #5
bne .L8 @ 27 [c=16 l=2] arm_cond_branch
mvn r0, #15 @ 29 [c=4 l=4] *thumb2_movsi_vfp/3
bl uns_carry @ 30 [c=4 l=4] *call_value_symbol
mov r4, r0 @ 93 [c=4 l=2] *thumb2_movsi_vfp/0
cbnz r0, .L8 @ 33 [c=16 l=2] *thumb2_cbnz/0
movs r0, #10 @ 35 [c=4 l=2] *thumb2_movsi_shortim
bl uns_carry @ 36 [c=4 l=4] *call_value_symbol
cmp r0, #1 @ 38 [c=4 l=2] *arm_cmpsi_insn/0
bne .L8 @ 39 [c=16 l=2] arm_cond_branch
movw r3, #:lower16:.LANCHOR0 @ 106 [c=4 l=4] *thumb2_movsi_vfp/4
movt r3, #:upper16:.LANCHOR0 @ 107 [c=4 l=4] *arm_movt/0
ldm r3, {r0, r1, r2, r3} @ 44 [c=8 l=4] *ldm4_
stm sp, {r0, r1, r2, r3} @ 45 [c=8 l=4] *stm4_
movs r1, #2 @ 47 [c=4 l=2] *thumb2_movsi_shortim
mov r0, sp @ 48 [c=4 l=2] *thumb2_movsi_vfp/0
bl inrange_eq @ 49 [c=4 l=4] *call_value_symbol
cmp r0, #1 @ 51 [c=4 l=2] *arm_cmpsi_insn/0
bne .L8 @ 52 [c=16 l=2] arm_cond_branch
mov r0, r4 @ 58 [c=4 l=2] *thumb2_movsi_vfp/0
add sp, sp, #16 @ 113 [c=4 l=4] *arm_addsi3/5
@ sp needed @ 114 [c=8 l=0] force_register_use
pop {r4, pc} @ 115 [c=8 l=2] *pop_multiple_with_writeback_and_return
.L8:
bl abort @ 11 [c=8 l=4] *call_symbol
.size main, .-main
.section .rodata
.align 2
.set .LANCHOR0,. + 0
.LC0:
.word 7
.word 7
.word 7
.word 7
.ident "GCC: (r17-2109-g2b8f4671103159) 17.0.0 20260703 (experimental)"
Let me know if you need anything else or want me to test some potential fix.
Kind regards,
Torbjörn
On 2026-07-02 08:56, Richard Biener wrote:
On Wed, 1 Jul 2026, Philipp Tomsich wrote:
visit_nary_op canonicalises (T)(A + C) into (T)A + (T)C for its VN
lookup, but not the reverse -- so whether VN discovers (T)A + C ==
(T)(A + C) depends on which form it sees first. Add a match.pd rule
that rewrites (T)A +- CST into (T)(A +- CST') using the op! qualifier,
so the fold only fires when the narrow expression already has a value
number -- i.e. only inside VN via mprts_hook.
Restrict to TYPE_OVERFLOW_UNDEFINED inner types: for unsigned inner the
narrow op wraps mod 2^prec (defined) while the widened outer op does
not, changing the observed value (bitfld-5.c is the concrete miscompile
when the guard is loosened).
Use wi::min_precision (CST, SIGNED) rather than int_fits_type_p for the
fits-check, so sign-encoded small negatives (e.g. -1 as sizetype's
0xFFFF...FFFF) qualify.
OK.
Thanks,
Richard.
PR tree-optimization/124545
gcc/ChangeLog:
* match.pd: Add (T)A +- CST -> (T)(A +- CST') for widening
conversions from a signed inner type with undefined overflow.
gcc/testsuite/ChangeLog:
* gcc.dg/pr124545.c: New test.
* gcc.dg/pr124545-2.c: New test.
Signed-off-by: Philipp Tomsich <[email protected]>
---
gcc/match.pd | 32 ++++++++++++++++++
gcc/testsuite/gcc.dg/pr124545-2.c | 55 +++++++++++++++++++++++++++++++
gcc/testsuite/gcc.dg/pr124545.c | 29 ++++++++++++++++
3 files changed, 116 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/pr124545-2.c
create mode 100644 gcc/testsuite/gcc.dg/pr124545.c
diff --git a/gcc/match.pd b/gcc/match.pd
index ddf3b61638ce..817a52499128 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4067,6 +4067,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(plus (convert @0) (op @2 (convert @1))))))
#endif
+/* Inverse of the above: (T)(A) +- CST -> (T)(A +- CST') when T is a
+ widening conversion from a type with undefined overflow and the outer
+ type wraps. This allows VN to discover that (T)A + (T)C == (T)(A + C)
+ regardless of which form appears first in program order. PR124545.
+ The rewrite is unsound for unsigned inner types: the narrow op wraps
+ mod 2^prec (defined) while the widened op does not, changing the
+ observed value. Cover the unsigned case separately once ranger can
+ prove no wrap. */
+#if GIMPLE
+ (for op (plus minus)
+ (simplify
+ (op (convert @0) INTEGER_CST@1)
+ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+ && TREE_CODE (type) == INTEGER_TYPE
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_WRAPS (type)
+ /* CST must be the sign-extension of its low inner-precision bits,
+ otherwise narrowing changes the value. Use min_precision (..,
+ SIGNED) rather than int_fits_type_p so that small negative offsets
+ encoded as large unsigned constants (e.g. -1 as sizetype) still
+ qualify. */
+ && wi::min_precision (wi::to_wide (@1), SIGNED)
+ <= TYPE_PRECISION (TREE_TYPE (@0)))
+ (with {
+ wide_int c1 = wi::to_wide (@1);
+ tree inner_cst = wide_int_to_tree (TREE_TYPE (@0),
+ wi::sext (c1, TYPE_PRECISION (TREE_TYPE (@0)))); }
+ (convert (op! @0 { inner_cst; }))))))
+#endif
+
/* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
to a simple value. */
(for op (plus minus)
diff --git a/gcc/testsuite/gcc.dg/pr124545-2.c b/gcc/testsuite/gcc.dg/pr124545-2.c
new file mode 100644
index 000000000000..b4806567acce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124545-2.c
@@ -0,0 +1,55 @@
+/* PR tree-optimization/124545 */
+/* Runtime correctness for the inverse-widening VN rewrite
+ (T)A +- CST -> (T)(A +- CST'). The rewrite must never change the
+ computed value. In particular it must NOT fire when CST is not
+ representable in the inner type (which would silently drop the bits
+ above the inner precision), and it must stay correct for unsigned
+ inner types where the narrow operation wraps. */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+/* CST = 2^32 does not fit in int: the value must be preserved.
+ Before the fix this comparison folded to a constant 1. */
+__attribute__((noipa)) int
+oor_eq (int a)
+{
+ return ((unsigned long long) a + 0x100000000ULL) == (unsigned long long) a;
+}
+
+__attribute__((noipa)) unsigned long long
+oor_val (int a)
+{
+ return (unsigned long long) a + 0x100000000ULL;
+}
+
+/* Unsigned inner: narrow add wraps mod 2^32; the widened add does not.
+ The result must match the wide arithmetic for every input. */
+__attribute__((noipa)) int
+uns_carry (unsigned int a)
+{
+ unsigned int t = a + 100u;
+ unsigned long w = (unsigned long) a + 100;
+ return w == (unsigned long) t;
+}
+
+/* Legitimate in-range case (matches the PR): k == j - 1, so the two
+ loads are the same address and the rewrite may fire. */
+__attribute__((noipa)) int
+inrange_eq (int *p, int j)
+{
+ int k = j - 1;
+ return p[j - 1] == p[k];
+}
+
+int
+main (void)
+{
+ if (oor_eq (5) != 0) __builtin_abort ();
+ if (oor_eq (-1) != 0) __builtin_abort ();
+ if (oor_val (5) != 5ULL + 0x100000000ULL) __builtin_abort ();
+ if (uns_carry (0xfffffff0u) != 0) __builtin_abort ();
+ if (uns_carry (10) != 1) __builtin_abort ();
+ int arr[4] = { 7, 7, 7, 7 };
+ if (inrange_eq (arr, 2) != 1) __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr124545.c b/gcc/testsuite/gcc.dg/pr124545.c
new file mode 100644
index 000000000000..a21346b179c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr124545.c
@@ -0,0 +1,29 @@
+/* PR tree-optimization/124545 */
+/* Verify that VN recognizes (T)A + C == (T)(A + C') regardless of
+ operand order in the equality comparison. */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int func1(int *a, int j) {
+ int k = j - 1;
+ return a[j - 1] == a[k];
+}
+
+int func2(int *a, int j) {
+ int k = j - 1;
+ return a[k] == a[j - 1];
+}
+
+int func3(int *a, int j) {
+ int k = j - 3;
+ return a[k] == a[j - 3];
+}
+
+int func4(int *a, int j) {
+ int k = j + 2;
+ return a[k] == a[j + 2];
+}
+
+/* All four functions should fold to return 1 after FRE. */
+/* The pattern is not applied on ilp32 targets (PR116845). */
+/* { dg-final { scan-tree-dump-times "return 1;" 4 "fre1" { xfail { ilp32 } } } } */