https://gcc.gnu.org/g:8af2e8e49d6e5d33c01c2beaead4933bc286974c
commit r17-837-g8af2e8e49d6e5d33c01c2beaead4933bc286974c Author: Tamar Christina <[email protected]> Date: Wed May 27 10:53:07 2026 +0100 vect: Don't generate scalar epilogue if not needed [PR120352] The example loop #define N 4 int a[N] = {0,0,0,1}; int b[N] = {0,0,0,1}; __attribute__((noipa, noinline)) int foo () { for (int i = 0; i < N; i++) { if (a[i] > b[i]) return 1; } return 0; } compiled with -O3 -march=armv9-a generates foo: adrp x2, .LANCHOR0 add x1, x2, :lo12:.LANCHOR0 ptrue p7.b, vl16 mov w0, 0 ldr q30, [x2, #:lo12:.LANCHOR0] ldr q31, [x1, 16] cmpgt p7.s, p7/z, z30.s, z31.s b.any .L7 ret .L7: ldr w2, [x2, #:lo12:.LANCHOR0] ldr w0, [x1, 16] cmp w2, w0 bgt .L4 ldr w0, [x1, 4] ldr w2, [x1, 20] cmp w2, w0 blt .L4 ldr w0, [x1, 8] ldr w2, [x1, 24] cmp w2, w0 blt .L4 ldr w2, [x1, 12] ldr w0, [x1, 28] cmp w2, w0 cset w0, gt ret .L4: mov w0, 1 ret That is, when the vector comparison finds a matching element we still branch to the scalar code just to return 1. Obviously the scalar code is completely unneeded. This patch teaches the vectorizer that when 1. We have no live values 2. We only have one exit (this is a restriction that will be lifted in a later patch and is there because we need masking to avoid false positives, but see testcase vect-early-break-no-epilog_11.c) 3. The loop has no side-effects then we don't need the scalar epilogue at all. e.g. for the above we now generate foo: adrp x0, .LANCHOR0 add x0, x0, :lo12:.LANCHOR0 ptrue p7.s, vl4 ldp q31, q30, [x0] cmplt p15.s, p7/z, z30.s, z31.s cset w0, any ret gcc/ChangeLog: PR tree-optimization/120352 * tree-vectorizer.h (LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG): New. (class _loop_vec_info): Add early_break_needs_epilogue. * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): Detect usage of stores. * tree-vect-loop-manip.cc (vect_do_peeling): Use them. * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Likewise. (vect_create_loop_vinfo): Likewise. (vect_update_ivs_after_vectorizer_for_early_breaks): Likewise. 
* tree-vect-stmts.cc (vect_stmt_relevant_p): Likewise. gcc/testsuite/ChangeLog: PR tree-optimization/120352 * gcc.dg/vect/vect-early-break-no-epilog_1.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_10.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_11.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_2.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_3.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_4.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_5.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_6.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_7.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_8.c: New test. * gcc.dg/vect/vect-early-break-no-epilog_9.c: New test. * gcc.target/aarch64/noeffect.c: New test. * gcc.target/aarch64/noeffect10.c: New test. * gcc.target/aarch64/noeffect11.c: New test. * gcc.target/aarch64/noeffect2.c: New test. * gcc.target/aarch64/noeffect3.c: New test. * gcc.target/aarch64/noeffect4.c: New test. * gcc.target/aarch64/noeffect5.c: New test. * gcc.target/aarch64/noeffect6.c: New test. * gcc.target/aarch64/noeffect7.c: New test. * gcc.target/aarch64/noeffect8.c: New test. * gcc.target/aarch64/noeffect9.c: New test. * gcc.target/aarch64/sve/noeffect.c: New test. * gcc.target/aarch64/sve/noeffect10.c: New test. * gcc.target/aarch64/sve/noeffect11.c: New test. * gcc.target/aarch64/sve/noeffect2.c: New test. * gcc.target/aarch64/sve/noeffect3.c: New test. * gcc.target/aarch64/sve/noeffect4.c: New test. * gcc.target/aarch64/sve/noeffect5.c: New test. * gcc.target/aarch64/sve/noeffect6.c: New test. * gcc.target/aarch64/sve/noeffect7.c: New test. * gcc.target/aarch64/sve/noeffect8.c: New test. * gcc.target/aarch64/sve/noeffect9.c: New test. 
Diff: --- .../gcc.dg/vect/vect-early-break-no-epilog_1.c | 21 ++++++ .../gcc.dg/vect/vect-early-break-no-epilog_10.c | 21 ++++++ .../gcc.dg/vect/vect-early-break-no-epilog_11.c | 51 ++++++++++++++ .../gcc.dg/vect/vect-early-break-no-epilog_2.c | 21 ++++++ .../gcc.dg/vect/vect-early-break-no-epilog_3.c | 21 ++++++ .../gcc.dg/vect/vect-early-break-no-epilog_4.c | 21 ++++++ .../gcc.dg/vect/vect-early-break-no-epilog_5.c | 21 ++++++ .../gcc.dg/vect/vect-early-break-no-epilog_6.c | 21 ++++++ .../gcc.dg/vect/vect-early-break-no-epilog_7.c | 25 +++++++ .../gcc.dg/vect/vect-early-break-no-epilog_8.c | 18 +++++ .../gcc.dg/vect/vect-early-break-no-epilog_9.c | 22 ++++++ gcc/testsuite/gcc.target/aarch64/noeffect.c | 30 +++++++++ gcc/testsuite/gcc.target/aarch64/noeffect10.c | 30 +++++++++ gcc/testsuite/gcc.target/aarch64/noeffect11.c | 70 +++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/noeffect2.c | 33 +++++++++ gcc/testsuite/gcc.target/aarch64/noeffect3.c | 30 +++++++++ gcc/testsuite/gcc.target/aarch64/noeffect4.c | 30 +++++++++ gcc/testsuite/gcc.target/aarch64/noeffect5.c | 30 +++++++++ gcc/testsuite/gcc.target/aarch64/noeffect6.c | 30 +++++++++ gcc/testsuite/gcc.target/aarch64/noeffect7.c | 36 ++++++++++ gcc/testsuite/gcc.target/aarch64/noeffect8.c | 32 +++++++++ gcc/testsuite/gcc.target/aarch64/noeffect9.c | 36 ++++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect.c | 27 ++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c | 27 ++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c | 78 ++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c | 32 +++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c | 34 ++++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c | 33 +++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c | 33 +++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c | 33 +++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c | 36 ++++++++++ gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c | 32 +++++++++ 
gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c | 36 ++++++++++ gcc/tree-vect-data-refs.cc | 7 ++ gcc/tree-vect-loop-manip.cc | 17 +++-- gcc/tree-vect-loop.cc | 17 +++-- gcc/tree-vect-stmts.cc | 1 + gcc/tree-vectorizer.h | 5 ++ 38 files changed, 1090 insertions(+), 8 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c new file mode 100644 index 000000000000..cf9178375db6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c @@ -0,0 +1,21 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +#define N 4 +int a[N] = {0, 0, 0, 1}; +int b[N] = {0, 0, 0, 1}; + +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c new file mode 100644 index 000000000000..86b753122dd8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c @@ -0,0 +1,21 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +#define N 8 +short a[N] = {0}; +short b[N] = {0}; + +short foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c new file mode 100644 index 000000000000..3a6b72fa5acb 
--- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c @@ -0,0 +1,51 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do run } */ +/* { dg-require-effective-target vect_early_break_hw } */ +/* { dg-require-effective-target vect_long } */ + +#include "tree-vect.h" + +__attribute__ ((noipa)) +int f1 (const unsigned long *restrict a, const unsigned long *b, int n) +{ + for (int i = 0; i < n; ++i) + { + if (a[i] < b[i]) + return 0; + if (a[i] > b[i]) + return 1; + } + return 1; +} + +__attribute__ ((noipa)) +int f2 (const unsigned long *restrict a, const unsigned long *b, int n) +{ + for (int i = 0; i < n; ++i) + { + if (a[i] < b[i]) + return 1; + if (a[i] > b[i]) + return 1; + } + return 0; +} + +int main (void) +{ + check_vect (); + + static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0}; + static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0}; + + if (f1 (a, b, 3) != 1) + __builtin_abort (); + + if (f2 (a, b, 3) != 1) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c new file mode 100644 index 000000000000..6563ceaae534 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c @@ -0,0 +1,21 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */ diff --git 
a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c new file mode 100644 index 000000000000..dfb398da9b78 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c @@ -0,0 +1,21 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c new file mode 100644 index 000000000000..99bb66f9fd05 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c @@ -0,0 +1,21 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +#define N 1000 +short a[N] = {0}; +short b[N] = {0}; + +int foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c new file mode 100644 index 000000000000..ec3eec5cfa13 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c @@ -0,0 +1,21 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +#define N 1000 +short 
a[N] = {0}; +short b[N] = {0}; + +short foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c new file mode 100644 index 000000000000..46d6a8953cfe --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c @@ -0,0 +1,21 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +#define N 1000 +short a[N] = {0}; +short b[N] = {0}; + +short foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return a[i]; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c new file mode 100644 index 000000000000..6d94312f4a60 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c @@ -0,0 +1,25 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-march=armv8-a+sve" { target { aarch64*-*-* } } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +int foo (void) +{ + for (int i = 0; i < (N / 2); i += 2) + { + if (a[i] > b[i]) + return 1; + + if (a[i + 1] > b[i + 1]) + return 1; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" { 
target { aarch64*-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c new file mode 100644 index 000000000000..3236cdb66ff3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c @@ -0,0 +1,18 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +void +add (int n, int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < n; i++) + { + c[i] = a[i] + b[i]; + if (i > 1000) + break; + } +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c new file mode 100644 index 000000000000..c788a684d750 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c @@ -0,0 +1,22 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; +int c[N] = {0}; + +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i] && a[i] > c[i]) + return 1; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect.c b/gcc/testsuite/gcc.target/aarch64/noeffect.c new file mode 100644 index 000000000000..ba3329973b24 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define 
N 4 +int a[N] = {0, 0, 0, 1}; +int b[N] = {0, 0, 0, 1}; + +/* +** foo: +** ... +** ldr q[0-9]+, \[x[0-9]+, #:lo12:\.LANCHOR0\] +** ldr q[0-9]+, \[x[0-9]+, 16\] +** cmgt v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** fmov x[0-9]+, d[0-9]+ +** cmp x[0-9]+, 0 +** cset w0, ne +** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect10.c b/gcc/testsuite/gcc.target/aarch64/noeffect10.c new file mode 100644 index 000000000000..03f3e48a729a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect10.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 8 +short a[N] = {0}; +short b[N] = {0}; + +/* +** foo: +** ... +** ldr q[0-9]+, \[x[0-9]+, #:lo12:\.LANCHOR0\] +** ldr q[0-9]+, \[x[0-9]+, 16\] +** cmgt v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** fmov x[0-9]+, d[0-9]+ +** cmp x[0-9]+, 0 +** cset w0, ne +** ret +*/ +__attribute__ ((noipa, noinline)) +short foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect11.c b/gcc/testsuite/gcc.target/aarch64/noeffect11.c new file mode 100644 index 000000000000..82c2f00c7cb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect11.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** f1: +** ... +** cmhi v[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d +** ... +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** ... +** ldr x[0-9]+, \[x[0-9]+, x[0-9]+\] +** ldr x[0-9]+, \[x[0-9]+, x[0-9]+\] +** cmp x[0-9]+, x[0-9]+ +** ... 
+** ret +*/ +__attribute__ ((noipa)) +int f1 (const unsigned long *restrict a, const unsigned long *b, int n) +{ + for (int i = 0; i < n; ++i) + { + if (a[i] < b[i]) + return 0; + if (a[i] > b[i]) + return 1; + } + return 1; +} + +/* +** f2: +** ... +** cmhi v[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d +** ... +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** ... +** ldr x[0-9]+, \[x[0-9]+, x[0-9]+\] +** ldr x[0-9]+, \[x[0-9]+, x[0-9]+\] +** cmp x[0-9]+, x[0-9]+ +** ... +** ret +*/ +__attribute__ ((noipa)) +int f2 (const unsigned long *restrict a, const unsigned long *b, int n) +{ + for (int i = 0; i < n; ++i) + { + if (a[i] < b[i]) + return 1; + if (a[i] > b[i]) + return 1; + } + return 0; +} + +int main (void) +{ + static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0}; + static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0}; + + if (f1 (a, b, 3) != 1) + __builtin_abort (); + + if (f2 (a, b, 3) != 1) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect2.c b/gcc/testsuite/gcc.target/aarch64/noeffect2.c new file mode 100644 index 000000000000..08c531fb18c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect2.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +/* +** foo: +** ... 
+** ldr q[0-9]+, \[x[0-9]+, x[0-9]+\] +** ldr q[0-9]+, \[x[0-9]+, x[0-9]+\] +** add x[0-9]+, x[0-9]+, 16 +** cmgt v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** fmov x[0-9]+, d[0-9]+ +** cbz x[0-9]+, \.L[0-9]+ +** mov w0, 1 +** ret +** mov w0, 0 +** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect3.c b/gcc/testsuite/gcc.target/aarch64/noeffect3.c new file mode 100644 index 000000000000..886ad7bda2f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect3.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +/* +** foo: +** ... +** cmgt v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** ... +** ldr w[0-9]+, \[x[0-9]+, w[0-9]+, sxtw 2\] +** ldr w[0-9]+, \[x[0-9]+, w[0-9]+, sxtw 2\] +** cmp w[0-9]+, w[0-9]+ +** ... +** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect4.c b/gcc/testsuite/gcc.target/aarch64/noeffect4.c new file mode 100644 index 000000000000..276843c9bbd8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect4.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +short a[N] = {0}; +short b[N] = {0}; + +/* +** foo: +** ... +** cmgt v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** ... +** ldrsh w[0-9]+, \[[^\n]+\] +** ldrsh w[0-9]+, \[[^\n]+\] +** cmp w[0-9]+, w[0-9]+ +** ... 
+** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect5.c b/gcc/testsuite/gcc.target/aarch64/noeffect5.c new file mode 100644 index 000000000000..c15e52ebdfc4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect5.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +short a[N] = {0}; +short b[N] = {0}; + +/* +** foo: +** ... +** cmgt v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** ... +** ldrsh w[0-9]+, \[[^\n]+\] +** ldrsh w[0-9]+, \[[^\n]+\] +** cmp w[0-9]+, w[0-9]+ +** ... +** ret +*/ +__attribute__ ((noipa, noinline)) +short foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect6.c b/gcc/testsuite/gcc.target/aarch64/noeffect6.c new file mode 100644 index 000000000000..9da4f496a02e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect6.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +short a[N] = {0}; +short b[N] = {0}; + +/* +** foo: +** ... +** cmgt v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** ... +** ldrsh w[0-9]+, \[[^\n]+\] +** ldrsh w[0-9]+, \[[^\n]+\] +** cmp w[0-9]+, w[0-9]+ +** ... 
+** ret +*/ +__attribute__ ((noipa, noinline)) +short foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return a[i]; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect7.c b/gcc/testsuite/gcc.target/aarch64/noeffect7.c new file mode 100644 index 000000000000..16fc921117aa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect7.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +/* +** foo: +** ... +** add x[0-9]+, x[0-9]+, 4000 +** add x[0-9]+, x[0-9]+, 2000 +** b \.L[0-9]+ +** ldr w[0-9]+, \[x[0-9]+[^\n]*\] +** ... +** cmp w[0-9]+, w[0-9]+ +** ... +** mov w0, 1 +** ret +** mov w0, 0 +** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < (N / 2); i += 2) + { + if (a[i] > b[i]) + return 1; + + if (a[i + 1] > b[i + 1]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect8.c b/gcc/testsuite/gcc.target/aarch64/noeffect8.c new file mode 100644 index 000000000000..ada79391a8ac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect8.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** add: +** ... +** cmeq v[0-9]+\.4s, v[0-9]+\.4s, #0 +** umaxp v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s +** ... +** ldr q[0-9]+, \[x[0-9]+[^\n]*\] +** ... +** str q[0-9]+, \[x[0-9]+[^\n]*\] +** ... +** ldr w[0-9]+, \[x[0-9]+, x[0-9]+\] +** ldr w[0-9]+, \[x[0-9]+, x[0-9]+\] +** add w[0-9]+, w[0-9]+, w[0-9]+ +** str w[0-9]+, \[x[0-9]+, x[0-9]+\] +** ... +** ret +** ... 
+*/ +void +add (int n, int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < n; i++) + { + c[i] = a[i] + b[i]; + if (i > 1000) + break; + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect9.c b/gcc/testsuite/gcc.target/aarch64/noeffect9.c new file mode 100644 index 000000000000..0ce0380e182f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/noeffect9.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; +int c[N] = {0}; + +/* +** foo: +** ... +** add x[0-9]+, x[0-9]+, 4000 +** add x[0-9]+, x[0-9]+, 3648 +** mov x0, 0 +** b \.L[0-9]+ +** ... +** ldr w[0-9]+, \[x[0-9]+[^\n]*\] +** ... +** cmp w[0-9]+, w[0-9]+ +** ... +** mov w0, 1 +** ret +** mov w0, 0 +** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i] && a[i] > c[i]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect.c new file mode 100644 index 000000000000..f7109b1483cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 4 +int a[N] = {0, 0, 0, 1}; +int b[N] = {0, 0, 0, 1}; + +/* +** foo: +** ... 
+** ldp q[0-9]+, q[0-9]+, \[x[0-9]+\] +** cmplt p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s +** ptest p[0-9]+, p[0-9]+\.b +** cset w0, any +** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c new file mode 100644 index 000000000000..39ab9489b8e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 8 +short a[N] = {0}; +short b[N] = {0}; + +/* +** foo: +** ... +** ldp q[0-9]+, q[0-9]+, \[x[0-9]+\] +** cmplt p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h +** ptest p[0-9]+, p[0-9]+\.b +** cset w0, any +** ret +*/ +__attribute__ ((noipa, noinline)) +short foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c new file mode 100644 index 000000000000..5c8c3565da62 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c @@ -0,0 +1,78 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** f1: +** ... +** whilelo p[0-9]+\.d, x[0-9]+, x[0-9]+ +** ... +** ld1d z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\] +** ld1d z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\] +** cmplo p[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d +** ... +** ldr x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\] +** ldr x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\] +** ... 
+** cmp x[0-9]+, x[0-9]+ +** ... +** ret +** ... +*/ +__attribute__ ((noipa)) +int f1 (const unsigned long *restrict a, const unsigned long *b, int n) +{ + for (int i = 0; i < n; ++i) + { + if (a[i] < b[i]) + return 0; + if (a[i] > b[i]) + return 1; + } + return 1; +} + +/* +** f2: +** ... +** whilelo p[0-9]+\.d, x[0-9]+, x[0-9]+ +** ... +** ld1d z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\] +** ld1d z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\] +** cmplo p[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d +** ... +** ldr x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\] +** ldr x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\] +** ... +** cmp x[0-9]+, x[0-9]+ +** ... +** ret +** ... +*/ +__attribute__ ((noipa)) +int f2 (const unsigned long *restrict a, const unsigned long *b, int n) +{ + for (int i = 0; i < n; ++i) + { + if (a[i] < b[i]) + return 1; + if (a[i] > b[i]) + return 1; + } + return 0; +} + +int main (void) +{ + static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0}; + static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0}; + + if (f1 (a, b, 3) != 1) + __builtin_abort (); + + if (f2 (a, b, 3) != 1) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c new file mode 100644 index 000000000000..5bb1badde3c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +/* +** foo: +** ... +** whilelo p[0-9]+\.s, w[0-9]+, w[0-9]+ +** ... 
+** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\] +** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\] +** cmpgt p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s +** b\.none \.L[0-9]+ +** mov w0, 1 +** ret +** mov w0, 0 +** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c new file mode 100644 index 000000000000..c5f81bb0d303 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +/* +** foo: +** ... +** whilelo p[0-9]+\.s, w[0-9]+, w[0-9]+ +** ... +** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\] +** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\] +** cmpgt p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s +** ... +** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\] +** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\] +** cmp w[0-9]+, w[0-9]+ +** ... +** ret +** ... +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c new file mode 100644 index 000000000000..735b54523b4d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +short a[N] = {0}; +short b[N] = {0}; + +/* +** foo: +** ... 
+** ld1h z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\] +** ld1h z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\] +** cmpgt p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h +** ... +** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\] +** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\] +** ... +** cmp w[0-9]+, w[0-9]+ +** ... +** ret +** ... +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c new file mode 100644 index 000000000000..bfaec5805785 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +short a[N] = {0}; +short b[N] = {0}; + +/* +** foo: +** ... +** ld1h z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\] +** ld1h z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\] +** cmpgt p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h +** ... +** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\] +** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\] +** ... +** cmp w[0-9]+, w[0-9]+ +** ... +** ret +** ... 
+*/ +__attribute__ ((noipa, noinline)) +short foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return i; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c new file mode 100644 index 000000000000..14438830453a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +short a[N] = {0}; +short b[N] = {0}; + +/* +** foo: +** ... +** ld1h z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\] +** ld1h z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\] +** cmpgt p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h +** ... +** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\] +** ldrsh w[0-9]+, \[x[0-9]+, x[0-9]+\] +** ... +** cmp w[0-9]+, w[0-9]+ +** ... +** ret +** ... +*/ +__attribute__ ((noipa, noinline)) +short foo (void) +{ + for (unsigned short i = 0; i < N; i++) + { + if (a[i] > b[i]) + return a[i]; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c new file mode 100644 index 000000000000..9dab90e72b25 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; + +/* +** foo: +** ... +** ld2w \{z[0-9]+\.s - z[0-9]+\.s\}, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\] +** ... +** cmpgt p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s +** ptest p[0-9]+, p[0-9]+\.b +** ... +** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\] +** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\] +** cmp w[0-9]+, w[0-9]+ +** ... 
+** ret +** ... +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < (N / 2); i += 2) + { + if (a[i] > b[i]) + return 1; + + if (a[i + 1] > b[i + 1]) + return 1; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c new file mode 100644 index 000000000000..79979882b371 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** add: +** ... +** cmpeq p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, #1 +** ... +** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\] +** ld1w z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\] +** add z[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s +** st1w z[0-9]+\.s, p[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\] +** ... +** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\] +** ldr w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\] +** add w[0-9]+, w[0-9]+, w[0-9]+ +** str w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\] +** ... +** ret +** ... +*/ +void +add (int n, int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < n; i++) + { + c[i] = a[i] + b[i]; + if (i > 1000) + break; + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c new file mode 100644 index 000000000000..bfc5ed7d4e53 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define N 1000 +int a[N] = {0}; +int b[N] = {0}; +int c[N] = {0}; + +/* +** foo: +** ... +** add x[0-9]+, x[0-9]+, 4000 +** add x[0-9]+, x[0-9]+, 3648 +** mov x0, 0 +** b \.L[0-9]+ +** ... 
+** ldr w[0-9]+, \[x[0-9]+[^\n]*\] +** ... +** cmp w[0-9]+, w[0-9]+ +** ... +** mov w0, 1 +** ret +** mov w0, 0 +** ret +*/ +__attribute__ ((noipa, noinline)) +int foo (void) +{ + for (int i = 0; i < N; i++) + { + if (a[i] > b[i] && a[i] > c[i]) + return 1; + } + return 0; +} diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index da65f1d652cf..03ac4c141d08 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -889,6 +889,13 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) dest_bb->index); LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb; + /* Check if loop has a side-effect (stores), force scalar epilogue. */ + for (auto dr : LOOP_VINFO_DATAREFS (loop_vinfo)) + if (DR_IS_WRITE (dr)) + { + LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true; + break; + } if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ()) { diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 3aae0dea25b0..9653ad43e0d3 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3306,12 +3306,17 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, /* For early breaks the scalar loop needs to execute at most VF times to find the element that caused the break. */ - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)) bound_epilog = vf; bool epilog_peeling = maybe_ne (bound_epilog, 0U); poly_uint64 bound_scalar = bound_epilog; + if (!LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) && dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "early break does not require epilog.\n"); + if (!prolog_peeling && !epilog_peeling) return NULL; @@ -3501,11 +3506,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, /* Peel prolog and put it on preheader edge of loop. 
*/ edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo); edge prolog_e = NULL; + bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo); prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e, scalar_loop, scalar_e, e, &prolog_e, true, NULL, uncounted_p, uncounted_p, - true); + early_break_peel_p); gcc_assert (prolog); prolog->force_vectorize = false; @@ -3617,11 +3623,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, edge epilog_e = vect_epilogues ? e : scalar_e; edge new_epilog_e = NULL; auto_vec<basic_block> doms; + bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo); epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e, &new_epilog_e, true, &doms, uncounted_p, false, - true); + early_break_peel_p); LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e; gcc_assert (epilog); @@ -3671,6 +3678,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, /* Handle any remaining dominator updates needed after inserting the loop skip edge above. 
*/ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) && prolog_peeling) { /* Adding a skip edge to skip a loop with multiple exits @@ -3818,7 +3826,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo)); if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo) - && get_loop_exit_edges (loop).length () == 1)) + && get_loop_exit_edges (loop).length () == 1) + && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)) { basic_block exit_bb = NULL; edge update_e = NULL; diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index ac7e08cf205c..dded8b9aabff 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -775,7 +775,9 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) drs_advanced_by (NULL_TREE), vec_loop_main_exit (NULL), vec_epilogue_loop_main_exit (NULL), - scalar_loop_main_exit (NULL) + scalar_loop_main_exit (NULL), + early_break_needs_epilogue (false), + early_break_niters_var (NULL) { /* CHECKME: We want to visit all BBs before their successors (except for latch blocks, for which this assertion wouldn't hold). In the simple @@ -1705,6 +1707,13 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, LOOP_VINFO_EARLY_BREAKS (loop_vinfo) = !LOOP_VINFO_LOOP_CONDS (loop_vinfo).is_empty (); + /* At the moment we can't support no epilogs for multiple exits, result of + the first compare should be masked by that of the second. We can only + allow it if the early exits have the same live values. For differing + values we have to calculate a third mask to disambiguate. 
*/ + LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) + = LOOP_VINFO_LOOP_CONDS (loop_vinfo).length () > 1; + if (info->inner_loop_cond) { /* If we have an estimate on the number of iterations of the inner @@ -11058,11 +11067,11 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo) { DUMP_VECT_SCOPE ("vect_update_ivs_after_vectorizer_for_early_breaks"); - if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + /* If no peeling was done then we have no IV to update. */ + || !LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo)) return; - gcc_assert (LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo)); - tree phi_var = LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo); tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 09ee794300be..4c9d871a31b8 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -413,6 +413,7 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI); *live_p = true; + LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true; } } } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 6d7393809013..b8a287825f43 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1241,6 +1241,10 @@ public: For counted loops, this IV controls the natural exits of the loop. */ edge scalar_loop_main_exit; + /* Indicate if the multiple exit loop has any side-effects that require it to + have a scalar epilogue. */ + bool early_break_needs_epilogue; + /* Used to store the list of stores needing to be moved if doing early break vectorization as they would violate the scalar loop semantics if vectorized in their current location. 
These are stored in order that they @@ -1325,6 +1329,7 @@ public: #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter #define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks +#define LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG(L) (L)->early_break_needs_epilogue #define LOOP_VINFO_EARLY_BRK_STORES(L) (L)->early_break_stores #define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \ ((single_pred ((L)->loop->latch) != (L)->vec_loop_main_exit->src) \
