Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]
On Wed, Aug 25, 2021 at 11:26 AM guojiufu wrote: > > On 2021-08-16 09:33, Bin.Cheng wrote: > > On Wed, Aug 4, 2021 at 10:42 AM guojiufu > > wrote: > >> > ... > >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> >> new file mode 100644 > >> >> index 000..6eed3fa8aca > >> >> --- /dev/null > >> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> >> @@ -0,0 +1,63 @@ > >> >> +TYPE __attribute__ ((noinline)) > >> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > >> >> +{ > >> >> + for (l = L_BASE; n < l; l += C) > >> >> +*a++ = *b++ + 1; > >> >> + return l; > >> >> +} > >> >> + > >> >> +TYPE __attribute__ ((noinline)) > >> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > >> >> +{ > >> >> + for (l = L_BASE_DOWN; l < n; l -= C) > > I noticed that both L_BASE and L_BASE_DOWN are defined as l, which > > makes this test a bit confusing. Could you clean the use of l, for > > example, by using an auto var for the loop index invariable? > > Otherwise the patch looks good to me. Thanks very much for the work. > > Hi, > > Sorry for bothering you here. > I feel this would be an approval (with the comment) already :) > > With the change code to make it a little clear as: >TYPE i; >for (i = l; n < i; i += C) > > it may be ok to commit the patch to the trunk, right? Yes please. Thanks again for working on this. Thanks, bin > > BR, > Jiufu > > > > > Thanks, > > bin > >> >> +*a++ = *b++ + 1; > >> >> + return l; > >> >> +} > >> >> + > >> >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } > >> >> + > >> >> +int a[1000], b[1000]; > >> >> +int fail; > >> >> + > >> >> +int > ... 
> >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> >> new file mode 100644 > >> >> index 000..94f6b99b893 > >> >> --- /dev/null > >> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> >> @@ -0,0 +1,15 @@ > >> >> +/* { dg-require-effective-target vect_int } */ > >> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > >> >> +#define TYPE signed char > >> >> +#define MIN -128 > >> >> +#define MAX 127 > >> >> +#define N_BASE (MAX - 32) > >> >> +#define N_BASE_DOWN (MIN + 32) > >> >> + > >> >> +#define C 3 > >> >> +#define L_BASE l > >> >> +#define L_BASE_DOWN l > >> >> +
Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]
On 2021-08-16 09:33, Bin.Cheng wrote: On Wed, Aug 4, 2021 at 10:42 AM guojiufu wrote: ... >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc >> new file mode 100644 >> index 000..6eed3fa8aca >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc >> @@ -0,0 +1,63 @@ >> +TYPE __attribute__ ((noinline)) >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) >> +{ >> + for (l = L_BASE; n < l; l += C) >> +*a++ = *b++ + 1; >> + return l; >> +} >> + >> +TYPE __attribute__ ((noinline)) >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) >> +{ >> + for (l = L_BASE_DOWN; l < n; l -= C) I noticed that both L_BASE and L_BASE_DOWN are defined as l, which makes this test a bit confusing. Could you clean the use of l, for example, by using an auto var for the loop index invariable? Otherwise the patch looks good to me. Thanks very much for the work. Hi, Sorry for bothering you here. I feel this would be an approval (with the comment) already :) With the change code to make it a little clear as: TYPE i; for (i = l; n < i; i += C) it may be ok to commit the patch to the trunk, right? BR, Jiufu Thanks, bin >> +*a++ = *b++ + 1; >> + return l; >> +} >> + >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } >> + >> +int a[1000], b[1000]; >> +int fail; >> + >> +int ... >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> new file mode 100644 >> index 000..94f6b99b893 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> @@ -0,0 +1,15 @@ >> +/* { dg-require-effective-target vect_int } */ >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ >> +#define TYPE signed char >> +#define MIN -128 >> +#define MAX 127 >> +#define N_BASE (MAX - 32) >> +#define N_BASE_DOWN (MIN + 32) >> + >> +#define C 3 >> +#define L_BASE l >> +#define L_BASE_DOWN l >> +
Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]
Jiufu Guo writes: "Bin.Cheng" writes: On Wed, Aug 4, 2021 at 10:42 AM guojiufu wrote: Hi, cut... >> @@ -0,0 +1,63 @@ >> +TYPE __attribute__ ((noinline)) >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE >> l, >> TYPE n) >> +{ >> + for (l = L_BASE; n < l; l += C) >> +*a++ = *b++ + 1; >> + return l; >> +} >> + >> +TYPE __attribute__ ((noinline)) >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE >> l, >> TYPE n) >> +{ >> + for (l = L_BASE_DOWN; l < n; l -= C) I noticed that both L_BASE and L_BASE_DOWN are defined as l, which makes this test a bit confusing. Could you clean the use of l, for example, by using an auto var for the loop index invariable? Otherwise the patch looks good to me. Thanks very much for the work. Thanks a lot for your help with the review! L_BASE.. are not needed. I updated the patch to use a new index var 'i' for the loop instead of param 'l': TYPE i; for (i = l; n < i; i += C) I updated the patch as below. Bootstrap & regression tests pass on powerpc64 and powerpc64le. I mean it also passes on powerpc64 (BE, including 32-bit). BR, Jiufu For code like: unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i > val; ++i) cnt++; return cnt; } The number of iterations should be about UINT_MAX - start. There is a function adjust_cond_for_loop_until_wrap which handles similar work for const bases. Like adjust_cond_for_loop_until_wrap, this patch enhances the functions number_of_iterations_cond/number_of_iterations_lt to analyze the number of iterations for this kind of loop. Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. Is this ok for trunk? gcc/ChangeLog: 2021-08-16 Jiufu Guo PR tree-optimization/101145 * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): New function. (number_of_iterations_lt): Invoke above function. (adjust_cond_for_loop_until_wrap): Merge to number_of_iterations_until_wrap. (number_of_iterations_cond): Update invokes for adjust_cond_for_loop_until_wrap and number_of_iterations_lt. 
gcc/testsuite/ChangeLog: 2021-08-16 Jiufu Guo PR tree-optimization/101145 * gcc.dg/vect/pr101145.c: New test. * gcc.dg/vect/pr101145.inc: New test. * gcc.dg/vect/pr101145_1.c: New test. * gcc.dg/vect/pr101145_2.c: New test. * gcc.dg/vect/pr101145_3.c: New test. * gcc.dg/vect/pr101145inf.c: New test. * gcc.dg/vect/pr101145inf.inc: New test. * gcc.dg/vect/pr101145inf_1.c: New test. --- gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++ gcc/testsuite/gcc.dg/vect/pr101145.inc| 65 gcc/testsuite/gcc.dg/vect/pr101145_1.c| 13 ++ gcc/testsuite/gcc.dg/vect/pr101145_2.c| 13 ++ gcc/testsuite/gcc.dg/vect/pr101145_3.c| 13 ++ gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ gcc/tree-ssa-loop-niter.c | 157 ++ 9 files changed, 459 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c new file mode 100644 index 000..74031b031cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#include + +unsigned __attribute__ ((noinline)) +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (UINT_MAX - 64 < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned 
__attribute__ ((noinline)) +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = UINT_MAX - 32; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ // infininate + while (0 <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + //no loop + l = UINT_MAX; +
Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]
"Bin.Cheng" writes: On Wed, Aug 4, 2021 at 10:42 AM guojiufu wrote: Hi, cut... >> @@ -0,0 +1,63 @@ >> +TYPE __attribute__ ((noinline)) >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, >> TYPE n) >> +{ >> + for (l = L_BASE; n < l; l += C) >> +*a++ = *b++ + 1; >> + return l; >> +} >> + >> +TYPE __attribute__ ((noinline)) >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, >> TYPE n) >> +{ >> + for (l = L_BASE_DOWN; l < n; l -= C) I noticed that both L_BASE and L_BASE_DOWN are defined as l, which makes this test a bit confusing. Could you clean the use of l, for example, by using an auto var for the loop index invariable? Otherwise the patch looks good to me. Thanks very much for the work. Thanks a lot for your help to review! L_BASE.. are not needed. Updated the patch which use a new index var 'i' for loop instead param 'l': TYPE i; for (i = l; n < i; i += C) I updated the patch as below. Bootstrap & regress pass on powerpc64 and powerpc64le. For code like: unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i > val; ++i) cnt++; return cnt; } The number of iterations should be about UINT_MAX - start. There is function adjust_cond_for_loop_until_wrap which handles similar work for const bases. Like adjust_cond_for_loop_until_wrap, this patch enhance function number_of_iterations_cond/number_of_iterations_lt to analyze number of iterations for this kind of loop. Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. Is this ok for trunk? gcc/ChangeLog: 2021-08-16 Jiufu Guo PR tree-optimization/101145 * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): New function. (number_of_iterations_lt): Invoke above function. (adjust_cond_for_loop_until_wrap): Merge to number_of_iterations_until_wrap. (number_of_iterations_cond): Update invokes for adjust_cond_for_loop_until_wrap and number_of_iterations_lt. 
gcc/testsuite/ChangeLog: 2021-08-16 Jiufu Guo PR tree-optimization/101145 * gcc.dg/vect/pr101145.c: New test. * gcc.dg/vect/pr101145.inc: New test. * gcc.dg/vect/pr101145_1.c: New test. * gcc.dg/vect/pr101145_2.c: New test. * gcc.dg/vect/pr101145_3.c: New test. * gcc.dg/vect/pr101145inf.c: New test. * gcc.dg/vect/pr101145inf.inc: New test. * gcc.dg/vect/pr101145inf_1.c: New test. --- gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++ gcc/testsuite/gcc.dg/vect/pr101145.inc| 65 gcc/testsuite/gcc.dg/vect/pr101145_1.c| 13 ++ gcc/testsuite/gcc.dg/vect/pr101145_2.c| 13 ++ gcc/testsuite/gcc.dg/vect/pr101145_3.c| 13 ++ gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ gcc/tree-ssa-loop-niter.c | 157 ++ 9 files changed, 459 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c new file mode 100644 index 000..74031b031cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#include + +unsigned __attribute__ ((noinline)) +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (UINT_MAX - 64 < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned 
__attribute__ ((noinline)) +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = UINT_MAX - 32; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ // infininate + while (0 <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + //no loop + l = UINT_MAX; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinli
Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]
On Wed, Aug 4, 2021 at 10:42 AM guojiufu wrote: > > Hi, > > I would like to have a ping on this. > > https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574596.html Sorry for being late in replying. > > BR, > Jiufu > > On 2021-07-15 08:17, guojiufu via Gcc-patches wrote: > > Hi, > > > > I would like to have an early ping on this with more mail addresses. > > > > BR, > > Jiufu. > > > > On 2021-07-07 20:47, Jiufu Guo wrote: > >> Changes since v1: > >> * Update assumptions for niter, add more test cases check > >> * Use widest_int/wide_int instead mpz to do +-/ > >> * Move some early check for quick return > >> > >> For code like: > >> unsigned foo(unsigned val, unsigned start) > >> { > >> unsigned cnt = 0; > >> for (unsigned i = start; i > val; ++i) > >> cnt++; > >> return cnt; > >> } > >> > >> The number of iterations should be about UINT_MAX - start. > >> > >> There is function adjust_cond_for_loop_until_wrap which > >> handles similar work for const bases. > >> Like adjust_cond_for_loop_until_wrap, this patch enhance > >> function number_of_iterations_cond/number_of_iterations_lt > >> to analyze number of iterations for this kind of loop. > >> > >> Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. > >> Is this ok for trunk? > >> > >> gcc/ChangeLog: > >> > >> 2021-07-07 Jiufu Guo > >> > >> PR tree-optimization/101145 > >> * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): > >> New function. > >> (number_of_iterations_lt): Invoke above function. > >> (adjust_cond_for_loop_until_wrap): > >> Merge to number_of_iterations_until_wrap. > >> (number_of_iterations_cond): Update invokes for > >> adjust_cond_for_loop_until_wrap and number_of_iterations_lt. > >> > >> gcc/testsuite/ChangeLog: > >> > >> 2021-07-07 Jiufu Guo > >> > >> PR tree-optimization/101145 > >> * gcc.dg/vect/pr101145.c: New test. > >> * gcc.dg/vect/pr101145.inc: New test. > >> * gcc.dg/vect/pr101145_1.c: New test. > >> * gcc.dg/vect/pr101145_2.c: New test. 
> >> * gcc.dg/vect/pr101145_3.c: New test. > >> * gcc.dg/vect/pr101145inf.c: New test. > >> * gcc.dg/vect/pr101145inf.inc: New test. > >> * gcc.dg/vect/pr101145inf_1.c: New test. > >> --- > >> gcc/testsuite/gcc.dg/vect/pr101145.c | 187 > >> ++ > >> gcc/testsuite/gcc.dg/vect/pr101145.inc| 63 > >> gcc/testsuite/gcc.dg/vect/pr101145_1.c| 15 ++ > >> gcc/testsuite/gcc.dg/vect/pr101145_2.c| 15 ++ > >> gcc/testsuite/gcc.dg/vect/pr101145_3.c| 15 ++ > >> gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ > >> gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 > >> gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ > >> gcc/tree-ssa-loop-niter.c | 157 ++ > >> 9 files changed, 463 insertions(+), 65 deletions(-) > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > >> > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c > >> b/gcc/testsuite/gcc.dg/vect/pr101145.c > >> new file mode 100644 > >> index 000..74031b031cf > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c > >> @@ -0,0 +1,187 @@ > >> +/* { dg-require-effective-target vect_int } */ > >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > >> +#include > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + while (n < ++l) > >> +*a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, > >> unsigned) > >> +{ > >> + while (UINT_MAX - 64 < ++l) > >> +*a++ = *b++ + 1; > >> 
+ return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + l = UINT_MAX - 32; > >> + while (n < ++l) > >> +*a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + while (n <= ++l) > >> +*a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ // infininate > >> + while (0 <= ++l) > >> +*a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_5 (int *__restr
Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]
Hi, I would like to have a ping on this. https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574596.html BR, Jiufu On 2021-07-15 08:17, guojiufu via Gcc-patches wrote: Hi, I would like to have an early ping on this with more mail addresses. BR, Jiufu. On 2021-07-07 20:47, Jiufu Guo wrote: Changes since v1: * Update assumptions for niter, add more test case checks * Use widest_int/wide_int instead of mpz to do +-/ * Move some early checks for quick return For code like: unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i > val; ++i) cnt++; return cnt; } The number of iterations should be about UINT_MAX - start. There is a function adjust_cond_for_loop_until_wrap which handles similar work for const bases. Like adjust_cond_for_loop_until_wrap, this patch enhances the functions number_of_iterations_cond/number_of_iterations_lt to analyze the number of iterations for this kind of loop. Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. Is this ok for trunk? gcc/ChangeLog: 2021-07-07 Jiufu Guo PR tree-optimization/101145 * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): New function. (number_of_iterations_lt): Invoke above function. (adjust_cond_for_loop_until_wrap): Merge to number_of_iterations_until_wrap. (number_of_iterations_cond): Update invokes for adjust_cond_for_loop_until_wrap and number_of_iterations_lt. gcc/testsuite/ChangeLog: 2021-07-07 Jiufu Guo PR tree-optimization/101145 * gcc.dg/vect/pr101145.c: New test. * gcc.dg/vect/pr101145.inc: New test. * gcc.dg/vect/pr101145_1.c: New test. * gcc.dg/vect/pr101145_2.c: New test. * gcc.dg/vect/pr101145_3.c: New test. * gcc.dg/vect/pr101145inf.c: New test. * gcc.dg/vect/pr101145inf.inc: New test. * gcc.dg/vect/pr101145inf_1.c: New test. 
--- gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++ gcc/testsuite/gcc.dg/vect/pr101145.inc| 63 gcc/testsuite/gcc.dg/vect/pr101145_1.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145_2.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145_3.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ gcc/tree-ssa-loop-niter.c | 157 ++ 9 files changed, 463 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c new file mode 100644 index 000..74031b031cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#include + +unsigned __attribute__ ((noinline)) +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (UINT_MAX - 64 < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = UINT_MAX - 32; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_4 
(int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ // infininate + while (0 <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + //no loop + l = UINT_MAX; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (--l < n) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (--l < 64) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = 32; + while (--l < n) +*a++ = *b++ + 1; + return l; +} + + +int a[3200], b[3200]; +in
Re: [PATCH v2] Analyze niter for until-wrap condition [PR101145]
Hi, I would like to have an early ping on this with more mail addresses. BR, Jiufu. On 2021-07-07 20:47, Jiufu Guo wrote: Changes since v1: * Update assumptions for niter, add more test case checks * Use widest_int/wide_int instead of mpz to do +-/ * Move some early checks for quick return For code like: unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i > val; ++i) cnt++; return cnt; } The number of iterations should be about UINT_MAX - start. There is a function adjust_cond_for_loop_until_wrap which handles similar work for const bases. Like adjust_cond_for_loop_until_wrap, this patch enhances the functions number_of_iterations_cond/number_of_iterations_lt to analyze the number of iterations for this kind of loop. Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. Is this ok for trunk? gcc/ChangeLog: 2021-07-07 Jiufu Guo PR tree-optimization/101145 * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): New function. (number_of_iterations_lt): Invoke above function. (adjust_cond_for_loop_until_wrap): Merge to number_of_iterations_until_wrap. (number_of_iterations_cond): Update invokes for adjust_cond_for_loop_until_wrap and number_of_iterations_lt. gcc/testsuite/ChangeLog: 2021-07-07 Jiufu Guo PR tree-optimization/101145 * gcc.dg/vect/pr101145.c: New test. * gcc.dg/vect/pr101145.inc: New test. * gcc.dg/vect/pr101145_1.c: New test. * gcc.dg/vect/pr101145_2.c: New test. * gcc.dg/vect/pr101145_3.c: New test. * gcc.dg/vect/pr101145inf.c: New test. * gcc.dg/vect/pr101145inf.inc: New test. * gcc.dg/vect/pr101145inf_1.c: New test. 
--- gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++ gcc/testsuite/gcc.dg/vect/pr101145.inc| 63 gcc/testsuite/gcc.dg/vect/pr101145_1.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145_2.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145_3.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ gcc/tree-ssa-loop-niter.c | 157 ++ 9 files changed, 463 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c new file mode 100644 index 000..74031b031cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#include + +unsigned __attribute__ ((noinline)) +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (UINT_MAX - 64 < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = UINT_MAX - 32; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_4 
(int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ // infininate + while (0 <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + //no loop + l = UINT_MAX; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (--l < n) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (--l < 64) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = 32; + while (--l < n) +*a++ = *b++ + 1; + return l; +} + + +int a[3200], b[3200]; +int fail; + +int +main () +{ + unsigned l, n; + unsigned res; + /* l > n*/ + n = UINT_MAX - 64; + l = n + 32; + res = foo (a, b, l, n); + if (res != 0) +fail++; + + l = n; +
[PATCH v2] Analyze niter for until-wrap condition [PR101145]
Changes since v1: * Update assumptions for niter, add more test case checks * Use widest_int/wide_int instead of mpz to do +-/ * Move some early checks for quick return For code like: unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i > val; ++i) cnt++; return cnt; } The number of iterations should be about UINT_MAX - start. There is a function adjust_cond_for_loop_until_wrap which handles similar work for const bases. Like adjust_cond_for_loop_until_wrap, this patch enhances the functions number_of_iterations_cond/number_of_iterations_lt to analyze the number of iterations for this kind of loop. Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. Is this ok for trunk? gcc/ChangeLog: 2021-07-07 Jiufu Guo PR tree-optimization/101145 * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): New function. (number_of_iterations_lt): Invoke above function. (adjust_cond_for_loop_until_wrap): Merge to number_of_iterations_until_wrap. (number_of_iterations_cond): Update invokes for adjust_cond_for_loop_until_wrap and number_of_iterations_lt. gcc/testsuite/ChangeLog: 2021-07-07 Jiufu Guo PR tree-optimization/101145 * gcc.dg/vect/pr101145.c: New test. * gcc.dg/vect/pr101145.inc: New test. * gcc.dg/vect/pr101145_1.c: New test. * gcc.dg/vect/pr101145_2.c: New test. * gcc.dg/vect/pr101145_3.c: New test. * gcc.dg/vect/pr101145inf.c: New test. * gcc.dg/vect/pr101145inf.inc: New test. * gcc.dg/vect/pr101145inf_1.c: New test. 
--- gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++ gcc/testsuite/gcc.dg/vect/pr101145.inc| 63 gcc/testsuite/gcc.dg/vect/pr101145_1.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145_2.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145_3.c| 15 ++ gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ gcc/tree-ssa-loop-niter.c | 157 ++ 9 files changed, 463 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c new file mode 100644 index 000..74031b031cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#include <limits.h> + +unsigned __attribute__ ((noinline)) +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (UINT_MAX - 64 < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = UINT_MAX - 32; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_4 
(int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ // infininate + while (0 <= ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + //no loop + l = UINT_MAX; + while (n < ++l) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (--l < n) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (--l < 64) +*a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = 32; + while (--l < n) +*a++ = *b++ + 1; + return l; +} + + +int a[3200], b[3200]; +int fail; + +int +main () +{ + unsigned l, n; + unsigned res; + /* l > n*/ + n = UINT_MAX - 64; + l = n + 32; + res = foo (a, b, l, n); + if (res != 0) +fail++; + + l = n; + res = foo (a, b, l, n); + if (res != 0) +fail++; + + l = n - 1; + res = foo (a, b, l, n); + if (res != l + 1) +fail++; +