Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]
On 2/24/21 11:59 AM, Jakub Jelinek via Gcc-patches wrote: > Hi! > > The r10-2806 change regressed following testcases, instead of doing > int -> unsigned long sign-extension once and then add 8, 16, ... 56 to it > for each of the memory access, it adds 8, 16, ... 56 in int mode and then > sign extends each. So that means: > + movq$0, (%rsp,%rax,8) > + leal1(%rdx), %eax > + cltq > + movq$0, (%rsp,%rax,8) > + leal2(%rdx), %eax > + cltq > + movq$0, (%rsp,%rax,8) > + leal3(%rdx), %eax > + cltq > + movq$0, (%rsp,%rax,8) > + leal4(%rdx), %eax > + cltq > + movq$0, (%rsp,%rax,8) > + leal5(%rdx), %eax > + cltq > + movq$0, (%rsp,%rax,8) > + leal6(%rdx), %eax > + addl$7, %edx > + cltq > + movslq %edx, %rdx > + movq$0, (%rsp,%rax,8) > movq$0, (%rsp,%rdx,8) > - movq$0, 8(%rsp,%rdx,8) > - movq$0, 16(%rsp,%rdx,8) > - movq$0, 24(%rsp,%rdx,8) > - movq$0, 32(%rsp,%rdx,8) > - movq$0, 40(%rsp,%rdx,8) > - movq$0, 48(%rsp,%rdx,8) > - movq$0, 56(%rsp,%rdx,8) > GCC 9 -> 10 change or: > - movq$0, (%rsp,%rdx,8) > - movq$0, 8(%rsp,%rdx,8) > - movq$0, 16(%rsp,%rdx,8) > - movq$0, 24(%rsp,%rdx,8) > - movq$0, 32(%rsp,%rdx,8) > - movq$0, 40(%rsp,%rdx,8) > - movq$0, 48(%rsp,%rdx,8) > - movq$0, 56(%rsp,%rdx,8) > + movq$0, (%rsp,%rax,8) > + leal1(%rdx), %eax > + movq$0, (%rsp,%rax,8) > + leal2(%rdx), %eax > + movq$0, (%rsp,%rax,8) > + leal3(%rdx), %eax > + movq$0, (%rsp,%rax,8) > + leal4(%rdx), %eax > + movq$0, (%rsp,%rax,8) > + leal5(%rdx), %eax > + movq$0, (%rsp,%rax,8) > + leal6(%rdx), %eax > + movq$0, (%rsp,%rax,8) > + leal7(%rdx), %eax > + movq$0, (%rsp,%rax,8) > change on the other test. While for the former case of > int there is due to signed integer overflow (unless -fwrapv) > the possibility to undo it e.g. during expansion, for the unsigned > case information is unfortunately lost. > > The following patch adds single_use case which restores these testcases > but keeps the testcases the patch meant to improve as is. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2021-02-24 Jakub Jelinek > > PR target/95798 > * match.pd ((T)(A) + CST -> (T)(A + CST)): Add single_use check. > > * gcc.target/i386/pr95798-1.c: New test. > * gcc.target/i386/pr95798-2.c: New test. OK jeff
Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]
On Thu, 25 Feb 2021, Jakub Jelinek wrote: > On Wed, Feb 24, 2021 at 08:56:55PM +0100, Jakub Jelinek via Gcc-patches wrote: > > On Wed, Feb 24, 2021 at 08:52:44PM +0100, Marc Glisse wrote: > > > On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote: > > > > > > > The following patch adds single_use case which restores these testcases > > > > but keeps the testcases the patch meant to improve as is. > > > > > > Hello, > > > > > > I wonder if :s would be sufficient here? I don't have an opinion on which > > > one is better for this particular transformation (don't change the patch > > > because of my comment), we just seem to be getting more and more uses of > > > single_use in match.pd, maybe at some point we need to revisit the meaning > > > of :s or introduce a stronger :S. > > > > :s seems to work for these testcases too, I'm never sure about :s > > vs. single_use. > > Following passed bootstrap/regtest on x86_64-linux and i686-linux too. OK. Thanks, Richard. > 2021-02-24 Jakub Jelinek > > PR target/95798 > * match.pd ((T)(A) + CST -> (T)(A + CST)): Add :s to convert. > > * gcc.target/i386/pr95798-1.c: New test. > * gcc.target/i386/pr95798-2.c: New test. > > --- gcc/match.pd.jj 2021-02-24 12:58:22.233006845 +0100 > +++ gcc/match.pd 2021-02-24 20:54:41.970241132 +0100 > @@ -2492,7 +2492,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > /* ((T)(A)) + CST -> (T)(A + CST) */ > #if GIMPLE >(simplify > - (plus (convert SSA_NAME@0) INTEGER_CST@1) > + (plus (convert:s SSA_NAME@0) INTEGER_CST@1) > (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE > && TREE_CODE (type) == INTEGER_TYPE > && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0)) > --- gcc/testsuite/gcc.target/i386/pr95798-1.c.jj 2021-02-24 > 15:58:06.935598077 +0100 > +++ gcc/testsuite/gcc.target/i386/pr95798-1.c 2021-02-24 16:02:47.298504500 > +0100 > @@ -0,0 +1,29 @@ > +/* PR target/95798 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -masm=att" } */ > +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > + > +void bar (unsigned long long *, int); > + > +void > +foo (int y, unsigned long long z) > +{ > + unsigned long long x[1024]; > + unsigned long long i = y % 127; > + __builtin_memset (x, -1, sizeof (x)); > + x[i] = 0; > + x[i + 1] = 1; > + x[i + 2] = 2; > + x[i + 3] = 3; > + x[i + 4] = 4; > + x[i + 5] = 5; > + x[i + 6] = 6; > + x[i + 7] = 7; > + bar (x, y); > +} > --- gcc/testsuite/gcc.target/i386/pr95798-2.c.jj 2021-02-24 > 16:01:39.708250302 +0100 > +++ gcc/testsuite/gcc.target/i386/pr95798-2.c 2021-02-24 16:03:57.497729907 > +0100 > @@ -0,0 +1,29 @@ > +/* PR target/95798 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -masm=att" } */ > +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target > lp64 } } } */ > + > +void bar (unsigned long long *, int); > + > +void > +foo (unsigned int y, unsigned long long z) > +{ > + unsigned long long x[1024]; > + unsigned long long i = y % 127; > + __builtin_memset (x, -1, sizeof (x)); > + x[i] = 0; > + x[i + 1] = 1; > + x[i + 2] = 2; > + x[i + 3] = 3; > + x[i + 4] = 4; > + x[i + 5] = 5; > + x[i + 6] = 6; > + x[i + 7] = 7; > + bar (x, y); > +} > > Jakub > > -- Richard Biener SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]
On Wed, Feb 24, 2021 at 08:56:55PM +0100, Jakub Jelinek via Gcc-patches wrote: > On Wed, Feb 24, 2021 at 08:52:44PM +0100, Marc Glisse wrote: > > On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote: > > > > > The following patch adds single_use case which restores these testcases > > > but keeps the testcases the patch meant to improve as is. > > > > Hello, > > > > I wonder if :s would be sufficient here? I don't have an opinion on which > > one is better for this particular transformation (don't change the patch > > because of my comment), we just seem to be getting more and more uses of > > single_use in match.pd, maybe at some point we need to revisit the meaning > > of :s or introduce a stronger :S. > > :s seems to work for these testcases too, I'm never sure about :s > vs. single_use. Following passed bootstrap/regtest on x86_64-linux and i686-linux too. 2021-02-24 Jakub Jelinek PR target/95798 * match.pd ((T)(A) + CST -> (T)(A + CST)): Add :s to convert. * gcc.target/i386/pr95798-1.c: New test. * gcc.target/i386/pr95798-2.c: New test. --- gcc/match.pd.jj 2021-02-24 12:58:22.233006845 +0100 +++ gcc/match.pd2021-02-24 20:54:41.970241132 +0100 @@ -2492,7 +2492,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* ((T)(A)) + CST -> (T)(A + CST) */ #if GIMPLE (simplify - (plus (convert SSA_NAME@0) INTEGER_CST@1) + (plus (convert:s SSA_NAME@0) INTEGER_CST@1) (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE && TREE_CODE (type) == INTEGER_TYPE && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0)) --- gcc/testsuite/gcc.target/i386/pr95798-1.c.jj2021-02-24 15:58:06.935598077 +0100 +++ gcc/testsuite/gcc.target/i386/pr95798-1.c 2021-02-24 16:02:47.298504500 +0100 @@ -0,0 +1,29 @@ +/* PR target/95798 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ + +void bar (unsigned long long *, int); + +void +foo (int y, unsigned long long z) +{ + unsigned long long x[1024]; + unsigned long long i = y % 127; + __builtin_memset (x, -1, sizeof (x)); + x[i] = 0; + x[i + 1] = 1; + x[i + 2] = 2; + x[i + 3] = 3; + x[i + 4] = 4; + x[i + 5] = 5; + x[i + 6] = 6; + x[i + 7] = 7; + bar (x, y); +} --- gcc/testsuite/gcc.target/i386/pr95798-2.c.jj2021-02-24 16:01:39.708250302 +0100 +++ gcc/testsuite/gcc.target/i386/pr95798-2.c 2021-02-24 16:03:57.497729907 +0100 @@ -0,0 +1,29 @@ +/* PR target/95798 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ + +void bar (unsigned long long *, int); + +void +foo (unsigned int y, unsigned long long z) +{ + unsigned long long x[1024]; + unsigned long long i = y % 127; + __builtin_memset (x, -1, sizeof (x)); + x[i] = 0; + x[i + 1] = 1; + x[i + 2] = 2; + x[i + 3] = 3; + x[i + 4] = 4; + x[i + 5] = 5; + x[i + 6] = 6; + x[i + 7] = 7; + bar (x, y); +} Jakub
Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]
On Wed, Feb 24, 2021 at 08:52:44PM +0100, Marc Glisse wrote: > On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote: > > > The following patch adds single_use case which restores these testcases > > but keeps the testcases the patch meant to improve as is. > > Hello, > > I wonder if :s would be sufficient here? I don't have an opinion on which > one is better for this particular transformation (don't change the patch > because of my comment), we just seem to be getting more and more uses of > single_use in match.pd, maybe at some point we need to revisit the meaning > of :s or introduce a stronger :S. :s seems to work for these testcases too, I'm never sure about :s vs. single_use. Jakub
Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]
On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote: The following patch adds single_use case which restores these testcases but keeps the testcases the patch meant to improve as is. Hello, I wonder if :s would be sufficient here? I don't have an opinion on which one is better for this particular transformation (don't change the patch because of my comment), we just seem to be getting more and more uses of single_use in match.pd, maybe at some point we need to revisit the meaning of :s or introduce a stronger :S. -- Marc Glisse
[PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]
Hi! The r10-2806 change regressed following testcases, instead of doing int -> unsigned long sign-extension once and then add 8, 16, ... 56 to it for each of the memory access, it adds 8, 16, ... 56 in int mode and then sign extends each. So that means: + movq$0, (%rsp,%rax,8) + leal1(%rdx), %eax + cltq + movq$0, (%rsp,%rax,8) + leal2(%rdx), %eax + cltq + movq$0, (%rsp,%rax,8) + leal3(%rdx), %eax + cltq + movq$0, (%rsp,%rax,8) + leal4(%rdx), %eax + cltq + movq$0, (%rsp,%rax,8) + leal5(%rdx), %eax + cltq + movq$0, (%rsp,%rax,8) + leal6(%rdx), %eax + addl$7, %edx + cltq + movslq %edx, %rdx + movq$0, (%rsp,%rax,8) movq$0, (%rsp,%rdx,8) - movq$0, 8(%rsp,%rdx,8) - movq$0, 16(%rsp,%rdx,8) - movq$0, 24(%rsp,%rdx,8) - movq$0, 32(%rsp,%rdx,8) - movq$0, 40(%rsp,%rdx,8) - movq$0, 48(%rsp,%rdx,8) - movq$0, 56(%rsp,%rdx,8) GCC 9 -> 10 change or: - movq$0, (%rsp,%rdx,8) - movq$0, 8(%rsp,%rdx,8) - movq$0, 16(%rsp,%rdx,8) - movq$0, 24(%rsp,%rdx,8) - movq$0, 32(%rsp,%rdx,8) - movq$0, 40(%rsp,%rdx,8) - movq$0, 48(%rsp,%rdx,8) - movq$0, 56(%rsp,%rdx,8) + movq$0, (%rsp,%rax,8) + leal1(%rdx), %eax + movq$0, (%rsp,%rax,8) + leal2(%rdx), %eax + movq$0, (%rsp,%rax,8) + leal3(%rdx), %eax + movq$0, (%rsp,%rax,8) + leal4(%rdx), %eax + movq$0, (%rsp,%rax,8) + leal5(%rdx), %eax + movq$0, (%rsp,%rax,8) + leal6(%rdx), %eax + movq$0, (%rsp,%rax,8) + leal7(%rdx), %eax + movq$0, (%rsp,%rax,8) change on the other test. While for the former case of int there is due to signed integer overflow (unless -fwrapv) the possibility to undo it e.g. during expansion, for the unsigned case information is unfortunately lost. The following patch adds single_use case which restores these testcases but keeps the testcases the patch meant to improve as is. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2021-02-24 Jakub Jelinek PR target/95798 * match.pd ((T)(A) + CST -> (T)(A + CST)): Add single_use check. * gcc.target/i386/pr95798-1.c: New test. * gcc.target/i386/pr95798-2.c: New test. --- gcc/match.pd.jj 2021-02-24 12:58:22.233006845 +0100 +++ gcc/match.pd2021-02-24 15:41:15.64030 +0100 @@ -2492,11 +2492,12 @@ (define_operator_list COND_TERNARY /* ((T)(A)) + CST -> (T)(A + CST) */ #if GIMPLE (simplify - (plus (convert SSA_NAME@0) INTEGER_CST@1) + (plus (convert@2 SSA_NAME@0) INTEGER_CST@1) (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE && TREE_CODE (type) == INTEGER_TYPE && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0)) - && int_fits_type_p (@1, TREE_TYPE (@0))) + && int_fits_type_p (@1, TREE_TYPE (@0)) +&& single_use (@2)) /* Perform binary operation inside the cast if the constant fits and (A + CST)'s range does not overflow. */ (with --- gcc/testsuite/gcc.target/i386/pr95798-1.c.jj2021-02-24 15:58:06.935598077 +0100 +++ gcc/testsuite/gcc.target/i386/pr95798-1.c 2021-02-24 16:02:47.298504500 +0100 @@ -0,0 +1,29 @@ +/* PR target/95798 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ + +void bar (unsigned long long *, int); + +void +foo (int y, unsigned long long z) +{ + unsigned long long x[1024]; + unsigned long long i = y % 127; + __builtin_memset (x, -1, sizeof (x)); + x[i] = 0; + x[i + 1] = 1; + x[i + 2] = 2; + x[i + 3] = 3; + x[i + 4] = 4; + x[i + 5] = 5; + x[i + 6] = 6; + x[i + 7] = 7; + bar (x, y); +} --- gcc/testsuite/gcc.target/i386/pr95798-2.c.jj2021-02-24 16:01:39.708250302 +0100 +++ gcc/testsuite/gcc.target/i386/pr95798-2.c 2021-02-24 16:03:57.497729907 +0100 @@ -0,0 +1,29 @@ +/* PR target/95798 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "2,