Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-03-02 Thread Jeff Law via Gcc-patches



On 2/24/21 11:59 AM, Jakub Jelinek via Gcc-patches wrote:
> Hi!
>
> The r10-2806 change regressed following testcases, instead of doing
> int -> unsigned long sign-extension once and then add 8, 16, ... 56 to it
> for each of the memory access, it adds 8, 16, ... 56 in int mode and then
> sign extends each.  So that means:
> + movq$0, (%rsp,%rax,8)
> + leal1(%rdx), %eax
> + cltq
> + movq$0, (%rsp,%rax,8)
> + leal2(%rdx), %eax
> + cltq
> + movq$0, (%rsp,%rax,8)
> + leal3(%rdx), %eax
> + cltq
> + movq$0, (%rsp,%rax,8)
> + leal4(%rdx), %eax
> + cltq
> + movq$0, (%rsp,%rax,8)
> + leal5(%rdx), %eax
> + cltq
> + movq$0, (%rsp,%rax,8)
> + leal6(%rdx), %eax
> + addl$7, %edx
> + cltq
> + movslq  %edx, %rdx
> + movq$0, (%rsp,%rax,8)
>   movq$0, (%rsp,%rdx,8)
> - movq$0, 8(%rsp,%rdx,8)
> - movq$0, 16(%rsp,%rdx,8)
> - movq$0, 24(%rsp,%rdx,8)
> - movq$0, 32(%rsp,%rdx,8)
> - movq$0, 40(%rsp,%rdx,8)
> - movq$0, 48(%rsp,%rdx,8)
> - movq$0, 56(%rsp,%rdx,8)
> GCC 9 -> 10 change or:
> - movq$0, (%rsp,%rdx,8)
> - movq$0, 8(%rsp,%rdx,8)
> - movq$0, 16(%rsp,%rdx,8)
> - movq$0, 24(%rsp,%rdx,8)
> - movq$0, 32(%rsp,%rdx,8)
> - movq$0, 40(%rsp,%rdx,8)
> - movq$0, 48(%rsp,%rdx,8)
> - movq$0, 56(%rsp,%rdx,8)
> + movq$0, (%rsp,%rax,8)
> + leal1(%rdx), %eax
> + movq$0, (%rsp,%rax,8)
> + leal2(%rdx), %eax
> + movq$0, (%rsp,%rax,8)
> + leal3(%rdx), %eax
> + movq$0, (%rsp,%rax,8)
> + leal4(%rdx), %eax
> + movq$0, (%rsp,%rax,8)
> + leal5(%rdx), %eax
> + movq$0, (%rsp,%rax,8)
> + leal6(%rdx), %eax
> + movq$0, (%rsp,%rax,8)
> + leal7(%rdx), %eax
> + movq$0, (%rsp,%rax,8)
> change on the other test.  While for the former case of
> int there is due to signed integer overflow (unless -fwrapv)
> the possibility to undo it e.g. during expansion, for the unsigned
> case information is unfortunately lost.
>
> The following patch adds single_use case which restores these testcases
> but keeps the testcases the patch meant to improve as is.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-02-24  Jakub Jelinek  
>
>   PR target/95798
>   * match.pd ((T)(A) + CST -> (T)(A + CST)): Add single_use check.
>
>   * gcc.target/i386/pr95798-1.c: New test.
>   * gcc.target/i386/pr95798-2.c: New test.
OK
jeff



Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-02-25 Thread Richard Biener
On Thu, 25 Feb 2021, Jakub Jelinek wrote:

> On Wed, Feb 24, 2021 at 08:56:55PM +0100, Jakub Jelinek via Gcc-patches wrote:
> > On Wed, Feb 24, 2021 at 08:52:44PM +0100, Marc Glisse wrote:
> > > On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote:
> > > 
> > > > The following patch adds single_use case which restores these testcases
> > > > but keeps the testcases the patch meant to improve as is.
> > > 
> > > Hello,
> > > 
> > > I wonder if :s would be sufficient here? I don't have an opinion on which
> > > one is better for this particular transformation (don't change the patch
> > > because of my comment), we just seem to be getting more and more uses of
> > > single_use in match.pd, maybe at some point we need to revisit the meaning
> > > of :s or introduce a stronger :S.
> > 
> > :s seems to work for these testcases too, I'm never sure about :s
> > vs. single_use.
> 
> Following passed bootstrap/regtest on x86_64-linux and i686-linux too.

OK.

Thanks,
Richard.

> 2021-02-24  Jakub Jelinek  
> 
>   PR target/95798
>   * match.pd ((T)(A) + CST -> (T)(A + CST)): Add :s to convert.
> 
>   * gcc.target/i386/pr95798-1.c: New test.
>   * gcc.target/i386/pr95798-2.c: New test.
> 
> --- gcc/match.pd.jj   2021-02-24 12:58:22.233006845 +0100
> +++ gcc/match.pd  2021-02-24 20:54:41.970241132 +0100
> @@ -2492,7 +2492,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  /* ((T)(A)) + CST -> (T)(A + CST)  */
>  #if GIMPLE
>(simplify
> -   (plus (convert SSA_NAME@0) INTEGER_CST@1)
> +   (plus (convert:s SSA_NAME@0) INTEGER_CST@1)
>  (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
>   && TREE_CODE (type) == INTEGER_TYPE
>   && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> --- gcc/testsuite/gcc.target/i386/pr95798-1.c.jj  2021-02-24 
> 15:58:06.935598077 +0100
> +++ gcc/testsuite/gcc.target/i386/pr95798-1.c 2021-02-24 16:02:47.298504500 
> +0100
> @@ -0,0 +1,29 @@
> +/* PR target/95798 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +
> +void bar (unsigned long long *, int);
> +
> +void
> +foo (int y, unsigned long long z)
> +{
> +  unsigned long long x[1024];
> +  unsigned long long i = y % 127;
> +  __builtin_memset (x, -1, sizeof (x));
> +  x[i] = 0;
> +  x[i + 1] = 1;
> +  x[i + 2] = 2;
> +  x[i + 3] = 3;
> +  x[i + 4] = 4;
> +  x[i + 5] = 5;
> +  x[i + 6] = 6;
> +  x[i + 7] = 7;
> +  bar (x, y);
> +}
> --- gcc/testsuite/gcc.target/i386/pr95798-2.c.jj  2021-02-24 
> 16:01:39.708250302 +0100
> +++ gcc/testsuite/gcc.target/i386/pr95798-2.c 2021-02-24 16:03:57.497729907 
> +0100
> @@ -0,0 +1,29 @@
> +/* PR target/95798 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -masm=att" } */
> +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
> lp64 } } } */
> +
> +void bar (unsigned long long *, int);
> +
> +void
> +foo (unsigned int y, unsigned long long z)
> +{
> +  unsigned long long x[1024];
> +  unsigned long long i = y % 127;
> +  __builtin_memset (x, -1, sizeof (x));
> +  x[i] = 0;
> +  x[i + 1] = 1;
> +  x[i + 2] = 2;
> +  x[i + 3] = 3;
> +  x[i + 4] = 4;
> +  x[i + 5] = 5;
> +  x[i + 6] = 6;
> +  x[i + 7] = 7;
> +  bar (x, y);
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-02-25 Thread Jakub Jelinek via Gcc-patches
On Wed, Feb 24, 2021 at 08:56:55PM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Wed, Feb 24, 2021 at 08:52:44PM +0100, Marc Glisse wrote:
> > On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote:
> > 
> > > The following patch adds single_use case which restores these testcases
> > > but keeps the testcases the patch meant to improve as is.
> > 
> > Hello,
> > 
> > I wonder if :s would be sufficient here? I don't have an opinion on which
> > one is better for this particular transformation (don't change the patch
> > because of my comment), we just seem to be getting more and more uses of
> > single_use in match.pd, maybe at some point we need to revisit the meaning
> > of :s or introduce a stronger :S.
> 
> :s seems to work for these testcases too, I'm never sure about :s
> vs. single_use.

Following passed bootstrap/regtest on x86_64-linux and i686-linux too.

2021-02-24  Jakub Jelinek  

PR target/95798
* match.pd ((T)(A) + CST -> (T)(A + CST)): Add :s to convert.

* gcc.target/i386/pr95798-1.c: New test.
* gcc.target/i386/pr95798-2.c: New test.

--- gcc/match.pd.jj 2021-02-24 12:58:22.233006845 +0100
+++ gcc/match.pd2021-02-24 20:54:41.970241132 +0100
@@ -2492,7 +2492,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* ((T)(A)) + CST -> (T)(A + CST)  */
 #if GIMPLE
   (simplify
-   (plus (convert SSA_NAME@0) INTEGER_CST@1)
+   (plus (convert:s SSA_NAME@0) INTEGER_CST@1)
 (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
  && TREE_CODE (type) == INTEGER_TYPE
  && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
--- gcc/testsuite/gcc.target/i386/pr95798-1.c.jj2021-02-24 
15:58:06.935598077 +0100
+++ gcc/testsuite/gcc.target/i386/pr95798-1.c   2021-02-24 16:02:47.298504500 
+0100
@@ -0,0 +1,29 @@
+/* PR target/95798 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 
} } } */
+/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+
+void bar (unsigned long long *, int);
+
+void
+foo (int y, unsigned long long z)
+{
+  unsigned long long x[1024];
+  unsigned long long i = y % 127;
+  __builtin_memset (x, -1, sizeof (x));
+  x[i] = 0;
+  x[i + 1] = 1;
+  x[i + 2] = 2;
+  x[i + 3] = 3;
+  x[i + 4] = 4;
+  x[i + 5] = 5;
+  x[i + 6] = 6;
+  x[i + 7] = 7;
+  bar (x, y);
+}
--- gcc/testsuite/gcc.target/i386/pr95798-2.c.jj2021-02-24 
16:01:39.708250302 +0100
+++ gcc/testsuite/gcc.target/i386/pr95798-2.c   2021-02-24 16:03:57.497729907 
+0100
@@ -0,0 +1,29 @@
+/* PR target/95798 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 
} } } */
+/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+
+void bar (unsigned long long *, int);
+
+void
+foo (unsigned int y, unsigned long long z)
+{
+  unsigned long long x[1024];
+  unsigned long long i = y % 127;
+  __builtin_memset (x, -1, sizeof (x));
+  x[i] = 0;
+  x[i + 1] = 1;
+  x[i + 2] = 2;
+  x[i + 3] = 3;
+  x[i + 4] = 4;
+  x[i + 5] = 5;
+  x[i + 6] = 6;
+  x[i + 7] = 7;
+  bar (x, y);
+}

Jakub



Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Feb 24, 2021 at 08:52:44PM +0100, Marc Glisse wrote:
> On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote:
> 
> > The following patch adds single_use case which restores these testcases
> > but keeps the testcases the patch meant to improve as is.
> 
> Hello,
> 
> I wonder if :s would be sufficient here? I don't have an opinion on which
> one is better for this particular transformation (don't change the patch
> because of my comment), we just seem to be getting more and more uses of
> single_use in match.pd, maybe at some point we need to revisit the meaning
> of :s or introduce a stronger :S.

:s seems to work for these testcases too, I'm never sure about :s
vs. single_use.

Jakub



Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-02-24 Thread Marc Glisse

On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote:


The following patch adds single_use case which restores these testcases
but keeps the testcases the patch meant to improve as is.


Hello,

I wonder if :s would be sufficient here? I don't have an opinion on which 
one is better for this particular transformation (don't change the patch 
because of my comment), we just seem to be getting more and more uses of 
single_use in match.pd, maybe at some point we need to revisit the meaning 
of :s or introduce a stronger :S.


--
Marc Glisse


[PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
Hi!

The r10-2806 change regressed following testcases, instead of doing
int -> unsigned long sign-extension once and then add 8, 16, ... 56 to it
for each of the memory access, it adds 8, 16, ... 56 in int mode and then
sign extends each.  So that means:
+   movq$0, (%rsp,%rax,8)
+   leal1(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal2(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal3(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal4(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal5(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal6(%rdx), %eax
+   addl$7, %edx
+   cltq
+   movslq  %edx, %rdx
+   movq$0, (%rsp,%rax,8)
movq$0, (%rsp,%rdx,8)
-   movq$0, 8(%rsp,%rdx,8)
-   movq$0, 16(%rsp,%rdx,8)
-   movq$0, 24(%rsp,%rdx,8)
-   movq$0, 32(%rsp,%rdx,8)
-   movq$0, 40(%rsp,%rdx,8)
-   movq$0, 48(%rsp,%rdx,8)
-   movq$0, 56(%rsp,%rdx,8)
GCC 9 -> 10 change or:
-   movq$0, (%rsp,%rdx,8)
-   movq$0, 8(%rsp,%rdx,8)
-   movq$0, 16(%rsp,%rdx,8)
-   movq$0, 24(%rsp,%rdx,8)
-   movq$0, 32(%rsp,%rdx,8)
-   movq$0, 40(%rsp,%rdx,8)
-   movq$0, 48(%rsp,%rdx,8)
-   movq$0, 56(%rsp,%rdx,8)
+   movq$0, (%rsp,%rax,8)
+   leal1(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal2(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal3(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal4(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal5(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal6(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal7(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
change on the other test.  While for the former case of
int there is due to signed integer overflow (unless -fwrapv)
the possibility to undo it e.g. during expansion, for the unsigned
case information is unfortunately lost.

The following patch adds single_use case which restores these testcases
but keeps the testcases the patch meant to improve as is.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-02-24  Jakub Jelinek  

PR target/95798
* match.pd ((T)(A) + CST -> (T)(A + CST)): Add single_use check.

* gcc.target/i386/pr95798-1.c: New test.
* gcc.target/i386/pr95798-2.c: New test.

--- gcc/match.pd.jj 2021-02-24 12:58:22.233006845 +0100
+++ gcc/match.pd2021-02-24 15:41:15.64030 +0100
@@ -2492,11 +2492,12 @@ (define_operator_list COND_TERNARY
 /* ((T)(A)) + CST -> (T)(A + CST)  */
 #if GIMPLE
   (simplify
-   (plus (convert SSA_NAME@0) INTEGER_CST@1)
+   (plus (convert@2 SSA_NAME@0) INTEGER_CST@1)
 (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
  && TREE_CODE (type) == INTEGER_TYPE
  && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
- && int_fits_type_p (@1, TREE_TYPE (@0)))
+ && int_fits_type_p (@1, TREE_TYPE (@0))
+&& single_use (@2))
  /* Perform binary operation inside the cast if the constant fits
 and (A + CST)'s range does not overflow.  */
  (with
--- gcc/testsuite/gcc.target/i386/pr95798-1.c.jj2021-02-24 
15:58:06.935598077 +0100
+++ gcc/testsuite/gcc.target/i386/pr95798-1.c   2021-02-24 16:02:47.298504500 
+0100
@@ -0,0 +1,29 @@
+/* PR target/95798 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 
} } } */
+/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+
+void bar (unsigned long long *, int);
+
+void
+foo (int y, unsigned long long z)
+{
+  unsigned long long x[1024];
+  unsigned long long i = y % 127;
+  __builtin_memset (x, -1, sizeof (x));
+  x[i] = 0;
+  x[i + 1] = 1;
+  x[i + 2] = 2;
+  x[i + 3] = 3;
+  x[i + 4] = 4;
+  x[i + 5] = 5;
+  x[i + 6] = 6;
+  x[i + 7] = 7;
+  bar (x, y);
+}
--- gcc/testsuite/gcc.target/i386/pr95798-2.c.jj2021-02-24 
16:01:39.708250302 +0100
+++ gcc/testsuite/gcc.target/i386/pr95798-2.c   2021-02-24 16:03:57.497729907 
+0100
@@ -0,0 +1,29 @@
+/* PR target/95798 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 
} } } */
+/* { dg-final { scan-assembler "2,