Re: [PATCH] x86: Always return pseudo register in ix86_gen_scratch_sse_rtx

2022-03-03 Thread Hongtao Liu via Gcc-patches
On Thu, Mar 3, 2022 at 10:22 PM H.J. Lu via Gcc-patches wrote:
>
> ix86_gen_scratch_sse_rtx returns XMM7/XMM15/XMM31 as a scratch vector
> register to prevent RTL optimizers from removing the vector register.
> This introduces a conflict with explicit XMM7/XMM15/XMM31 usage, and
> when it is called by RTL optimizers, it may introduce conflicting usages
> of XMM7/XMM15/XMM31.
>
> Change ix86_gen_scratch_sse_rtx to always return a pseudo register and
> xfail x86 tests which are optimized with a hard scratch register.
LGTM.
>
> gcc/
>
> PR target/104704
> * config/i386/i386.cc (ix86_gen_scratch_sse_rtx): Always return
> a pseudo register.
>
> gcc/testsuite/
>
> PR target/104704
> * gcc.target/i386/incoming-11.c: Xfail.
> * gcc.target/i386/pieces-memset-3.c: Likewise.
> * gcc.target/i386/pieces-memset-37.c: Likewise.
> * gcc.target/i386/pieces-memset-39.c: Likewise.
> * gcc.target/i386/pieces-memset-46.c: Likewise.
> * gcc.target/i386/pieces-memset-47.c: Likewise.
> * gcc.target/i386/pieces-memset-48.c: Likewise.
> * gcc.target/i386/pr90773-5.c: Likewise.
> * gcc.target/i386/pr90773-14.c: Likewise.
> * gcc.target/i386/pr90773-17.c: Likewise.
> * gcc.target/i386/pr100865-8a.c: Likewise.
> * gcc.target/i386/pr100865-8c.c: Likewise.
> * gcc.target/i386/pr100865-9c.c: Likewise.
> * gcc.target/i386/pieces-memset-21.c: Always expect vzeroupper.
> * gcc.target/i386/pr82941-1.c: Likewise.
> * gcc.target/i386/pr82942-1.c: Likewise.
> * gcc.target/i386/pr82990-1.c: Likewise.
> * gcc.target/i386/pr82990-3.c: Likewise.
> * gcc.target/i386/pr82990-5.c: Likewise.
> * gcc.target/i386/pr100865-11b.c: Expect vmovdqa instead of
> vmovdqa64.
> * gcc.target/i386/pr100865-12b.c: Likewise.
> * gcc.target/i386/pr100865-8b.c: Likewise.
> * gcc.target/i386/pr100865-9b.c: Likewise.
> * gcc.target/i386/pr104704-1.c: New test.
> * gcc.target/i386/pr104704-2.c: Likewise.
> * gcc.target/i386/pr104704-3.c: Likewise.
> * gcc.target/i386/pr104704-4.c: Likewise.
> * gcc.target/i386/pr104704-5.c: Likewise.
> * gcc.target/i386/pr104704-6.c: Likewise.
> ---
>  gcc/config/i386/i386.cc   | 19 +--
>  gcc/testsuite/gcc.target/i386/incoming-11.c   |  2 +-
>  .../gcc.target/i386/pieces-memset-21.c|  3 +-
>  .../gcc.target/i386/pieces-memset-3.c |  4 +--
>  .../gcc.target/i386/pieces-memset-37.c|  4 +--
>  .../gcc.target/i386/pieces-memset-39.c|  4 +--
>  .../gcc.target/i386/pieces-memset-46.c|  2 +-
>  .../gcc.target/i386/pieces-memset-47.c|  2 +-
>  .../gcc.target/i386/pieces-memset-48.c|  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-11b.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-12b.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-8a.c   |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-8b.c   |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-8c.c   |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-9b.c   |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-9c.c   |  2 +-
>  gcc/testsuite/gcc.target/i386/pr104704-1.c| 33 +++
>  gcc/testsuite/gcc.target/i386/pr104704-2.c| 33 +++
>  gcc/testsuite/gcc.target/i386/pr104704-3.c| 33 +++
>  gcc/testsuite/gcc.target/i386/pr104704-4.c| 33 +++
>  gcc/testsuite/gcc.target/i386/pr104704-5.c| 33 +++
>  gcc/testsuite/gcc.target/i386/pr104704-6.c| 33 +++
>  gcc/testsuite/gcc.target/i386/pr82941-1.c |  3 +-
>  gcc/testsuite/gcc.target/i386/pr82942-1.c |  3 +-
>  gcc/testsuite/gcc.target/i386/pr82990-1.c |  3 +-
>  gcc/testsuite/gcc.target/i386/pr82990-3.c |  3 +-
>  gcc/testsuite/gcc.target/i386/pr82990-5.c |  3 +-
>  gcc/testsuite/gcc.target/i386/pr90773-14.c|  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-17.c|  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-5.c |  2 +-
>  30 files changed, 225 insertions(+), 50 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-6.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index b2bf90576d5..95219902694 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -23786,24 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
>  rtx
>  ix86_gen_scratch_sse_rtx (machine_mode mode)
>  {
> -  if (TARGET_SSE && !lra_in_progress)
> -{
> -  unsigned int regno;
> 

[PATCH] x86: Always return pseudo register in ix86_gen_scratch_sse_rtx

2022-03-03 Thread H.J. Lu via Gcc-patches
ix86_gen_scratch_sse_rtx returns XMM7/XMM15/XMM31 as a scratch vector
register to prevent RTL optimizers from removing the vector register.
This introduces a conflict with explicit XMM7/XMM15/XMM31 usage, and
when it is called by RTL optimizers, it may introduce conflicting usages
of XMM7/XMM15/XMM31.

Change ix86_gen_scratch_sse_rtx to always return a pseudo register and
xfail x86 tests which are optimized with a hard scratch register.

gcc/

PR target/104704
* config/i386/i386.cc (ix86_gen_scratch_sse_rtx): Always return
a pseudo register.

gcc/testsuite/

PR target/104704
* gcc.target/i386/incoming-11.c: Xfail.
* gcc.target/i386/pieces-memset-3.c: Likewise.
* gcc.target/i386/pieces-memset-37.c: Likewise.
* gcc.target/i386/pieces-memset-39.c: Likewise.
* gcc.target/i386/pieces-memset-46.c: Likewise.
* gcc.target/i386/pieces-memset-47.c: Likewise.
* gcc.target/i386/pieces-memset-48.c: Likewise.
* gcc.target/i386/pr90773-5.c: Likewise.
* gcc.target/i386/pr90773-14.c: Likewise.
* gcc.target/i386/pr90773-17.c: Likewise.
* gcc.target/i386/pr100865-8a.c: Likewise.
* gcc.target/i386/pr100865-8c.c: Likewise.
* gcc.target/i386/pr100865-9c.c: Likewise.
* gcc.target/i386/pieces-memset-21.c: Always expect vzeroupper.
* gcc.target/i386/pr82941-1.c: Likewise.
* gcc.target/i386/pr82942-1.c: Likewise.
* gcc.target/i386/pr82990-1.c: Likewise.
* gcc.target/i386/pr82990-3.c: Likewise.
* gcc.target/i386/pr82990-5.c: Likewise.
* gcc.target/i386/pr100865-11b.c: Expect vmovdqa instead of
vmovdqa64.
* gcc.target/i386/pr100865-12b.c: Likewise.
* gcc.target/i386/pr100865-8b.c: Likewise.
* gcc.target/i386/pr100865-9b.c: Likewise.
* gcc.target/i386/pr104704-1.c: New test.
* gcc.target/i386/pr104704-2.c: Likewise.
* gcc.target/i386/pr104704-3.c: Likewise.
* gcc.target/i386/pr104704-4.c: Likewise.
* gcc.target/i386/pr104704-5.c: Likewise.
* gcc.target/i386/pr104704-6.c: Likewise.
---
 gcc/config/i386/i386.cc   | 19 +--
 gcc/testsuite/gcc.target/i386/incoming-11.c   |  2 +-
 .../gcc.target/i386/pieces-memset-21.c|  3 +-
 .../gcc.target/i386/pieces-memset-3.c |  4 +--
 .../gcc.target/i386/pieces-memset-37.c|  4 +--
 .../gcc.target/i386/pieces-memset-39.c|  4 +--
 .../gcc.target/i386/pieces-memset-46.c|  2 +-
 .../gcc.target/i386/pieces-memset-47.c|  2 +-
 .../gcc.target/i386/pieces-memset-48.c|  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-11b.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-12b.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-8a.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-8b.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-8c.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-9b.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-9c.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr104704-1.c| 33 +++
 gcc/testsuite/gcc.target/i386/pr104704-2.c| 33 +++
 gcc/testsuite/gcc.target/i386/pr104704-3.c| 33 +++
 gcc/testsuite/gcc.target/i386/pr104704-4.c| 33 +++
 gcc/testsuite/gcc.target/i386/pr104704-5.c| 33 +++
 gcc/testsuite/gcc.target/i386/pr104704-6.c| 33 +++
 gcc/testsuite/gcc.target/i386/pr82941-1.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr82942-1.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr82990-1.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr82990-3.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr82990-5.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr90773-14.c|  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-17.c|  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-5.c |  2 +-
 30 files changed, 225 insertions(+), 50 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104704-6.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b2bf90576d5..95219902694 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23786,24 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
 rtx
 ix86_gen_scratch_sse_rtx (machine_mode mode)
 {
-  if (TARGET_SSE && !lra_in_progress)
-{
-  unsigned int regno;
-  if (TARGET_64BIT)
-   {
- /* In 64-bit mode, use XMM31 to avoid vzeroupper and always
-use XMM31 for CSE.  */
- if (ix86_hard_regno_mode_ok (LAST_EXT_REX_SSE_REG, mode))
-   regno = LAST_EXT_REX_SSE_REG;
- else