Re: [PATCH] xtensa: Make full transition to LRA

2024-01-23 Thread Max Filippov
Hi Suwa-san,

I've finally processed the new issues introduced by this change.

On Wed, May 10, 2023 at 2:10 AM Max Filippov  wrote:
> On Mon, May 8, 2023 at 6:38 AM Takayuki 'January June' Suwa
>  wrote:
> >
> > gcc/ChangeLog:
> >
> > * config/xtensa/constraints.md (R, T, U):
> > Change define_constraint to define_memory_constraint.
> > * config/xtensa/xtensa.cc
> > (xtensa_lra_p, TARGET_LRA_P): Remove.
> > (xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
> > clause as it can no longer be true.
> > (xtensa_output_integer_literal_parts): Consider 16-bit wide
> > constants.
> > (xtensa_legitimate_constant_p): Add short-circuit path for
> > integer load instructions.
> > * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
> > rather reload_in_progress and reload_completed.
> > * config/xtensa/xtensa.opt (mlra): Remove.
> > ---
> >  gcc/config/xtensa/constraints.md | 26 --
> >  gcc/config/xtensa/xtensa.cc  | 26 +-
> >  gcc/config/xtensa/xtensa.md  |  2 +-
> >  gcc/config/xtensa/xtensa.opt |  4 
> >  4 files changed, 14 insertions(+), 44 deletions(-)
>
> That's impressive.
> This version introduces a few execution failures in the testsuite on
> little endian targets and a bunch more (but not all, some execution
> tests still pass) on big endian.
> I'm traveling this week and likely won't be able to take a deep look
> into it until 5/15.
>
> New LE failures:

All of the LE failures are related to zero-overhead loops. Dropping the
operand 2 from the doloop_end pattern fixes that (change 1).

> New BE failures:

All of the BE failures are related to loading HImode constants into
registers, which instead of

.literal .LCx value
...
l32r register, .LCx

now generates the following code:

.literal .LCx value
.literal .LCy .LCx
...
l32r register1, .LCy
l16ui register, register1, 0

I've fixed that by allowing HImode constants in the literal pool in the
'move_operand' predicate, making addresses of such constants
legitimate in the xtensa_legitimate_address_p and adding an
alternative with l32r opcode to the movhi_internal pattern (change 2).

With these additional changes there's no new regression failures
and the generated code looks mostly the same as with the reload.

--
Thanks.
-- Max
From 0fb9ddfd22d11579674ac4a95912d2bc5612deb7 Mon Sep 17 00:00:00 2001
From: Max Filippov 
Date: Sun, 21 Jan 2024 16:14:20 -0800
Subject: [PATCH 1/2] gcc: xtensa: drop operand 2 from doloop_end pattern

gcc/ChangeLog:
	* config/xtensa/xtensa.md (doloop_end): Drop operand 2.
---
 gcc/config/xtensa/xtensa.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 7aded86e244f..a9c37da48b81 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2368,14 +2368,12 @@
 	  (set (match_dup 0)
 		   (plus:SI (match_dup 0)
 			(const_int -1)))
-	  (unspec [(const_int 0)] UNSPEC_LSETUP_END)
-	  (clobber (match_dup 2))])] ; match_scratch
+	  (unspec [(const_int 0)] UNSPEC_LSETUP_END)])]
   "TARGET_LOOPS && optimize"
 {
   /* The loop optimizer doesn't check the predicates... */
   if (GET_MODE (operands[0]) != SImode)
 FAIL;
-  operands[2] = gen_rtx_SCRATCH (SImode);
 })
 
 
-- 
2.39.2

From e5536a47e9f1ae856c2491919933d18866511991 Mon Sep 17 00:00:00 2001
From: Max Filippov 
Date: Tue, 23 Jan 2024 10:57:21 -0800
Subject: [PATCH 2/2] gcc: xtensa: fix HImode constant loads

gcc/ChangeLog:
	* config/xtensa/predicates.md (move_operand): Don't check that a
	constant pool operand size is a multiple of UNITS_PER_WORD.
	* config/xtensa/xtensa.cc (xtensa_legitimate_address_p): Don't
	check that mode size is at least UNITS_PER_WORD.
	* config/xtensa/xtensa.md (movhi_internal): Add alternative
	loading constant from a literal pool.
---
 gcc/config/xtensa/predicates.md | 4 +---
 gcc/config/xtensa/xtensa.cc | 2 +-
 gcc/config/xtensa/xtensa.md | 9 +
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 672fb003a6c5..dd77911e3b70 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -143,9 +143,7 @@
 (define_predicate "move_operand"
   (ior
  (ior (match_operand 0 "register_operand")
-	  (and (match_operand 0 "memory_operand")
-	   (match_test "!constantpool_mem_p (op)
-			|| GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))
+	  (match_operand 0 "memory_operand"))
  (ior (and (match_code "const_int")
 	   (match_test "(GET_MODE_CLASS (mode) == MODE_INT
 			 && xtensa_simm12b (INTVAL (op)))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 39ef576ac4a9..3c2d21fe8e2e 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2343,7 +2343,7 @@ 

Re: [PATCH] xtensa: Make full transition to LRA

2023-05-10 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, May 8, 2023 at 6:38 AM Takayuki 'January June' Suwa
 wrote:
>
> gcc/ChangeLog:
>
> * config/xtensa/constraints.md (R, T, U):
> Change define_constraint to define_memory_constraint.
> * config/xtensa/xtensa.cc
> (xtensa_lra_p, TARGET_LRA_P): Remove.
> (xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
> clause as it can no longer be true.
> (xtensa_output_integer_literal_parts): Consider 16-bit wide
> constants.
> (xtensa_legitimate_constant_p): Add short-circuit path for
> integer load instructions.
> * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
> rather reload_in_progress and reload_completed.
> * config/xtensa/xtensa.opt (mlra): Remove.
> ---
>  gcc/config/xtensa/constraints.md | 26 --
>  gcc/config/xtensa/xtensa.cc  | 26 +-
>  gcc/config/xtensa/xtensa.md  |  2 +-
>  gcc/config/xtensa/xtensa.opt |  4 
>  4 files changed, 14 insertions(+), 44 deletions(-)

That's impressive.
This version introduces a few execution failures in the testsuite on
little endian targets and a bunch more (but not all, some execution
tests still pass) on big endian.
I'm traveling this week and likely won't be able to take a deep look
into it until 5/15.

New LE failures:

+FAIL: gcc.c-torture/execute/pr56866.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gcc.dg/torture/pr45764.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gcc.dg/torture/pr45764.c   -O3 -g  execution test

+FAIL: gfortran.dg/c-interop/section-2.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/c-interop/section-2p.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/c-interop/section-3.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/c-interop/section-3p.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/bind-c-contiguous-3.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/bind-c-contiguous-3.f90   -O3 -g  execution test
+FAIL: gfortran.dg/check_bits_2.f90   -O1  output pattern test
+FAIL: gfortran.dg/coarray_ptr_comp_1.f08   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/coarray_ptr_comp_1.f08   -O3 -g  execution test
+FAIL: gfortran.dg/loc_2.f90   -O2  execution test
+FAIL: gfortran.dg/loc_2.f90   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  execution test
+FAIL: gfortran.dg/loc_2.f90   -O3 -g  execution test
+FAIL: gfortran.dg/loc_2.f90   -Os  execution test
+FAIL: gfortran.dg/sizeof_6.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.fortran-torture/execute/forall_7.f90 execution,  -O2
-fbounds-check

New BE failures:

+FAIL: gcc.c-torture/execute/builtins/memset-chk.c execution,  -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions
+FAIL: gcc.c-torture/execute/builtins/memset-chk.c execution,  -O3 -g
+FAIL: gcc.c-torture/execute/2412-3.c   -O2  execution test
+FAIL: gcc.c-torture/execute/2412-3.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/2412-3.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/20020201-1.c   -O2  execution test
+FAIL: gcc.c-torture/execute/20020201-1.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/20020201-1.c   -Os  execution test
+FAIL: gcc.c-torture/execute/20020201-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/20030224-2.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20040629-1.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20040629-1.c   -O1  execution test
+FAIL: gcc.c-torture/execute/20040705-1.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20040705-1.c   -O1  execution test
+FAIL: gcc.c-torture/execute/20040705-2.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20040705-2.c   -O1  execution test
+FAIL: gcc.c-torture/execute/930603-3.c   -O2  execution test
+FAIL: gcc.c-torture/execute/930603-3.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/930603-3.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/931004-10.c   -O2  execution test
+FAIL: gcc.c-torture/execute/931004-10.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gcc.c-torture/execute/931004-10.c   -O3 -g  execution 

Re: [PATCH] xtensa: Make full transition to LRA

2023-05-08 Thread Richard Biener via Gcc-patches
On Mon, May 8, 2023 at 3:39 PM Takayuki 'January June' Suwa via
Gcc-patches  wrote:
>
> gcc/ChangeLog:
>
> * config/xtensa/constraints.md (R, T, U):
> Change define_constraint to define_memory_constraint.
> * config/xtensa/xtensa.cc
> (xtensa_lra_p, TARGET_LRA_P): Remove.
> (xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
> clause as it can no longer be true.
> (xtensa_output_integer_literal_parts): Consider 16-bit wide
> constants.
> (xtensa_legitimate_constant_p): Add short-circuit path for
> integer load instructions.
> * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
> rather reload_in_progress and reload_completed.
> * config/xtensa/xtensa.opt (mlra): Remove.
> ---
>  gcc/config/xtensa/constraints.md | 26 --
>  gcc/config/xtensa/xtensa.cc  | 26 +-
>  gcc/config/xtensa/xtensa.md  |  2 +-
>  gcc/config/xtensa/xtensa.opt |  4 
>  4 files changed, 14 insertions(+), 44 deletions(-)
>
> diff --git a/gcc/config/xtensa/constraints.md 
> b/gcc/config/xtensa/constraints.md
> index 53e4d0d8dd1..9b31e162941 100644
> --- a/gcc/config/xtensa/constraints.md
> +++ b/gcc/config/xtensa/constraints.md
> @@ -123,29 +123,19 @@
>(and (match_code "const_int")
>(match_test "! xtensa_split1_finished_p ()"
>
> -;; Memory constraints.  Do not use define_memory_constraint here.  Doing so
> -;; causes reload to force some constants into the constant pool, but since
> -;; the Xtensa constant pool can only be accessed with L32R instructions, it
> -;; is always better to just copy a constant into a register.  Instead, use
> -;; regular constraints but add a check to allow pseudos during reload.
> +;; Memory constraints.
>
> -(define_constraint "R"
> +(define_memory_constraint "R"
>   "Memory that can be accessed with a 4-bit unsigned offset from a register."
> - (ior (and (match_code "mem")
> -  (match_test "smalloffset_mem_p (op)"))
> -  (and (match_code "reg")
> -  (match_test "reload_in_progress
> -   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
> + (and (match_code "mem")
> +  (match_test "smalloffset_mem_p (op)")))
>
> -(define_constraint "T"
> +(define_memory_constraint "T"
>   "Memory in a literal pool (addressable with an L32R instruction)."
>   (and (match_code "mem")
>(match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
>
> -(define_constraint "U"
> +(define_memory_constraint "U"
>   "Memory that is not in a literal pool."
> - (ior (and (match_code "mem")
> -  (match_test "! constantpool_mem_p (op)"))
> -  (and (match_code "reg")
> -  (match_test "reload_in_progress
> -   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
> + (and (match_code "mem")
> +  (match_test "! constantpool_mem_p (op)")))
> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
> index 9e5d314e143..f4434ec6e2c 100644
> --- a/gcc/config/xtensa/xtensa.cc
> +++ b/gcc/config/xtensa/xtensa.cc
> @@ -190,7 +190,6 @@ static void xtensa_output_mi_thunk (FILE *file, tree 
> thunk ATTRIBUTE_UNUSED,
> HOST_WIDE_INT delta,
> HOST_WIDE_INT vcall_offset,
> tree function);
> -static bool xtensa_lra_p (void);
>
>  static rtx xtensa_delegitimize_address (rtx);
>
> @@ -286,9 +285,6 @@ static rtx xtensa_delegitimize_address (rtx);
>  #undef TARGET_CANNOT_FORCE_CONST_MEM
>  #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem
>
> -#undef TARGET_LRA_P
> -#define TARGET_LRA_P xtensa_lra_p
> -
>  #undef TARGET_LEGITIMATE_ADDRESS_P
>  #define TARGET_LEGITIMATE_ADDRESS_Pxtensa_legitimate_address_p
>
> @@ -1266,14 +1262,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode 
> mode)
>
>operands[1] = xtensa_copy_incoming_a7 (operands[1]);
>
> -  /* During reload we don't want to emit (subreg:X (mem:Y)) since that
> - instruction won't be recognized after reload, so we remove the
> - subreg and adjust mem accordingly.  */
> -  if (reload_in_progress)
> -{
> -  operands[0] = fixup_subreg_mem (operands[0]);
> -  operands[1] = fixup_subreg_mem (operands[1]);
> -}
>return 0;
>  }
>
> @@ -3196,7 +3184,7 @@ xtensa_output_integer_literal_parts (FILE *file, rtx x, 
> int size)
>fputs (", ", file);
>xtensa_output_integer_literal_parts (file, second, size / 2);
>  }
> -  else if (size == 4)
> +  else if (size == 4 || size == 2)
>  {
>output_addr_const (file, x);
>  }
> @@ -4876,6 +4864,10 @@ xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx 
> chain)
>  static bool
>  xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
>  {
> +  if (CONST_INT_P (x))
> +return TARGET_AUTO_LITPOOLS || TARGET_CONST16
> +  || xtensa_simm12b (INTVAL 

[PATCH] xtensa: Make full transition to LRA

2023-05-08 Thread Takayuki 'January June' Suwa via Gcc-patches
gcc/ChangeLog:

* config/xtensa/constraints.md (R, T, U):
Change define_constraint to define_memory_constraint.
* config/xtensa/xtensa.cc
(xtensa_lra_p, TARGET_LRA_P): Remove.
(xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
clause as it can no longer be true.
(xtensa_output_integer_literal_parts): Consider 16-bit wide
constants.
(xtensa_legitimate_constant_p): Add short-circuit path for
integer load instructions.
* config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
rather reload_in_progress and reload_completed.
* config/xtensa/xtensa.opt (mlra): Remove.
---
 gcc/config/xtensa/constraints.md | 26 --
 gcc/config/xtensa/xtensa.cc  | 26 +-
 gcc/config/xtensa/xtensa.md  |  2 +-
 gcc/config/xtensa/xtensa.opt |  4 
 4 files changed, 14 insertions(+), 44 deletions(-)

diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 53e4d0d8dd1..9b31e162941 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -123,29 +123,19 @@
   (and (match_code "const_int")
   (match_test "! xtensa_split1_finished_p ()"
 
-;; Memory constraints.  Do not use define_memory_constraint here.  Doing so
-;; causes reload to force some constants into the constant pool, but since
-;; the Xtensa constant pool can only be accessed with L32R instructions, it
-;; is always better to just copy a constant into a register.  Instead, use
-;; regular constraints but add a check to allow pseudos during reload.
+;; Memory constraints.
 
-(define_constraint "R"
+(define_memory_constraint "R"
  "Memory that can be accessed with a 4-bit unsigned offset from a register."
- (ior (and (match_code "mem")
-  (match_test "smalloffset_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "smalloffset_mem_p (op)")))
 
-(define_constraint "T"
+(define_memory_constraint "T"
  "Memory in a literal pool (addressable with an L32R instruction)."
  (and (match_code "mem")
   (match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
 
-(define_constraint "U"
+(define_memory_constraint "U"
  "Memory that is not in a literal pool."
- (ior (and (match_code "mem")
-  (match_test "! constantpool_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "! constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 9e5d314e143..f4434ec6e2c 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -190,7 +190,6 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk 
ATTRIBUTE_UNUSED,
HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset,
tree function);
-static bool xtensa_lra_p (void);
 
 static rtx xtensa_delegitimize_address (rtx);
 
@@ -286,9 +285,6 @@ static rtx xtensa_delegitimize_address (rtx);
 #undef TARGET_CANNOT_FORCE_CONST_MEM
 #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P xtensa_lra_p
-
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_Pxtensa_legitimate_address_p
 
@@ -1266,14 +1262,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode 
mode)
 
   operands[1] = xtensa_copy_incoming_a7 (operands[1]);
 
-  /* During reload we don't want to emit (subreg:X (mem:Y)) since that
- instruction won't be recognized after reload, so we remove the
- subreg and adjust mem accordingly.  */
-  if (reload_in_progress)
-{
-  operands[0] = fixup_subreg_mem (operands[0]);
-  operands[1] = fixup_subreg_mem (operands[1]);
-}
   return 0;
 }
 
@@ -3196,7 +3184,7 @@ xtensa_output_integer_literal_parts (FILE *file, rtx x, 
int size)
   fputs (", ", file);
   xtensa_output_integer_literal_parts (file, second, size / 2);
 }
-  else if (size == 4)
+  else if (size == 4 || size == 2)
 {
   output_addr_const (file, x);
 }
@@ -4876,6 +4864,10 @@ xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain)
 static bool
 xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 {
+  if (CONST_INT_P (x))
+return TARGET_AUTO_LITPOOLS || TARGET_CONST16
+  || xtensa_simm12b (INTVAL (x));
+
   return !xtensa_tls_referenced_p (x);
 }
 
@@ -5317,12 +5309,4 @@ xtensa_delegitimize_address (rtx op)
   return op;
 }
 
-/* Implement TARGET_LRA_P.  */
-
-static bool
-xtensa_lra_p (void)
-{
-  return TARGET_LRA;
-}
-
 #include "gt-xtensa.h"
diff --git a/gcc/config/xtensa/xtensa.md