Re: [PATCH] xtensa: Make full transition to LRA
Hi Suwa-san, I've finally processed the new issues introduced by this change. On Wed, May 10, 2023 at 2:10 AM Max Filippov wrote: > On Mon, May 8, 2023 at 6:38 AM Takayuki 'January June' Suwa > wrote: > > > > gcc/ChangeLog: > > > > * config/xtensa/constraints.md (R, T, U): > > Change define_constraint to define_memory_constraint. > > * config/xtensa/xtensa.cc > > (xtensa_lra_p, TARGET_LRA_P): Remove. > > (xtensa_emit_move_sequence): Remove "if (reload_in_progress)" > > clause as it can no longer be true. > > (xtensa_output_integer_literal_parts): Consider 16-bit wide > > constants. > > (xtensa_legitimate_constant_p): Add short-circuit path for > > integer load instructions. > > * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p() > > rather reload_in_progress and reload_completed. > > * config/xtensa/xtensa.opt (mlra): Remove. > > --- > > gcc/config/xtensa/constraints.md | 26 -- > > gcc/config/xtensa/xtensa.cc | 26 +- > > gcc/config/xtensa/xtensa.md | 2 +- > > gcc/config/xtensa/xtensa.opt | 4 > > 4 files changed, 14 insertions(+), 44 deletions(-) > > That's impressive. > This version introduces a few execution failures in the testsuite on > little endian targets and a bunch more (but not all, some execution > tests still pass) on big endian. > I'm traveling this week and likely won't be able to take a deep look > into it until 5/15. > > New LE failures: All of the LE failures are related to zero-overhead loops. Dropping the operand 2 from the doloop_end pattern fixes that (change 1). > New BE failures: All of the BE failures are related to loading HImode constants into registers, which instead of .literal .LCx value ... l32r register, .LCx now generates the following code: .literal .LCx value .literal .LCy .LCx ... l32r register1, .LCy l16ui register, register1, 0 I've fixed that by allowing HImode constants in the literal pool in the 'move_operand' predicate, making addresses of such constants legitimate in the xtensa_legitimate_address_p and adding an alternative with l32r opcode to the movhi_internal pattern (change 2). With these additional changes there's no new regression failures and the generated code looks mostly the same as with the reload. -- Thanks. -- Max From 0fb9ddfd22d11579674ac4a95912d2bc5612deb7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 21 Jan 2024 16:14:20 -0800 Subject: [PATCH 1/2] gcc: xtensa: drop operand 2 from doloop_end pattern gcc/ChangeLog: * config/xtensa/xtensa.md (doloop_end): Drop operand 2. --- gcc/config/xtensa/xtensa.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index 7aded86e244f..a9c37da48b81 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -2368,14 +2368,12 @@ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) - (unspec [(const_int 0)] UNSPEC_LSETUP_END) - (clobber (match_dup 2))])] ; match_scratch + (unspec [(const_int 0)] UNSPEC_LSETUP_END)])] "TARGET_LOOPS && optimize" { /* The loop optimizer doesn't check the predicates... */ if (GET_MODE (operands[0]) != SImode) FAIL; - operands[2] = gen_rtx_SCRATCH (SImode); }) -- 2.39.2 From e5536a47e9f1ae856c2491919933d18866511991 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 23 Jan 2024 10:57:21 -0800 Subject: [PATCH 2/2] gcc: xtensa: fix HImode constant loads gcc/ChangeLog: * config/xtensa/predicates.md (move_operand): Don't check that a constant pool operand size is a multiple of UNITS_PER_WORD. * config/xtensa/xtensa.cc (xtensa_legitimate_address_p): Don't check that mode size is at least UNITS_PER_WORD. * config/xtensa/xtensa.md (movhi_internal): Add alternative loading constant from a literal pool. --- gcc/config/xtensa/predicates.md | 4 +--- gcc/config/xtensa/xtensa.cc | 2 +- gcc/config/xtensa/xtensa.md | 9 + 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md index 672fb003a6c5..dd77911e3b70 100644 --- a/gcc/config/xtensa/predicates.md +++ b/gcc/config/xtensa/predicates.md @@ -143,9 +143,7 @@ (define_predicate "move_operand" (ior (ior (match_operand 0 "register_operand") - (and (match_operand 0 "memory_operand") - (match_test "!constantpool_mem_p (op) - || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0"))) + (match_operand 0 "memory_operand")) (ior (and (match_code "const_int") (match_test "(GET_MODE_CLASS (mode) == MODE_INT && xtensa_simm12b (INTVAL (op))) diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 39ef576ac4a9..3c2d21fe8e2e 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -2343,7 +2343,7 @@
Re: [PATCH] xtensa: Make full transition to LRA
Hi Suwa-san, On Mon, May 8, 2023 at 6:38 AM Takayuki 'January June' Suwa wrote: > > gcc/ChangeLog: > > * config/xtensa/constraints.md (R, T, U): > Change define_constraint to define_memory_constraint. > * config/xtensa/xtensa.cc > (xtensa_lra_p, TARGET_LRA_P): Remove. > (xtensa_emit_move_sequence): Remove "if (reload_in_progress)" > clause as it can no longer be true. > (xtensa_output_integer_literal_parts): Consider 16-bit wide > constants. > (xtensa_legitimate_constant_p): Add short-circuit path for > integer load instructions. > * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p() > rather reload_in_progress and reload_completed. > * config/xtensa/xtensa.opt (mlra): Remove. > --- > gcc/config/xtensa/constraints.md | 26 -- > gcc/config/xtensa/xtensa.cc | 26 +- > gcc/config/xtensa/xtensa.md | 2 +- > gcc/config/xtensa/xtensa.opt | 4 > 4 files changed, 14 insertions(+), 44 deletions(-) That's impressive. This version introduces a few execution failures in the testsuite on little endian targets and a bunch more (but not all, some execution tests still pass) on big endian. I'm traveling this week and likely won't be able to take a deep look into it until 5/15. New LE failures: +FAIL: gcc.c-torture/execute/pr56866.c -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gcc.dg/torture/pr45764.c -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gcc.dg/torture/pr45764.c -O3 -g execution test +FAIL: gfortran.dg/c-interop/section-2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gfortran.dg/c-interop/section-2p.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gfortran.dg/c-interop/section-3.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gfortran.dg/c-interop/section-3p.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gfortran.dg/bind-c-contiguous-3.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gfortran.dg/bind-c-contiguous-3.f90 -O3 -g execution test +FAIL: gfortran.dg/check_bits_2.f90 -O1 output pattern test +FAIL: gfortran.dg/coarray_ptr_comp_1.f08 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gfortran.dg/coarray_ptr_comp_1.f08 -O3 -g execution test +FAIL: gfortran.dg/loc_2.f90 -O2 execution test +FAIL: gfortran.dg/loc_2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gfortran.dg/loc_2.f90 -O3 -g execution test +FAIL: gfortran.dg/loc_2.f90 -Os execution test +FAIL: gfortran.dg/sizeof_6.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gfortran.fortran-torture/execute/forall_7.f90 execution, -O2 -fbounds-check New BE failures: +FAIL: gcc.c-torture/execute/builtins/memset-chk.c execution, -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions +FAIL: gcc.c-torture/execute/builtins/memset-chk.c execution, -O3 -g +FAIL: gcc.c-torture/execute/2412-3.c -O2 execution test +FAIL: gcc.c-torture/execute/2412-3.c -O3 -g execution test +FAIL: gcc.c-torture/execute/2412-3.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.c-torture/execute/20020201-1.c -O2 execution test +FAIL: gcc.c-torture/execute/20020201-1.c -O3 -g execution test +FAIL: gcc.c-torture/execute/20020201-1.c -Os execution test +FAIL: gcc.c-torture/execute/20020201-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.c-torture/execute/20030224-2.c -O0 execution test +FAIL: gcc.c-torture/execute/20040629-1.c -O0 execution test +FAIL: gcc.c-torture/execute/20040629-1.c -O1 execution test +FAIL: gcc.c-torture/execute/20040705-1.c -O0 execution test +FAIL: gcc.c-torture/execute/20040705-1.c -O1 execution test +FAIL: gcc.c-torture/execute/20040705-2.c -O0 execution test +FAIL: gcc.c-torture/execute/20040705-2.c -O1 execution test +FAIL: gcc.c-torture/execute/930603-3.c -O2 execution test +FAIL: gcc.c-torture/execute/930603-3.c -O3 -g execution test +FAIL: gcc.c-torture/execute/930603-3.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.c-torture/execute/931004-10.c -O2 execution test +FAIL: gcc.c-torture/execute/931004-10.c -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test +FAIL: gcc.c-torture/execute/931004-10.c -O3 -g execution
Re: [PATCH] xtensa: Make full transition to LRA
On Mon, May 8, 2023 at 3:39 PM Takayuki 'January June' Suwa via Gcc-patches wrote: > > gcc/ChangeLog: > > * config/xtensa/constraints.md (R, T, U): > Change define_constraint to define_memory_constraint. > * config/xtensa/xtensa.cc > (xtensa_lra_p, TARGET_LRA_P): Remove. > (xtensa_emit_move_sequence): Remove "if (reload_in_progress)" > clause as it can no longer be true. > (xtensa_output_integer_literal_parts): Consider 16-bit wide > constants. > (xtensa_legitimate_constant_p): Add short-circuit path for > integer load instructions. > * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p() > rather reload_in_progress and reload_completed. > * config/xtensa/xtensa.opt (mlra): Remove. > --- > gcc/config/xtensa/constraints.md | 26 -- > gcc/config/xtensa/xtensa.cc | 26 +- > gcc/config/xtensa/xtensa.md | 2 +- > gcc/config/xtensa/xtensa.opt | 4 > 4 files changed, 14 insertions(+), 44 deletions(-) > > diff --git a/gcc/config/xtensa/constraints.md > b/gcc/config/xtensa/constraints.md > index 53e4d0d8dd1..9b31e162941 100644 > --- a/gcc/config/xtensa/constraints.md > +++ b/gcc/config/xtensa/constraints.md > @@ -123,29 +123,19 @@ >(and (match_code "const_int") >(match_test "! xtensa_split1_finished_p ()" > > -;; Memory constraints. Do not use define_memory_constraint here. Doing so > -;; causes reload to force some constants into the constant pool, but since > -;; the Xtensa constant pool can only be accessed with L32R instructions, it > -;; is always better to just copy a constant into a register. Instead, use > -;; regular constraints but add a check to allow pseudos during reload. > +;; Memory constraints. > > -(define_constraint "R" > +(define_memory_constraint "R" > "Memory that can be accessed with a 4-bit unsigned offset from a register." > - (ior (and (match_code "mem") > - (match_test "smalloffset_mem_p (op)")) > - (and (match_code "reg") > - (match_test "reload_in_progress > - && REGNO (op) >= FIRST_PSEUDO_REGISTER" > + (and (match_code "mem") > + (match_test "smalloffset_mem_p (op)"))) > > -(define_constraint "T" > +(define_memory_constraint "T" > "Memory in a literal pool (addressable with an L32R instruction)." > (and (match_code "mem") >(match_test "!TARGET_CONST16 && constantpool_mem_p (op)"))) > > -(define_constraint "U" > +(define_memory_constraint "U" > "Memory that is not in a literal pool." > - (ior (and (match_code "mem") > - (match_test "! constantpool_mem_p (op)")) > - (and (match_code "reg") > - (match_test "reload_in_progress > - && REGNO (op) >= FIRST_PSEUDO_REGISTER" > + (and (match_code "mem") > + (match_test "! constantpool_mem_p (op)"))) > diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc > index 9e5d314e143..f4434ec6e2c 100644 > --- a/gcc/config/xtensa/xtensa.cc > +++ b/gcc/config/xtensa/xtensa.cc > @@ -190,7 +190,6 @@ static void xtensa_output_mi_thunk (FILE *file, tree > thunk ATTRIBUTE_UNUSED, > HOST_WIDE_INT delta, > HOST_WIDE_INT vcall_offset, > tree function); > -static bool xtensa_lra_p (void); > > static rtx xtensa_delegitimize_address (rtx); > > @@ -286,9 +285,6 @@ static rtx xtensa_delegitimize_address (rtx); > #undef TARGET_CANNOT_FORCE_CONST_MEM > #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem > > -#undef TARGET_LRA_P > -#define TARGET_LRA_P xtensa_lra_p > - > #undef TARGET_LEGITIMATE_ADDRESS_P > #define TARGET_LEGITIMATE_ADDRESS_Pxtensa_legitimate_address_p > > @@ -1266,14 +1262,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode > mode) > >operands[1] = xtensa_copy_incoming_a7 (operands[1]); > > - /* During reload we don't want to emit (subreg:X (mem:Y)) since that > - instruction won't be recognized after reload, so we remove the > - subreg and adjust mem accordingly. */ > - if (reload_in_progress) > -{ > - operands[0] = fixup_subreg_mem (operands[0]); > - operands[1] = fixup_subreg_mem (operands[1]); > -} >return 0; > } > > @@ -3196,7 +3184,7 @@ xtensa_output_integer_literal_parts (FILE *file, rtx x, > int size) >fputs (", ", file); >xtensa_output_integer_literal_parts (file, second, size / 2); > } > - else if (size == 4) > + else if (size == 4 || size == 2) > { >output_addr_const (file, x); > } > @@ -4876,6 +4864,10 @@ xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx > chain) > static bool > xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) > { > + if (CONST_INT_P (x)) > +return TARGET_AUTO_LITPOOLS || TARGET_CONST16 > + || xtensa_simm12b (INTVAL
[PATCH] xtensa: Make full transition to LRA
gcc/ChangeLog: * config/xtensa/constraints.md (R, T, U): Change define_constraint to define_memory_constraint. * config/xtensa/xtensa.cc (xtensa_lra_p, TARGET_LRA_P): Remove. (xtensa_emit_move_sequence): Remove "if (reload_in_progress)" clause as it can no longer be true. (xtensa_output_integer_literal_parts): Consider 16-bit wide constants. (xtensa_legitimate_constant_p): Add short-circuit path for integer load instructions. * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p() rather reload_in_progress and reload_completed. * config/xtensa/xtensa.opt (mlra): Remove. --- gcc/config/xtensa/constraints.md | 26 -- gcc/config/xtensa/xtensa.cc | 26 +- gcc/config/xtensa/xtensa.md | 2 +- gcc/config/xtensa/xtensa.opt | 4 4 files changed, 14 insertions(+), 44 deletions(-) diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md index 53e4d0d8dd1..9b31e162941 100644 --- a/gcc/config/xtensa/constraints.md +++ b/gcc/config/xtensa/constraints.md @@ -123,29 +123,19 @@ (and (match_code "const_int") (match_test "! xtensa_split1_finished_p ()" -;; Memory constraints. Do not use define_memory_constraint here. Doing so -;; causes reload to force some constants into the constant pool, but since -;; the Xtensa constant pool can only be accessed with L32R instructions, it -;; is always better to just copy a constant into a register. Instead, use -;; regular constraints but add a check to allow pseudos during reload. +;; Memory constraints. -(define_constraint "R" +(define_memory_constraint "R" "Memory that can be accessed with a 4-bit unsigned offset from a register." - (ior (and (match_code "mem") - (match_test "smalloffset_mem_p (op)")) - (and (match_code "reg") - (match_test "reload_in_progress - && REGNO (op) >= FIRST_PSEUDO_REGISTER" + (and (match_code "mem") + (match_test "smalloffset_mem_p (op)"))) -(define_constraint "T" +(define_memory_constraint "T" "Memory in a literal pool (addressable with an L32R instruction)." (and (match_code "mem") (match_test "!TARGET_CONST16 && constantpool_mem_p (op)"))) -(define_constraint "U" +(define_memory_constraint "U" "Memory that is not in a literal pool." - (ior (and (match_code "mem") - (match_test "! constantpool_mem_p (op)")) - (and (match_code "reg") - (match_test "reload_in_progress - && REGNO (op) >= FIRST_PSEUDO_REGISTER" + (and (match_code "mem") + (match_test "! constantpool_mem_p (op)"))) diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 9e5d314e143..f4434ec6e2c 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -190,7 +190,6 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, tree function); -static bool xtensa_lra_p (void); static rtx xtensa_delegitimize_address (rtx); @@ -286,9 +285,6 @@ static rtx xtensa_delegitimize_address (rtx); #undef TARGET_CANNOT_FORCE_CONST_MEM #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem -#undef TARGET_LRA_P -#define TARGET_LRA_P xtensa_lra_p - #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_Pxtensa_legitimate_address_p @@ -1266,14 +1262,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) operands[1] = xtensa_copy_incoming_a7 (operands[1]); - /* During reload we don't want to emit (subreg:X (mem:Y)) since that - instruction won't be recognized after reload, so we remove the - subreg and adjust mem accordingly. */ - if (reload_in_progress) -{ - operands[0] = fixup_subreg_mem (operands[0]); - operands[1] = fixup_subreg_mem (operands[1]); -} return 0; } @@ -3196,7 +3184,7 @@ xtensa_output_integer_literal_parts (FILE *file, rtx x, int size) fputs (", ", file); xtensa_output_integer_literal_parts (file, second, size / 2); } - else if (size == 4) + else if (size == 4 || size == 2) { output_addr_const (file, x); } @@ -4876,6 +4864,10 @@ xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain) static bool xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { + if (CONST_INT_P (x)) +return TARGET_AUTO_LITPOOLS || TARGET_CONST16 + || xtensa_simm12b (INTVAL (x)); + return !xtensa_tls_referenced_p (x); } @@ -5317,12 +5309,4 @@ xtensa_delegitimize_address (rtx op) return op; } -/* Implement TARGET_LRA_P. */ - -static bool -xtensa_lra_p (void) -{ - return TARGET_LRA; -} - #include "gt-xtensa.h" diff --git a/gcc/config/xtensa/xtensa.md