Re: [PATCH] libgcc: arm: fix build for FDPIC target

2024-03-25 Thread Max Filippov
On Fri, Mar 22, 2024 at 1:15 PM Max Filippov  wrote:
>
> libgcc/
> * unwind-arm-common.inc (__gnu_personality_sigframe_fdpic): Cast
> last argument of _Unwind_VRS_Set to void *.
> ---
>  libgcc/unwind-arm-common.inc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Build-tested for arm-gnu-uclinuxfdpiceabi, committed as obvious.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Add supplementary split pattern for "*addsubx"

2024-03-22 Thread Max Filippov
On Thu, Mar 21, 2024 at 4:36 PM Takayuki 'January June' Suwa
 wrote:
>
> int test(int a) {
>return a * 4 + 30000;
> }
>
> In the example above, since Xtensa has instructions to add a register value
> scaled by 2, 4 or 8 (and corresponding define_insns), we would expect them
> to be used, but they are not, because the expression is transformed before
> reaching the RTL generation pass as below:
>
> int test(int a) {
>return (a + 7500) * 4;
> }
>
> Fortunately, the RTL combination pass tries a splitting pattern that matches
> the first example, so it is easy to solve by defining that pattern.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: Add new split pattern described above.
> ---
>  gcc/config/xtensa/xtensa.md | 14 ++
>  1 file changed, 14 insertions(+)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


[PATCH] libgcc: arm: fix build for FDPIC target

2024-03-22 Thread Max Filippov
libgcc/
* unwind-arm-common.inc (__gnu_personality_sigframe_fdpic): Cast
last argument of _Unwind_VRS_Set to void *.
---
 libgcc/unwind-arm-common.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgcc/unwind-arm-common.inc b/libgcc/unwind-arm-common.inc
index 5453f38186b5..576f7e93e8a8 100644
--- a/libgcc/unwind-arm-common.inc
+++ b/libgcc/unwind-arm-common.inc
@@ -248,7 +248,7 @@ __gnu_personality_sigframe_fdpic (_Unwind_State state,
  + ARM_SIGCONTEXT_R0;
 /* Restore regs saved on stack by the kernel.  */
 for (i = 0; i < 16; i++)
-   _Unwind_VRS_Set (context, _UVRSC_CORE, i, _UVRSD_UINT32, sp + 4 * i);
+   _Unwind_VRS_Set (context, _UVRSC_CORE, i, _UVRSD_UINT32, (void *)(sp + 4 * i));
 
 return _URC_CONTINUE_UNWIND;
 }
-- 
2.39.2



[PATCH] gcc: xtensa: reorder movsi_internal patterns for better code generation during LRA

2024-03-14 Thread Max Filippov
After switching to LRA xtensa backend generates the following code for
saving/loading registers:

movi a9, 0x190
add  a9, a9, sp
s32i.n   a3, a9, 0

instead of the shorter and more efficient

s32i a3, a9, 0x190

E.g. the following code can be used to reproduce it:

int f1(int a, int b, int c, int d, int e, int f, int *p);
int f2(int a, int b, int c, int d, int e, int f, int *p);
int f3(int a, int b, int c, int d, int e, int f, int *p);

int foo(int a, int b, int c, int d, int e, int f)
{
int g[100];
return
f1(a, b, c, d, e, f, g) +
f2(a, b, c, d, e, f, g) +
f3(a, b, c, d, e, f, g);
}

This happens in the LRA pass because s32i.n and l32i.n are listed before
the s32i and l32i in the movsi_internal pattern and alternative
consideration loop stops early.

gcc/

* config/xtensa/xtensa.md (movsi_internal): Move l32i and s32i
patterns ahead of the l32i.n and s32i.n.
---
 gcc/config/xtensa/xtensa.md | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 1a2249b059a0..5cdf4dffe700 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1270,13 +1270,15 @@
 })
 
 (define_insn "movsi_internal"
-  [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,a,W,a,a,U,*a,*A")
-   (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,Y,i,T,U,r,*A,*r"))]
+  [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,a,U,D,R,R,a,q,a,a,W,a,*a,*A")
+   (match_operand:SI 1 "move_operand" "M,D,d,U,r,R,D,d,r,r,I,Y,i,T,*A,*r"))]
   "xtensa_valid_move (SImode, operands)"
   "@
movi.n\t%0, %x1
mov.n\t%0, %1
mov.n\t%0, %1
+   %v1l32i\t%0, %1
+   %v0s32i\t%1, %0
%v1l32i.n\t%0, %1
%v0s32i.n\t%1, %0
%v0s32i.n\t%1, %0
@@ -1286,13 +1288,11 @@
movi\t%0, %1
const16\t%0, %t1\;const16\t%0, %b1
%v1l32r\t%0, %1
-   %v1l32i\t%0, %1
-   %v0s32i\t%1, %0
rsr\t%0, ACCLO
wsr\t%1, ACCLO"
-  [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr")
+  [(set_attr "type" "move,move,move,load,store,load,store,store,move,move,move,move,move,load,rsr,wsr")
    (set_attr "mode" "SI")
-   (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")])
+   (set_attr "length" "2,2,2,3,3,2,2,2,3,3,3,3,6,3,3,3")])
 
 (define_split
   [(set (match_operand:SHI 0 "register_operand")
-- 
2.39.2



Re: [PATCH 2/2] xtensa: Fix missing mode warning in "*eqne_zero_masked_bits"

2024-02-04 Thread Max Filippov
On Sat, Feb 3, 2024 at 6:19 AM Takayuki 'January June' Suwa
 wrote:
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*eqne_zero_masked_bits):
> Add missing ":SI" to the match_operator.
> ---
>  gcc/config/xtensa/xtensa.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 1/2 v2] xtensa: Recover constant synthesis for HImode after LRA transition

2024-02-04 Thread Max Filippov
On Sun, Feb 4, 2024 at 2:20 AM Takayuki 'January June' Suwa
 wrote:
>
> After LRA transition, HImode constants that don't fit into signed 12 bits
> are no longer subject to constant synthesis:
>
> /* example */
> void test(void) {
>   short foo = 32767;
>   __asm__ ("" :: "r"(foo));
> }
>
> ;; before
> .literal_position
> .literal .LC0, 32767
> test:
> l32r    a9, .LC0
> ret.n
>
> This patch fixes that:
>
> ;; after
> test:
> movi.n  a9, -1
> extui   a9, a9, 17, 15
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (SHI): New mode iterator.
> (2 split patterns related to constsynth):
> Change to also accept HImode operands.
> ---
>  gcc/config/xtensa/xtensa.md | 22 ++
>  1 file changed, 14 insertions(+), 8 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 1/2] xtensa: Recover constant synthesis for HImode after LRA transition

2024-02-03 Thread Max Filippov
Hi Suwa-san,

On Sat, Feb 3, 2024 at 6:20 AM Takayuki 'January June' Suwa
 wrote:
> After LRA transition, HImode constants that don't fit into signed 12 bits
> are no longer subject to constant synthesis:

with this change I get multiple ICEs during libgomp, libgfortran and
libstdc++ builds, e.g.:

/home/jcmvbkbc/ws/tensilica/gcc/gcc/libstdc++-v3/src/c++20/tzdb.cc:1228:3:
error: unrecognizable insn:
1228 |   }
 |   ^
(insn 3131 27 3132 2 (set (subreg:SI (reg:DI 176) 0)
   (const_int 78796800 [0x4b25800]))
"/home/jcmvbkbc/ws/tensilica/gcc/builds/gcc-14-8779-ge15d00be88c1-xtensa-call0-le/xtensa-buildroot-linux-uclibc/libstdc++-v3/include/bits/chrono.h":574:6
-1
(nil))
during RTL pass: subreg3
/home/jcmvbkbc/ws/tensilica/gcc/gcc/libstdc++-v3/src/c++20/tzdb.cc:1228:3:
internal compiler error: in extract_insn, at recog.cc:2812
0x7cb898 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/rtl-error.cc:108
0x7cb8b4 _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/rtl-error.cc:116
0x7ca31e extract_insn(rtx_insn*)
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/recog.cc:2812
0x1c08b57 decompose_multiword_subregs
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/lower-subreg.cc:1569
0x1c09d7d execute
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/lower-subreg.cc:1834



/home/jcmvbkbc/ws/tensilica/gcc/gcc/libstdc++-v3/src/filesystem/ops.cc:936:1:
error: unrecognizable insn:
 936 | }
 | ^
(insn 260 21 261 2 (set (reg:SI 4 a4)
   (const_int 1000000000 [0x3b9aca00]))
"/home/jcmvbkbc/ws/tensilica/gcc/builds/gcc-14-8779-ge15d00be88c1-xtensa-call0-le/xtensa-buildroot-linux-uclibc/libstdc++-v3/include/bits/chrono.h":214:38
discrim 1 -1
(nil))
during RTL pass: subreg3
/home/jcmvbkbc/ws/tensilica/gcc/gcc/libstdc++-v3/src/filesystem/ops.cc:936:1:
internal compiler error: in extract_insn, at recog.cc:2812
0x7cb898 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/rtl-error.cc:108
0x7cb8b4 _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/rtl-error.cc:116
0x7ca31e extract_insn(rtx_insn*)
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/recog.cc:2812
0x1c08b57 decompose_multiword_subregs
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/lower-subreg.cc:1569
0x1c09d7d execute
   /home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/lower-subreg.cc:1834


-- 
Thanks.
-- Max


[COMMITTED] xtensa: Make full transition to LRA

2024-01-30 Thread Max Filippov
From: Takayuki 'January June' Suwa 

gcc/ChangeLog:

* config/xtensa/constraints.md (R, T, U):
Change define_constraint to define_memory_constraint.
* config/xtensa/predicates.md (move_operand): Don't check that a
constant pool operand size is a multiple of UNITS_PER_WORD.
* config/xtensa/xtensa.cc
(xtensa_lra_p, TARGET_LRA_P): Remove.
(xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
clause as it can no longer be true.
(fixup_subreg_mem): Drop function.
(xtensa_output_integer_literal_parts): Consider 16-bit wide
constants.
(xtensa_legitimate_constant_p): Add short-circuit path for
integer load instructions. Don't check that mode size is
at least UNITS_PER_WORD.
* config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
rather reload_in_progress and reload_completed.
(doloop_end): Drop operand 2.
(movhi_internal): Add alternative loading constant from a
literal pool.
(define_split for DI register_operand): Don't limit to
!TARGET_AUTO_LITPOOLS.
* config/xtensa/xtensa.opt (mlra): Change to no effect.
---
 gcc/config/xtensa/constraints.md | 26 ++
 gcc/config/xtensa/predicates.md  |  7 ++---
 gcc/config/xtensa/xtensa.cc  | 46 +---
 gcc/config/xtensa/xtensa.md  | 17 ++--
 gcc/config/xtensa/xtensa.opt |  4 +--
 5 files changed, 26 insertions(+), 74 deletions(-)

diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 27fd49656e5c..d855fb8d6057 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -123,29 +123,19 @@
   (and (match_code "const_int")
   (match_test "! xtensa_split1_finished_p ()"
 
-;; Memory constraints.  Do not use define_memory_constraint here.  Doing so
-;; causes reload to force some constants into the constant pool, but since
-;; the Xtensa constant pool can only be accessed with L32R instructions, it
-;; is always better to just copy a constant into a register.  Instead, use
-;; regular constraints but add a check to allow pseudos during reload.
+;; Memory constraints.
 
-(define_constraint "R"
+(define_memory_constraint "R"
  "Memory that can be accessed with a 4-bit unsigned offset from a register."
- (ior (and (match_code "mem")
-  (match_test "smalloffset_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "smalloffset_mem_p (op)")))
 
-(define_constraint "T"
+(define_memory_constraint "T"
  "Memory in a literal pool (addressable with an L32R instruction)."
  (and (match_code "mem")
   (match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
 
-(define_constraint "U"
+(define_memory_constraint "U"
  "Memory that is not in a literal pool."
- (ior (and (match_code "mem")
-  (match_test "! constantpool_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "! constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index a3dd1a929c76..a296c7ecc99a 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -143,17 +143,14 @@
 (define_predicate "move_operand"
   (ior
  (ior (match_operand 0 "register_operand")
- (and (match_operand 0 "memory_operand")
-  (match_test "!constantpool_mem_p (op)
-   || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))
+ (match_operand 0 "memory_operand"))
  (ior (and (match_code "const_int")
   (match_test "(GET_MODE_CLASS (mode) == MODE_INT
 && xtensa_simm12b (INTVAL (op)))
|| ! xtensa_split1_finished_p ()"))
  (and (match_code "const_int,const_double,const,symbol_ref,label_ref")
   (match_test "(TARGET_CONST16 || TARGET_AUTO_LITPOOLS)
-   && CONSTANT_P (op)
-   && GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")
+   && CONSTANT_P (op)")
 
 ;; Accept the floating point constant 1 in the appropriate mode.
 (define_predicate "const_float_1_operand"
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 12677af3bd89..9beac9324679 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -115,7 +115,6 @@ static enum internal_test map_test_to_internal_test (enum 
rtx_code);
 static rtx gen_int_relational (enum rtx_code, rtx, rtx);
 static rtx gen_float_relational (enum rtx_code, rtx, rtx);
 static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx);
-static rtx fixup_subreg_mem 

[PATCH v2] xtensa: Make full transition to LRA

2024-01-23 Thread Max Filippov
From: Takayuki 'January June' Suwa 

gcc/ChangeLog:

* config/xtensa/constraints.md (R, T, U):
Change define_constraint to define_memory_constraint.
* config/xtensa/predicates.md (move_operand): Don't check that a
constant pool operand size is a multiple of UNITS_PER_WORD.
* config/xtensa/xtensa.cc
(xtensa_lra_p, TARGET_LRA_P): Remove.
(xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
clause as it can no longer be true.
(fixup_subreg_mem): Drop function.
(xtensa_output_integer_literal_parts): Consider 16-bit wide
constants.
(xtensa_legitimate_constant_p): Add short-circuit path for
integer load instructions. Don't check that mode size is
at least UNITS_PER_WORD.
* config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
rather reload_in_progress and reload_completed.
(doloop_end): Drop operand 2.
(movhi_internal): Add alternative loading constant from a
literal pool.
* config/xtensa/xtensa.opt (mlra): Change to no effect.
---
 gcc/config/xtensa/constraints.md | 26 ++
 gcc/config/xtensa/predicates.md  |  4 +--
 gcc/config/xtensa/xtensa.cc  | 46 +---
 gcc/config/xtensa/xtensa.md  | 15 +--
 gcc/config/xtensa/xtensa.opt |  4 +--
 5 files changed, 24 insertions(+), 71 deletions(-)

diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 5cade1db8ff1..dc6ffb5ba15c 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -123,29 +123,19 @@
   (and (match_code "const_int")
   (match_test "! xtensa_split1_finished_p ()"
 
-;; Memory constraints.  Do not use define_memory_constraint here.  Doing so
-;; causes reload to force some constants into the constant pool, but since
-;; the Xtensa constant pool can only be accessed with L32R instructions, it
-;; is always better to just copy a constant into a register.  Instead, use
-;; regular constraints but add a check to allow pseudos during reload.
+;; Memory constraints.
 
-(define_constraint "R"
+(define_memory_constraint "R"
  "Memory that can be accessed with a 4-bit unsigned offset from a register."
- (ior (and (match_code "mem")
-  (match_test "smalloffset_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "smalloffset_mem_p (op)")))
 
-(define_constraint "T"
+(define_memory_constraint "T"
  "Memory in a literal pool (addressable with an L32R instruction)."
  (and (match_code "mem")
   (match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
 
-(define_constraint "U"
+(define_memory_constraint "U"
  "Memory that is not in a literal pool."
- (ior (and (match_code "mem")
-  (match_test "! constantpool_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "! constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 672fb003a6c5..dd77911e3b70 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -143,9 +143,7 @@
 (define_predicate "move_operand"
   (ior
  (ior (match_operand 0 "register_operand")
- (and (match_operand 0 "memory_operand")
-  (match_test "!constantpool_mem_p (op)
-   || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))
+ (match_operand 0 "memory_operand"))
  (ior (and (match_code "const_int")
   (match_test "(GET_MODE_CLASS (mode) == MODE_INT
 && xtensa_simm12b (INTVAL (op)))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index a4f8e3e49d06..22b4416f48e4 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -115,7 +115,6 @@ static enum internal_test map_test_to_internal_test (enum 
rtx_code);
 static rtx gen_int_relational (enum rtx_code, rtx, rtx);
 static rtx gen_float_relational (enum rtx_code, rtx, rtx);
 static rtx gen_conditional_move (enum rtx_code, machine_mode, rtx, rtx);
-static rtx fixup_subreg_mem (rtx);
 static struct machine_function * xtensa_init_machine_status (void);
 static rtx xtensa_legitimize_tls_address (rtx);
 static rtx xtensa_legitimize_address (rtx, rtx, machine_mode);
@@ -192,7 +191,6 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk 
ATTRIBUTE_UNUSED,
HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset,
tree function);
-static bool xtensa_lra_p (void);
 
 static rtx xtensa_delegitimize_address (rtx);
 
@@ -286,9 +284,6 @@ static rtx xtensa_delegitimize_address 

Re: [PATCH] xtensa: Make full transition to LRA

2024-01-23 Thread Max Filippov
Hi Suwa-san,

I've finally processed the new issues introduced by this change.

On Wed, May 10, 2023 at 2:10 AM Max Filippov  wrote:
> On Mon, May 8, 2023 at 6:38 AM Takayuki 'January June' Suwa
>  wrote:
> >
> > gcc/ChangeLog:
> >
> > * config/xtensa/constraints.md (R, T, U):
> > Change define_constraint to define_memory_constraint.
> > * config/xtensa/xtensa.cc
> > (xtensa_lra_p, TARGET_LRA_P): Remove.
> > (xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
> > clause as it can no longer be true.
> > (xtensa_output_integer_literal_parts): Consider 16-bit wide
> > constants.
> > (xtensa_legitimate_constant_p): Add short-circuit path for
> > integer load instructions.
> > * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
> > rather reload_in_progress and reload_completed.
> > * config/xtensa/xtensa.opt (mlra): Remove.
> > ---
> >  gcc/config/xtensa/constraints.md | 26 --
> >  gcc/config/xtensa/xtensa.cc  | 26 +-
> >  gcc/config/xtensa/xtensa.md  |  2 +-
> >  gcc/config/xtensa/xtensa.opt |  4 
> >  4 files changed, 14 insertions(+), 44 deletions(-)
>
> That's impressive.
> This version introduces a few execution failures in the testsuite on
> little endian targets and a bunch more (but not all, some execution
> tests still pass) on big endian.
> I'm traveling this week and likely won't be able to take a deep look
> into it until 5/15.
>
> New LE failures:

All of the LE failures are related to zero-overhead loops. Dropping the
operand 2 from the doloop_end pattern fixes that (change 1).

> New BE failures:

All of the BE failures are related to loading HImode constants into
registers, which instead of

.literal .LCx value
...
l32r register, .LCx

now generates the following code:

.literal .LCx value
.literal .LCy .LCx
...
l32r register1, .LCy
l16ui register, register1, 0

I've fixed that by allowing HImode constants in the literal pool in the
'move_operand' predicate, making addresses of such constants
legitimate in the xtensa_legitimate_address_p and adding an
alternative with l32r opcode to the movhi_internal pattern (change 2).

With these additional changes there are no new regression failures
and the generated code looks mostly the same as with reload.

--
Thanks.
-- Max
From 0fb9ddfd22d11579674ac4a95912d2bc5612deb7 Mon Sep 17 00:00:00 2001
From: Max Filippov 
Date: Sun, 21 Jan 2024 16:14:20 -0800
Subject: [PATCH 1/2] gcc: xtensa: drop operand 2 from doloop_end pattern

gcc/ChangeLog:
	* config/xtensa/xtensa.md (doloop_end): Drop operand 2.
---
 gcc/config/xtensa/xtensa.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 7aded86e244f..a9c37da48b81 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2368,14 +2368,12 @@
 	  (set (match_dup 0)
 		   (plus:SI (match_dup 0)
 			(const_int -1)))
-	  (unspec [(const_int 0)] UNSPEC_LSETUP_END)
-	  (clobber (match_dup 2))])] ; match_scratch
+	  (unspec [(const_int 0)] UNSPEC_LSETUP_END)])]
   "TARGET_LOOPS && optimize"
 {
   /* The loop optimizer doesn't check the predicates... */
   if (GET_MODE (operands[0]) != SImode)
 FAIL;
-  operands[2] = gen_rtx_SCRATCH (SImode);
 })
 
 
-- 
2.39.2

From e5536a47e9f1ae856c2491919933d18866511991 Mon Sep 17 00:00:00 2001
From: Max Filippov 
Date: Tue, 23 Jan 2024 10:57:21 -0800
Subject: [PATCH 2/2] gcc: xtensa: fix HImode constant loads

gcc/ChangeLog:
	* config/xtensa/predicates.md (move_operand): Don't check that a
	constant pool operand size is a multiple of UNITS_PER_WORD.
	* config/xtensa/xtensa.cc (xtensa_legitimate_address_p): Don't
	check that mode size is at least UNITS_PER_WORD.
	* config/xtensa/xtensa.md (movhi_internal): Add alternative
	loading constant from a literal pool.
---
 gcc/config/xtensa/predicates.md | 4 +---
 gcc/config/xtensa/xtensa.cc | 2 +-
 gcc/config/xtensa/xtensa.md | 9 +
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 672fb003a6c5..dd77911e3b70 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -143,9 +143,7 @@
 (define_predicate "move_operand"
   (ior
  (ior (match_operand 0 "register_operand")
-	  (and (match_operand 0 "memory_operand")
-	   (match_test "!constantpool_mem_p (op)
-			|| GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))
+	  (match_operand 0 "memory_operand"))
  (ior (and (match_code "const_int")
 	   (match_test "(GET_MODE_CLA

[COMMITTED] gcc: xtensa: fix salt/saltu version check

2023-10-28 Thread Max Filippov
gcc/
* config/xtensa/xtensa.h (TARGET_SALT): Change HW version from
26 (which corresponds to RF-2014.0) to 27 (which
corresponds to RG-2015.0, the release where salt/saltu opcodes
were introduced).
---
 gcc/config/xtensa/xtensa.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 5987681e5496..49e6350001da 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -54,7 +54,7 @@ along with GCC; see the file COPYING3.  If not see
 #define TARGET_WINDOWED_ABI    xtensa_windowed_abi
 #define TARGET_DEBUG           XCHAL_HAVE_DEBUG
 #define TARGET_L32R            XCHAL_HAVE_L32R
-#define TARGET_SALT            (XTENSA_MARCH_EARLIEST >= 26)
+#define TARGET_SALT            (XTENSA_MARCH_EARLIEST >= 27)
 
 #define TARGET_DEFAULT (MASK_SERIALIZE_VOLATILE)
 
-- 
2.39.2



[COMMITTED] gcc: xtensa: use salt/saltu in xtensa_expand_scc

2023-09-14 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/predicates.md (xtensa_cstoresi_operator): Add
unsigned comparisons.
* config/xtensa/xtensa.cc (xtensa_expand_scc): Add code
generation of salt/saltu instructions.
* config/xtensa/xtensa.h (TARGET_SALT): New macro.
* config/xtensa/xtensa.md (salt, saltu): New instruction
patterns.
---
 gcc/config/xtensa/predicates.md |  2 +-
 gcc/config/xtensa/xtensa.cc | 55 +
 gcc/config/xtensa/xtensa.h  |  1 +
 gcc/config/xtensa/xtensa.md | 20 
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index a3575a688923..672fb003a6c5 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -195,7 +195,7 @@
   (match_code "plus,minus"))
 
 (define_predicate "xtensa_cstoresi_operator"
-  (match_code "eq,ne,gt,ge,lt,le"))
+  (match_code "eq,ne,gt,ge,lt,le,gtu,geu,ltu,leu"))
 
 (define_predicate "xtensa_shift_per_byte_operator"
   (match_code "ashift,ashiftrt,lshiftrt"))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 2481b028ca12..a4f8e3e49d06 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -995,6 +995,61 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
   rtx one_tmp, zero_tmp;
   rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
 
+  if (cmp_mode == SImode && TARGET_SALT)
+{
+  rtx a = operands[2], b = force_reg (SImode, operands[3]);
+  enum rtx_code code = GET_CODE (operands[1]);
+  bool invert_res = false;
+
+  switch (code)
+   {
+   case GE:
+   case GEU:
+ invert_res = true;
+ break;
+   case GT:
+   case GTU:
+ std::swap (a, b);
+ break;
+   case LE:
+   case LEU:
+ invert_res = true;
+ std::swap (a, b);
+ break;
+   default:
+ break;
+   }
+
+  switch (code)
+   {
+   case GE:
+   case GT:
+   case LE:
+   case LT:
+ emit_insn (gen_salt (dest, a, b));
+ if (!invert_res)
+   return 1;
+ break;
+   case GEU:
+   case GTU:
+   case LEU:
+   case LTU:
+ emit_insn (gen_saltu (dest, a, b));
+ if (!invert_res)
+   return 1;
+ break;
+   default:
+ break;
+   }
+
+  if (invert_res)
+   {
+ emit_insn (gen_negsi2 (dest, dest));
+ emit_insn (gen_addsi3 (dest, dest, const1_rtx));
+ return 1;
+   }
+}
+
   if (! (cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
 operands[2], operands[3])))
 return 0;
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 34e06afcff48..5987681e5496 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -54,6 +54,7 @@ along with GCC; see the file COPYING3.  If not see
 #define TARGET_WINDOWED_ABI    xtensa_windowed_abi
 #define TARGET_DEBUG           XCHAL_HAVE_DEBUG
 #define TARGET_L32R            XCHAL_HAVE_L32R
+#define TARGET_SALT            (XTENSA_MARCH_EARLIEST >= 26)
 
 #define TARGET_DEFAULT (MASK_SERIALIZE_VOLATILE)
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 6476fdc395ae..20af1cbfbd03 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2393,6 +2393,26 @@
   DONE;
 })
 
+(define_insn "salt"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (lt:SI (match_operand:SI 1 "register_operand" "r")
+  (match_operand:SI 2 "register_operand" "r")))]
+  "TARGET_SALT"
+  "salt\t%0, %1, %2"
+  [(set_attr "type""arith")
+   (set_attr "mode""SI")
+   (set_attr "length"  "3")])
+
+(define_insn "saltu"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (ltu:SI (match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r")))]
+  "TARGET_SALT"
+  "saltu\t%0, %1, %2"
+  [(set_attr "type""arith")
+   (set_attr "mode""SI")
+   (set_attr "length"  "3")])
+
 (define_expand "cstoresf4"
   [(match_operand:SI 0 "register_operand")
(match_operator:SI 1 "comparison_operator"
-- 
2.30.2



Re: [PATCH] xtensa: Optimize several boolean evaluations of EQ/NE against constant zero

2023-09-13 Thread Max Filippov via Gcc-patches
On Fri, Sep 8, 2023 at 1:49 AM Takayuki 'January June' Suwa
 wrote:
>
> An idiomatic implementation of boolean evaluation of whether a register is
> zero or not in Xtensa is to assign 0 and 1 to the temporary and destination,
> and then issue the MOV[EQ/NE]Z machine instruction
> (See 8.3.2 Instruction Idioms, Xtensa ISA refman., p.599):
>
> ;; A2 = (A3 != 0) ? 1 : 0;
> movi.n  a9, 1
> movi.n  a2, 0
> movnez  a2, a9, a3  ;; if (A3 != 0) A2 = A9;
>
> As you can see in the above idiom, if the source and destination are the
> same register, a move instruction from the source to another temporary
> register must be prepended:
>
> ;; A2 = (A2 == 0) ? 1 : 0;
> mov.n   a10, a2
> movi.n  a9, 1
> movi.n  a2, 0
> moveqz  a2, a9, a10  ;; if (A10 == 0) A2 = A9;
>
> Fortunately, we can reduce the number of instructions and temporary
> registers with a few tweaks:
>
> ;; A2 = (A3 != 0) ? 1 : 0;
> movi.n  a2, 1
> moveqz  a2, a3, a3  ;; if (A3 == 0) A2 = A3;
>
> ;; A2 = (A2 != 0) ? 1 : 0;
> movi.n  a9, 1
> movnez  a2, a9, a2  ;; if (A2 != 0) A2 = A9;
>
> ;; A2 = (A3 == 0) ? 1 : 0;
> movi.n  a2, -1
> moveqz  a2, a3, a3  ;; if (A3 == 0) A2 = A3;
> addi.n  a2, a2, 1
>
> ;; A2 = (A2 == 0) ? 1 : 0;
> movi.n  a9, -1
> movnez  a2, a9, a2  ;; if (A2 != 0) A2 = A9;
> addi.n  a2, a2, 1
>
> Additionally, if TARGET_NSA is configured, the fact that the NSAU machine
> instruction returns 32 iff its source is 0, and a value less than 32
> otherwise, can be used in boolean evaluation of EQ comparison.
>
> ;; A2 = (A3 == 0) ? 1 : 0;
> nsau    a2, a3  ;; Source and destination can be the same register
> srli    a2, a2, 5
>
> Furthermore, this patch also saves one instruction when determining whether
> the ANDing with mask values in which 1s are lined up from the upper or lower
> bit end (for example, 0xFFE0 or 0x003F) is 0 or not.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_expand_scc):
> Revert the changes from the last patch, as the work in the RTL
> expansion pass is too far to determine the physical registers.
> * config/xtensa/xtensa.md (*eqne_INT_MIN): Ditto.
> (eq_zero_NSA, eqne_zero, *eqne_zero_masked_bits): New patterns.
> ---
>  gcc/config/xtensa/xtensa.cc |  35 +--
>  gcc/config/xtensa/xtensa.md | 112 
>  2 files changed, 113 insertions(+), 34 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


[RFC] gcc: xtensa: use salt/saltu in xtensa_expand_scc

2023-09-07 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/predicates.md (xtensa_cstoresi_operator): Add
unsigned comparisons.
* config/xtensa/xtensa.cc (xtensa_expand_scc): Add code
generation of salt/saltu instructions.
* config/xtensa/xtensa.h (TARGET_SALT): New macro.
* gcc/config/xtensa/xtensa.md (salt, saltu): New instruction
patterns.
---
I've tested it both with configurations that have salt/saltu and that
don't.
The inversion of the result at the end looks wasteful. I've been reading
gccint chapter about cstoreMODE4 and the following part left me with the
question:

  The value stored for a true condition must have 1 as its low bit,
  or else must be negative.

Does it mean that some variants of cstoreMODE4 may return 1 and some may
return -1 for truth, as both have 1 as their low bit? If that's true we
could use 'addi dest, dest, -1' instead of the two-instruction sequence
'movi tmp, 1; xor dest, dest, tmp'.

---
 gcc/config/xtensa/predicates.md |  2 +-
 gcc/config/xtensa/xtensa.cc | 58 +
 gcc/config/xtensa/xtensa.h  |  1 +
 gcc/config/xtensa/xtensa.md | 20 
 4 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index a3575a688923..672fb003a6c5 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -195,7 +195,7 @@
   (match_code "plus,minus"))
 
 (define_predicate "xtensa_cstoresi_operator"
-  (match_code "eq,ne,gt,ge,lt,le"))
+  (match_code "eq,ne,gt,ge,lt,le,gtu,geu,ltu,leu"))
 
 (define_predicate "xtensa_shift_per_byte_operator"
   (match_code "ashift,ashiftrt,lshiftrt"))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 1afaa1cc94e7..cc63529e80ea 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -1028,6 +1028,64 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
break;
   }
 
+  if (cmp_mode == SImode && TARGET_SALT)
+{
+  bool swap_args = false;
+  bool invert_res = false;
+  rtx a = operands[2], b = force_reg (SImode, operands[3]);
+
+  switch (code)
+   {
+   case GE:
+   case GEU:
+ invert_res = true;
+ break;
+   case GT:
+   case GTU:
+ swap_args = true;
+ break;
+   case LE:
+   case LEU:
+ invert_res = true;
+ swap_args = true;
+ break;
+   default:
+ break;
+   }
+
+  if (swap_args)
+   std::swap (a, b);
+
+  switch (code)
+   {
+   case GE:
+   case GT:
+   case LE:
+   case LT:
+ emit_insn (gen_salt (dest, a, b));
+ if (!invert_res)
+   return 1;
+ break;
+   case GEU:
+   case GTU:
+   case LEU:
+   case LTU:
+ emit_insn (gen_saltu (dest, a, b));
+ if (!invert_res)
+   return 1;
+ break;
+   default:
+ break;
+   }
+
+  if (invert_res)
+   {
+ one_tmp = force_reg (SImode, const1_rtx);
+ emit_insn (gen_xorsi3 (dest, dest, one_tmp));
+ return 1;
+   }
+}
+
   if (! (cmp = gen_conditional_move (code, cmp_mode,
 operands[2], operands[3])))
 return 0;
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 34e06afcff48..5987681e5496 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -54,6 +54,7 @@ along with GCC; see the file COPYING3.  If not see
 #define TARGET_WINDOWED_ABI    xtensa_windowed_abi
 #define TARGET_DEBUG           XCHAL_HAVE_DEBUG
 #define TARGET_L32R            XCHAL_HAVE_L32R
+#define TARGET_SALT            (XTENSA_MARCH_EARLIEST >= 26)
 
 #define TARGET_DEFAULT (MASK_SERIALIZE_VOLATILE)
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index d6505e7eb700..594238030237 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2393,6 +2393,26 @@
   DONE;
 })
 
+(define_insn "salt"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (lt:SI (match_operand:SI 1 "register_operand" "r")
+  (match_operand:SI 2 "register_operand" "r")))]
+  "TARGET_SALT"
+  "salt\t%0, %1, %2"
+  [(set_attr "type"   "arith")
+   (set_attr "mode"   "SI")
+   (set_attr "length" "3")])
+
+(define_insn "saltu"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (ltu:SI (match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r")))]
+  "TARGET_SALT"
+  "saltu\t%0, %1, %2"
+  [(set_attr "type"   "arith")
+   (set_attr "mode"   "SI")
+   (set_attr "length" "3")])
+
 (define_expand "cstoresf4"
   [(match_operand:SI 0 "register_operand")
(match_operator:SI 1 "comparison_operator"
-- 
2.30.2



Re: [PATCH] xtensa: Optimize boolean evaluation when SImode EQ/NE to zero if TARGET_MINMAX

2023-09-06 Thread Max Filippov via Gcc-patches
On Tue, Sep 5, 2023 at 2:29 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes the boolean evaluation for equality to 0 in SImode
> using the MINU (Minimum Value Unsigned) machine instruction available
> when TARGET_MINMAX is configured, for example, (x != 0) to MINU(x, 1)
> and (x == 0) to (MINU(x, 1) ^ 1).
>
> /* example */
> int test0(int x) {
>   return x == 0;
> }
> int test1(int x) {
>   return x != 0;
> }
>
> ;; before
> test0:
> mov.n   a10, a2
> movi.n  a9, 1
> movi.n  a2, 0
> moveqz  a2, a9, a10
> ret.n
> test1:
> mov.n   a10, a2
> movi.n  a9, 1
> movi.n  a2, 0
> movnez  a2, a9, a10
> ret.n
>
> ;; after (prereq. TARGET_MINMAX)
> test0:
> movi.n  a9, 1
> minu    a2, a2, a9
> xor     a2, a2, a9
> ret.n
> test1:
> movi.n  a9, 1
> minu    a2, a2, a9
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_expand_scc):
> Add code for particular constants (only 0 and INT_MIN for now)
> for EQ/NE boolean evaluation in SImode.
> * config/xtensa/xtensa.md (*eqne_INT_MIN): Remove because its
> implementation has been integrated into the above.
> ---
>  gcc/config/xtensa/xtensa.cc | 43 +++--
>  gcc/config/xtensa/xtensa.md | 34 -
>  2 files changed, 37 insertions(+), 40 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Optimize boolean evaluation when SImode EQ/NE to zero if TARGET_MINMAX

2023-09-06 Thread Max Filippov via Gcc-patches
On Tue, Sep 5, 2023 at 9:24 PM Takayuki 'January June' Suwa
 wrote:
> On 2023/09/06 8:01, Max Filippov wrote:
> > On Tue, Sep 5, 2023 at 2:29 AM Takayuki 'January June' Suwa
> >  wrote:
> >> ;; after (prereq. TARGET_MINMAX)
> >> test0:
> >> movi.n  a9, 1
> >> minu    a2, a2, a9
> >> xor     a2, a2, a9
> >> ret.n
> >
> > ISTM that test0 could be done with movnez in the same three instructions:
> >
> >   movi a9, 1
> >   movnez a2, a9, a2
> >   xor a2, a2, a9
>
> Unfortunately, the MOV[EQ/NE]Z machine instruction can only be used
> to implement the functionality if the input and output physical registers
> are the same (a2 in the example).

Oh yeah, you're right, I missed that.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Optimize boolean evaluation when SImode EQ/NE to zero if TARGET_MINMAX

2023-09-05 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Tue, Sep 5, 2023 at 2:29 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes the boolean evaluation for equality to 0 in SImode
> using the MINU (Minimum Value Unsigned) machine instruction available
> when TARGET_MINMAX is configured, for example, (x != 0) to MINU(x, 1)
> and (x == 0) to (MINU(x, 1) ^ 1).
>
> /* example */
> int test0(int x) {
>   return x == 0;
> }
> int test1(int x) {
>   return x != 0;
> }
>
> ;; before
> test0:
> mov.n   a10, a2
> movi.n  a9, 1
> movi.n  a2, 0
> moveqz  a2, a9, a10
> ret.n
> test1:
> mov.n   a10, a2
> movi.n  a9, 1
> movi.n  a2, 0
> movnez  a2, a9, a10
> ret.n
>
> ;; after (prereq. TARGET_MINMAX)
> test0:
> movi.n  a9, 1
> minu    a2, a2, a9
> xor     a2, a2, a9
> ret.n

ISTM that test0 could be done with movnez in the same three instructions:

  movi a9, 1
  movnez a2, a9, a2
  xor a2, a2, a9

> test1:
> movi.n  a9, 1
> minu    a2, a2, a9
> ret.n

ISTM that test1 could be done with movnez in the same two instructions:

  movi a9, 1
  movnez a2, a9, a2

-- 
Thanks.
-- Max


Re: [PATCH 2/3] gcc: xtensa: use dynconfig settings as builtin-macros

2023-07-20 Thread Max Filippov via Gcc-patches
On Thu, Jul 20, 2023 at 10:54 AM Alexey Lapshin
 wrote:
> Please consider reviewing the other two patches then.
> This would be nice to have it in upstream

Sure, it's going to take some time though as I need to take a good look,
and maybe I'll come back with some change proposals.

-- 
Thanks.
-- Max


Re: [PATCH 2/3] gcc: xtensa: use dynconfig settings as builtin-macros

2023-07-20 Thread Max Filippov via Gcc-patches
On Thu, Jul 20, 2023 at 10:45 AM Alexey Lapshin
 wrote:
>
> On Thu, 2023-07-20 at 08:25 -0700, Max Filippov wrote:
> > But it defines them with their respective values.
> > Just notice that it adds two leading underscores in front of the names.
>
> Why builtin macros were defined with prefix?
> With this approach I also need define it somewhere:
>
> #define XTHAL_ABI_WINDOWED  __XTHAL_ABI_WINDOWED
> #define XTHAL_ABI_CALL0 __XTHAL_ABI_CALL0
> .
>
> Or add prefix to macros in existing code that also looks not good..
>
> I want to get idea why toolchain can't have builtin macros with the same 
> names?

Because 1) it will break existing code and 2) it's just not nice to pollute
the namespace.

-- 
Thanks.
-- Max


Re: [PATCH 2/3] gcc: xtensa: use dynconfig settings as builtin-macros

2023-07-20 Thread Max Filippov via Gcc-patches
On Thu, Jul 20, 2023 at 9:10 AM Alexey Lapshin
 wrote:
> I see now, thanks for the explanation, I will try to rebuild toolchain 
> without this particular patch.
> BTW, what do you think about placing config from newlib overlay to dynconfig?

That's the right thing to do. Bonus points for keeping backwards
compatibility with the overlay-based configuration method (:
I did the same for the uClibc, but the change is still in my queue:
  
https://github.com/jcmvbkbc/uclibc-ng-xtensa/commit/842aede0537812a0d2158433c5e048ee87324075

-- 
Thanks.
-- Max


Re: [PATCH 2/3] gcc: xtensa: use dynconfig settings as builtin-macros

2023-07-20 Thread Max Filippov via Gcc-patches
On Thu, Jul 20, 2023 at 8:12 AM Alexey Lapshin
 wrote:
>
> Oops, missed this loop while implementing...
>
> I had a problem with building esp chips multilib until added my changes.
>
> This loop looks like just defines a macro without value.

But it defines them with their respective values.
Just notice that it adds two leading underscores in front of the names.

> But the value must be set to make it work correctly.
> It uses builtin_define() instead of builtin_define_with_int_value()
>
> I will check how it could be solved with the loop approach.

-- 
Thanks.
-- Max


Re: [PATCH 2/3] gcc: xtensa: use dynconfig settings as builtin-macros

2023-07-20 Thread Max Filippov via Gcc-patches
On Thu, Jul 20, 2023 at 7:37 AM Alexey Lapshin
 wrote:
>
> gcc/
> * config/xtensa/xtensa.h (XCHAL_HAVE_BE, XCHAL_HAVE_DENSITY,
>   XCHAL_HAVE_CONST16, XCHAL_HAVE_ABS, XCHAL_HAVE_ADDX,
>   XCHAL_HAVE_L32R, XSHAL_USE_ABSOLUTE_LITERALS,
>   XSHAL_HAVE_TEXT_SECTION_LITERALS, XCHAL_HAVE_MAC16,
>   XCHAL_HAVE_MUL16, XCHAL_HAVE_MUL32, XCHAL_HAVE_MUL32_HIGH,
>   XCHAL_HAVE_DIV32, XCHAL_HAVE_NSA, XCHAL_HAVE_MINMAX,
>   XCHAL_HAVE_SEXT, XCHAL_HAVE_LOOPS, XCHAL_HAVE_THREADPTR,
>   XCHAL_HAVE_RELEASE_SYNC, XCHAL_HAVE_S32C1I,
>   XCHAL_HAVE_BOOLEANS, XCHAL_HAVE_FP, XCHAL_HAVE_FP_DIV,
>   XCHAL_HAVE_FP_RECIP, XCHAL_HAVE_FP_SQRT,
>   XCHAL_HAVE_FP_RSQRT, XCHAL_HAVE_FP_POSTINC, XCHAL_HAVE_DFP,
>   XCHAL_HAVE_DFP_DIV, XCHAL_HAVE_DFP_RECIP,
>   XCHAL_HAVE_DFP_SQRT, XCHAL_HAVE_DFP_RSQRT,
>   XCHAL_HAVE_WINDOWED, XCHAL_NUM_AREGS,
>   XCHAL_HAVE_WIDE_BRANCHES, XCHAL_HAVE_PREDICTED_BRANCHES,
>   XCHAL_ICACHE_SIZE, XCHAL_DCACHE_SIZE,
>   XCHAL_ICACHE_LINESIZE, XCHAL_DCACHE_LINESIZE,
>   XCHAL_ICACHE_LINEWIDTH, XCHAL_DCACHE_LINEWIDTH,
>   XCHAL_DCACHE_IS_WRITEBACK, XCHAL_HAVE_MMU,
>   XCHAL_MMU_MIN_PTE_PAGE_SIZE, XCHAL_HAVE_DEBUG,
>   XCHAL_NUM_IBREAK, XCHAL_NUM_DBREAK, XCHAL_DEBUGLEVEL,
>   XCHAL_MAX_INSTRUCTION_SIZE, XCHAL_INST_FETCH_WIDTH,
>   XSHAL_ABI, XTHAL_ABI_WINDOWED, XTHAL_ABI_CALL0,
>   XCHAL_M_STAGE, XTENSA_MARCH_LATEST, XTENSA_MARCH_EARLIEST,
>   XCHAL_HAVE_CLAMPS, XCHAL_HAVE_DEPBITS,
>   XCHAL_HAVE_EXCLUSIVE, XCHAL_HAVE_XEA3): Add builtin-macros
>   with values from dynconfig.
> ---
>  gcc/config/xtensa/xtensa.h | 62 ++
>  1 file changed, 62 insertions(+)
>
> diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
> index 8ebf37cab33..a65b674915b 100644
> --- a/gcc/config/xtensa/xtensa.h
> +++ b/gcc/config/xtensa/xtensa.h
> @@ -67,6 +67,7 @@ along with GCC; see the file COPYING3.  If not see
>  #endif
>
>
> +#define XTENSA_CPU_CPP_BUILTIN(OPT) builtin_define_with_int_value (#OPT, OPT)
>  /* Target CPU builtins.  */
>  #define TARGET_CPU_CPP_BUILTINS()  \
>do { \
> @@ -82,6 +83,67 @@ along with GCC; see the file COPYING3.  If not see
>builtin_define ("__XTENSA_SOFT_FLOAT__");  
>   \
>  for (builtin = xtensa_get_config_strings (); *builtin; ++builtin)  \
>builtin_define (*builtin);   \

The loop above already does the same thing, doesn't it?

> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_BE);   
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_DENSITY);  
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_CONST16);  
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_ABS);  
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_ADDX); 
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_L32R); 
>   \
> +XTENSA_CPU_CPP_BUILTIN(XSHAL_USE_ABSOLUTE_LITERALS);   \
> +XTENSA_CPU_CPP_BUILTIN(XSHAL_HAVE_TEXT_SECTION_LITERALS);  \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_MAC16);
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_MUL16);
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_MUL32);
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_MUL32_HIGH);   
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_DIV32);
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_NSA);  
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_MINMAX);   
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_SEXT); 
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_LOOPS);
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_THREADPTR);
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_RELEASE_SYNC);   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_S32C1I);   
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_BOOLEANS); 
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_FP);   
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_FP_DIV);   
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_FP_RECIP); 
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_FP_SQRT);  
>   \
> +XTENSA_CPU_CPP_BUILTIN(XCHAL_HAVE_FP_RSQRT); 
>  

Re: [PATCH] xtensa: Use HARD_REG_SET instead of bare integer

2023-07-04 Thread Max Filippov via Gcc-patches
On Mon, Jul 3, 2023 at 5:57 PM Takayuki 'January June' Suwa
 wrote:
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (machine_function, xtensa_expand_prologue):
> Change to use HARD_REG_BIT and its macros.
> * config/xtensa/xtensa.md
> (peephole2: regmove elimination during DFmode input reload):
> Likewise.
> ---
>  gcc/config/xtensa/xtensa.cc |  9 +
>  gcc/config/xtensa/xtensa.md | 13 ++---
>  2 files changed, 11 insertions(+), 11 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 1/2] xtensa: Fix missing mode warning in "*eqne_INT_MIN"

2023-07-02 Thread Max Filippov via Gcc-patches
On Sat, Jul 1, 2023 at 10:21 AM Takayuki 'January June' Suwa
 wrote:
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*eqne_INT_MIN):
> Add missing ":SI" to the match_operator.
> ---
>  gcc/config/xtensa/xtensa.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 2/2] xtensa: The use of CLAMPS instruction also requires TARGET_MINMAX, as well as TARGET_CLAMPS

2023-07-02 Thread Max Filippov via Gcc-patches
On Sat, Jul 1, 2023 at 10:21 AM Takayuki 'January June' Suwa
 wrote:
>
> Because both smin and smax requiring TARGET_MINMAX are essential to the
> RTL representation.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_match_CLAMPS_imms_p):
> Simplify.
> * config/xtensa/xtensa.md (*xtensa_clamps):
> Add TARGET_MINMAX to the condition.
> ---
>  gcc/config/xtensa/xtensa.cc | 7 ++-
>  gcc/config/xtensa/xtensa.md | 4 ++--
>  2 files changed, 4 insertions(+), 7 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 2/2] xtensa: constantsynth: Add new 2-insns synthesis pattern

2023-06-18 Thread Max Filippov via Gcc-patches
On Sun, Jun 18, 2023 at 12:10 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch adds a new 2-instructions constant synthesis pattern:
>
> -  A non-negative square value that root can fit into a signed 12-bit:
> => "MOVI(.N) Ax, simm12" + "MULL Ax, Ax, Ax"
>
> Due to the execution cost of the integer multiply instruction (MULL), this
> synthesis works only when the 32-bit Integer Multiply Option is configured
> and optimize for size is specified.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_constantsynth_2insn):
> Add new pattern for the abovementioned case.
> ---
>  gcc/config/xtensa/xtensa.cc | 12 ++--
>  1 file changed, 10 insertions(+), 2 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 1/2] xtensa: Remove TARGET_MEMORY_MOVE_COST hook

2023-06-18 Thread Max Filippov via Gcc-patches
On Sun, Jun 18, 2023 at 12:10 AM Takayuki 'January June' Suwa
 wrote:
>
> It used to always return a constant 4, which is same as the default
> behavior, but doesn't take into account the effects of secondary
> reloads.
>
> Therefore, the implementation of this target hook is removed.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc
> (TARGET_MEMORY_MOVE_COST, xtensa_memory_move_cost): Remove.
> ---
>  gcc/config/xtensa/xtensa.cc | 13 -
>  1 file changed, 13 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH v2] xtensa: Optimize boolean evaluation or branching when EQ/NE to zero in S[IF]mode

2023-06-05 Thread Max Filippov via Gcc-patches
On Mon, Jun 5, 2023 at 8:15 AM Max Filippov  wrote:
>
> Hi Suwa-san,
>
> On Mon, Jun 5, 2023 at 2:37 AM Takayuki 'January June' Suwa
>  wrote:
> >
> > This patch optimizes the boolean evaluation of EQ/NE against zero
> > by adding two insn_and_split patterns similar to SImode conditional
> > store:
> >
> > "eq_zero":
> > op0 = (op1 == 0) ? 1 : 0;
> > op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */
> >
> > "movsicc_ne0_reg_0":
> > op0 = (op1 != 0) ? op2 : 0;
> > op0 = op2; if (op1 == 0) ? op0 = op1;  /* optimized */
> >
> > /* example #1 */
> > int bool_eqSI(int x) {
> >   return x == 0;
> > }
> > int bool_neSI(int x) {
> >   return x != 0;
> > }
> >
> > ;; after (TARGET_NSA)
> > > bool_eqSI:
> > > nsau    a2, a2
> > > srli    a2, a2, 5
> > > ret.n
> > bool_neSI:
> > mov.n   a9, a2
> > movi.n  a2, 1
> > moveqz  a2, a9, a9
> > ret.n
> >
> > These also work in SFmode by ignoring their sign bits, and further-
> > more, the branch if EQ/NE against zero in SFmode is also done in the
> > same manner.
> >
> > The reasons for this optimization in SFmode are:
> >
> >   - Only zero values (negative or non-negative) contain no bits of 1
> > with both the exponent and the mantissa.
> >   - EQ/NE comparisons involving NaNs produce no signal even if they
> > are signaling.
> >   - Even if the use of IEEE 754 single-precision floating-point co-
> > processor is configured (TARGET_HARD_FLOAT is true):
> > 1. Load zero value to FP register
> > 2. Possibly, additional FP move if the comparison target is
> >an address register
> > 3. FP equality check instruction
> > 4. Read the boolean register containing the result, or condi-
> >tional branch
> > As noted above, a considerable number of instructions are still
> > generated.
> >
> > /* example #2 */
> > int bool_eqSF(float x) {
> >   return x == 0;
> > }
> > int bool_neSF(float x) {
> >   return x != 0;
> > }
> > int bool_ltSF(float x) {
> >   return x < 0;
> > }
> > extern void foo(void);
> > void cb_eqSF(float x) {
> >   if(x != 0)
> > foo();
> > }
> > void cb_neSF(float x) {
> >   if(x == 0)
> > foo();
> > }
> > void cb_geSF(float x) {
> >   if(x < 0)
> > foo();
> > }
> >
> > ;; after
> > ;; (TARGET_NSA, TARGET_BOOLEANS and TARGET_HARD_FLOAT)
> > > bool_eqSF:
> > > add.n   a2, a2, a2
> > > nsau    a2, a2
> > > srli    a2, a2, 5
> > > ret.n
> > bool_neSF:
> > add.n   a9, a2, a2
> > movi.n  a2, 1
> > moveqz  a2, a9, a9
> > ret.n
> > bool_ltSF:
> > movi.n  a9, 0
> > wfr f0, a2
> > wfr f1, a9
> > olt.s   b0, f0, f1
> > movi.n  a9, 0
> > movi.n  a2, 1
> > > movf    a2, a9, b0
> > ret.n
> > cb_eqSF:
> > add.n   a2, a2, a2
> > beqz.n  a2, .L6
> > j.l foo, a9
> > .L6:
> > ret.n
> > cb_neSF:
> > add.n   a2, a2, a2
> > bnez.n  a2, .L8
> > j.l foo, a9
> > .L8:
> > ret.n
> > > cb_geSF:
> > > addi    sp, sp, -16
> > > movi.n  a3, 0
> > > s32i.n  a12, sp, 8
> > > s32i.n  a0, sp, 12
> > > mov.n   a12, a2
> > > call0   __unordsf2
> > > bnez.n  a2, .L10
> > > movi.n  a3, 0
> > > mov.n   a2, a12
> > > call0   __gesf2
> > > bnei    a2, -1, .L10
> > > l32i.n  a0, sp, 12
> > > l32i.n  a12, sp, 8
> > > addi    sp, sp, 16
> > > j.l     foo, a9
> > > .L10:
> > > l32i.n  a0, sp, 12
> > > l32i.n  a12, sp, 8
> > > addi    sp, sp, 16
> > > ret.n
> >
> > gcc/ChangeLog:
> >
> > * config/xtensa/predicates.md (const_float_0_operand):
> > Rename from obsolete "const_float_1_operand" and change the
> > constant to compare.
> > (cstoresf_cbranchsf_opera

Re: [PATCH v2] xtensa: Optimize boolean evaluation or branching when EQ/NE to zero in S[IF]mode

2023-06-05 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, Jun 5, 2023 at 2:37 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes the boolean evaluation of EQ/NE against zero
> by adding two insn_and_split patterns similar to SImode conditional
> store:
>
> "eq_zero":
> op0 = (op1 == 0) ? 1 : 0;
> op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */
>
> "movsicc_ne0_reg_0":
> op0 = (op1 != 0) ? op2 : 0;
> op0 = op2; if (op1 == 0) ? op0 = op1;  /* optimized */
>
> /* example #1 */
> int bool_eqSI(int x) {
>   return x == 0;
> }
> int bool_neSI(int x) {
>   return x != 0;
> }
>
> ;; after (TARGET_NSA)
> bool_eqSI:
> nsau    a2, a2
> srli    a2, a2, 5
> ret.n
> bool_neSI:
> mov.n   a9, a2
> movi.n  a2, 1
> moveqz  a2, a9, a9
> ret.n
>
> These also work in SFmode by ignoring their sign bits, and further-
> more, the branch if EQ/NE against zero in SFmode is also done in the
> same manner.
>
> The reasons for this optimization in SFmode are:
>
>   - Only zero values (negative or non-negative) contain no bits of 1
> with both the exponent and the mantissa.
>   - EQ/NE comparisons involving NaNs produce no signal even if they
> are signaling.
>   - Even if the use of IEEE 754 single-precision floating-point co-
> processor is configured (TARGET_HARD_FLOAT is true):
> 1. Load zero value to FP register
> 2. Possibly, additional FP move if the comparison target is
>an address register
> 3. FP equality check instruction
> 4. Read the boolean register containing the result, or condi-
>tional branch
> As noted above, a considerable number of instructions are still
> generated.
>
> /* example #2 */
> int bool_eqSF(float x) {
>   return x == 0;
> }
> int bool_neSF(float x) {
>   return x != 0;
> }
> int bool_ltSF(float x) {
>   return x < 0;
> }
> extern void foo(void);
> void cb_eqSF(float x) {
>   if(x != 0)
> foo();
> }
> void cb_neSF(float x) {
>   if(x == 0)
> foo();
> }
> void cb_geSF(float x) {
>   if(x < 0)
> foo();
> }
>
> ;; after
> ;; (TARGET_NSA, TARGET_BOOLEANS and TARGET_HARD_FLOAT)
> bool_eqSF:
> add.n   a2, a2, a2
> nsau    a2, a2
> srli    a2, a2, 5
> ret.n
> bool_neSF:
> add.n   a9, a2, a2
> movi.n  a2, 1
> moveqz  a2, a9, a9
> ret.n
> bool_ltSF:
> movi.n  a9, 0
> wfr f0, a2
> wfr f1, a9
> olt.s   b0, f0, f1
> movi.n  a9, 0
> movi.n  a2, 1
> movf    a2, a9, b0
> ret.n
> cb_eqSF:
> add.n   a2, a2, a2
> beqz.n  a2, .L6
> j.l foo, a9
> .L6:
> ret.n
> cb_neSF:
> add.n   a2, a2, a2
> bnez.n  a2, .L8
> j.l foo, a9
> .L8:
> ret.n
> cb_geSF:
> addi    sp, sp, -16
> movi.n  a3, 0
> s32i.n  a12, sp, 8
> s32i.n  a0, sp, 12
> mov.n   a12, a2
> call0   __unordsf2
> bnez.n  a2, .L10
> movi.n  a3, 0
> mov.n   a2, a12
> call0   __gesf2
> bnei    a2, -1, .L10
> l32i.n  a0, sp, 12
> l32i.n  a12, sp, 8
> addi    sp, sp, 16
> j.l     foo, a9
> .L10:
> l32i.n  a0, sp, 12
> l32i.n  a12, sp, 8
> addi    sp, sp, 16
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/predicates.md (const_float_0_operand):
> Rename from obsolete "const_float_1_operand" and change the
> constant to compare.
> (cstoresf_cbranchsf_operand, cstoresf_cbranchsf_operator):
> New.
> * config/xtensa/xtensa.cc (xtensa_expand_conditional_branch):
> Add code for EQ/NE comparison with constant zero in SFmode.
> (xtensa_expand_scc): Added code to derive boolean evaluation
> of EQ/NE with constant zero for comparison in SFmode.
> (xtensa_rtx_costs): Change cost of CONST_DOUBLE with value
> zero inside "cbranchsf4" to 0.
> * config/xtensa/xtensa.md (cbranchsf4, cstoresf4):
> Change "match_operator" and the third "match_operand" to the
> ones mentioned above.
> (movsicc_ne0_reg_zero, eq_zero): New.
> ---
>  gcc/config/xtensa/predicates.md | 17 +--
>  gcc/config/xtensa/xtensa.cc | 45 
>  gcc/config/xtensa/xtensa.md | 53 +
>  3 files changed, 106 insertions(+), 9 deletions(-)

This version performs much better than v1, but there's still new
testsuite failure in the gcc.c-torture/execute/bitfld-3.c
and the following change in the generated code
from:

   l32i.n  a11, a7, 8
   l8ui    a9, a7, 12
   movi    a10, 0xff
   add.n   a9, a9, a10
   addi.n  a7, a11, -1
   movi.n  

Re: [PATCH] xtensa: Optimize boolean evaluation or branching when EQ/NE to INT_MIN

2023-06-04 Thread Max Filippov via Gcc-patches
On Sat, Jun 3, 2023 at 3:52 PM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes both the boolean evaluation of and the branching of
> EQ/NE against INT_MIN (-2147483648), by taking advantage of the specifi-
> cation the ABS machine instruction on Xtensa returns INT_MIN iff INT_MIN,
> otherwise non-negative value.
>
> /* example */
> int test0(int x) {
>   return (x == -2147483648);
> }
> int test1(int x) {
>   return (x != -2147483648);
> }
> extern void foo(void);
> void test2(int x) {
>   if(x == -2147483648)
> foo();
> }
> void test3(int x) {
>   if(x != -2147483648)
> foo();
> }
>
> ;; before
> test0:
> movi.n  a9, -1
> slli    a9, a9, 31
> add.n   a2, a2, a9
> nsau    a2, a2
> srli    a2, a2, 5
> ret.n
> test1:
> movi.n  a9, -1
> slli    a9, a9, 31
> add.n   a9, a2, a9
> movi.n  a2, 1
> moveqz  a2, a9, a9
> ret.n
> test2:
> movi.n  a9, -1
> slli    a9, a9, 31
> bne     a2, a9, .L3
> j.l     foo, a9
> .L3:
> ret.n
> test3:
> movi.n  a9, -1
> slli    a9, a9, 31
> beq     a2, a9, .L5
> j.l     foo, a9
> .L5:
> ret.n
>
> ;; after
> test0:
> abs     a2, a2
> extui   a2, a2, 31, 1
> ret.n
> test1:
> abs     a2, a2
> srai    a2, a2, 31
> addi.n  a2, a2, 1
> ret.n
> test2:
> abs     a2, a2
> bbci    a2, 31, .L3
> j.l     foo, a9
> .L3:
> ret.n
> test3:
> abs     a2, a2
> bbsi    a2, 31, .L5
> j.l     foo, a9
> .L5:
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*btrue_INT_MIN, *eqne_INT_MIN):
> New insn_and_split patterns.
> ---
>  gcc/config/xtensa/xtensa.md | 64 +
>  1 file changed, 64 insertions(+)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Optimize boolean evaluation or branching when EQ/NE to zero in S[IF]mode

2023-06-04 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Sat, Jun 3, 2023 at 2:55 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes the boolean evaluation of EQ/NE against zero
> by adding two insn_and_split patterns similar to SImode conditional
> store:
>
> "eq_zero":
> op0 = (op1 == 0) ? 1 : 0;
> op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */
>
> "movsicc_ne0_reg_0":
> op0 = (op1 != 0) ? op2 : 0;
> op0 = op2; if (op1 == 0) ? op0 = op1;  /* optimized */
>
> These also work in SFmode by ignoring their sign bits, and further-
> more, the branch if EQ/NE against zero in SFmode is also done in the
> same manner.
>
> The reasons for this optimization in SFmode are:
>
>   - Only zero values (negative or non-negative) contain no bits of 1
> with both the exponent and the mantissa.
>   - EQ/NE comparisons involving NaNs produce no signal even if they
> are signaling.
>   - Even if the use of IEEE 754 single-precision floating-point co-
> processor is configured (TARGET_HARD_FLOAT is true):
> 1. Load zero value to FP register
> 2. Possibly, additional FP move if the comparison target is
>an address register
> 3. FP equality check instruction
> 4. Read the boolean register containing the result, or condi-
>tional branch
> As noted above, a considerable number of instructions are still
> generated.
>
> gcc/ChangeLog:
>
> * config/xtensa/predicates.md (const_float_0_operand):
> Rename from obsolete "const_float_1_operand" and change the
> constant to compare.
> (cstoresf_cbranchsf_operand, cstoresf_cbranchsf_operator):
> New.
> * config/xtensa/xtensa.cc (xtensa_expand_conditional_branch):
> Add code for EQ/NE comparison with constant zero in SFmode.
> (xtensa_expand_scc): Added code to derive boolean evaluation
> of EQ/NE with constant zero for comparison in SFmode.
> (xtensa_rtx_costs): Change cost of CONST_DOUBLE with value
> zero inside "cbranchsf4" to 0.
> * config/xtensa/xtensa.md (cbranchsf4, cstoresf4):
> Change "match_operator" and the third "match_operand" to the
> ones mentioned above.
> (movsicc_ne0_reg_zero, eq_zero): New.
> ---
>  gcc/config/xtensa/predicates.md | 19 ++--
>  gcc/config/xtensa/xtensa.cc | 43 ++
>  gcc/config/xtensa/xtensa.md | 53 +
>  3 files changed, 106 insertions(+), 9 deletions(-)

This change results in a bunch of new testsuite failures
on configurations without FPU that are all ICEs:

+FAIL: gcc.c-torture/execute/bitfld-3.c   -O1  execution test
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O1  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O1  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O3 -g  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O3 -g  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -Os  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -Os  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  (internal compiler error:
in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  (internal compiler error: in
extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O1  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O1  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O2  (internal compiler
error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O2  (test for excess errors)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal
compiler error: in extract_insn, at recog.cc:2791)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer 

Re: [PATCH 2/3 v3] xtensa: Add 'adddi3' and 'subdi3' insn patterns

2023-06-01 Thread Max Filippov via Gcc-patches
On Wed, May 31, 2023 at 11:01 PM Takayuki 'January June' Suwa
 wrote:
> More optimized than the default RTL generation.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (adddi3, subdi3):
> New RTL generation patterns implemented according to the instruc-
> tion idioms described in the Xtensa ISA reference manual (p. 600).
> ---
>  gcc/config/xtensa/xtensa.md | 52 +
>  1 file changed, 52 insertions(+)
>
> diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
> index eda1353894b..21afa747e89 100644
> --- a/gcc/config/xtensa/xtensa.md
> +++ b/gcc/config/xtensa/xtensa.md
> @@ -190,6 +190,35 @@
> (set_attr "mode""SI")
> (set_attr "length"  "3")])
>
> +(define_expand "adddi3"
> +  [(set (match_operand:DI 0 "register_operand")
> +   (plus:DI (match_operand:DI 1 "register_operand")
> +(match_operand:DI 2 "register_operand")))]
> +  ""
> +{
> +  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
> +  rtx_code_label *label;
> +  if (rtx_equal_p (operands[0], operands[1])
> +  || rtx_equal_p (operands[0], operands[2])

> +  || ! REG_P (operands[1]) || ! REG_P (operands[2]))

I wonder if these additional conditions are necessary, given that
the operands have the "register_operand" predicates?

-- 
Thanks.
-- Max


Re: [PATCH 2/3 v3] xtensa: Add 'adddi3' and 'subdi3' insn patterns

2023-06-01 Thread Max Filippov via Gcc-patches
On Wed, May 31, 2023 at 11:01 PM Takayuki 'January June' Suwa
 wrote:
> More optimized than the default RTL generation.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (adddi3, subdi3):
> New RTL generation patterns implemented according to the instruc-
> tion idioms described in the Xtensa ISA reference manual (p. 600).
> ---
>  gcc/config/xtensa/xtensa.md | 52 +
>  1 file changed, 52 insertions(+)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 1/3] xtensa: Improve "*shlrd_reg" insn pattern and its variant

2023-05-31 Thread Max Filippov via Gcc-patches
On Tue, May 30, 2023 at 2:27 AM Takayuki 'January June' Suwa
 wrote:
>
> The insn "*shlrd_reg" shifts two registers with a funnel shifter by the
> third register to get a single word result:
>
>   reg0 = (reg1 SHIFT_OP0 reg3) BIT_JOIN_OP (reg2 SHIFT_OP1 (32 - reg3))
>
> where the funnel left shift is SHIFT_OP0 := ASHIFT, SHIFT_OP1 := LSHIFTRT
> and its right shift is SHIFT_OP0 := LSHIFTRT, SHIFT_OP1 := ASHIFT,
> respectively.  And also, BIT_JOIN_OP can be either PLUS or IOR in either
> shift direction.
>
>   [(set (match_operand:SI 0 "register_operand" "=a")
> (match_operator:SI 6 "xtensa_bit_join_operator"
> [(match_operator:SI 4 "logical_shift_operator"
> [(match_operand:SI 1 "register_operand" "r")
>  (match_operand:SI 3 "register_operand" "r")])
>  (match_operator:SI 5 "logical_shift_operator"
> [(match_operand:SI 2 "register_operand" "r")
>  (neg:SI (match_dup 3))])]))]
>
> Although the RTL matching template can express it as above, there is no
> way of directing that the operator (operands[6]) that combines the two
> individual shifts is commutative.
> Thus, if multiple insn sequences matching the above pattern appear
> adjacently, the combiner may accidentally mix them up and get partial
> results.
>
> This patch adds a new insn-and-split pattern with the two sides swapped
> representation of the bit-combining operation that was lacking and
> described above.
>
> And also changes the other "*shlrd" variants from previously describing
> the arbitrariness of bit-combining operations with code iterators to a
> combination of the match_operator and the predicate above.
>
> gcc/ChangeLog:
>
> * config/xtensa/predicates.md (xtensa_bit_join_operator):
> New predicate.
> * config/xtensa/xtensa.md (ior_op): Remove.
> (*shlrd_reg): Rename from "*shlrd_reg_", and add the
> insn_and_split pattern of the same name to express and capture
> the bit-combining operation with both sides swapped.
> In addition, replace use of code iterator with new operator
> predicate.
> (*shlrd_const, *shlrd_per_byte):
> Likewise regarding the code iterator.
> ---
>  gcc/config/xtensa/predicates.md |  3 ++
>  gcc/config/xtensa/xtensa.md | 81 ++---
>  2 files changed, 58 insertions(+), 26 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
I can also confirm that the pattern is now used as expected in
the case where I previously had an issue. Thanks for fixing that!
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 2/3 v2] xtensa: Add 'adddi3' and 'subdi3' insn patterns

2023-05-31 Thread Max Filippov via Gcc-patches
On Tue, May 30, 2023 at 2:50 AM Takayuki 'January June' Suwa
 wrote:
>
> Resubmitting the correct one due to a mistake in merging order of fixes.
> ---
> More optimized than the default RTL generation.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (adddi3, subdi3):
> New RTL generation patterns implemented according to the instruc-
> tion idioms described in the Xtensa ISA reference manual (p. 600).
> ---
>  gcc/config/xtensa/xtensa.md | 52 +
>  1 file changed, 52 insertions(+)
>
> diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
> index eda1353894b..6882baaedfd 100644
> --- a/gcc/config/xtensa/xtensa.md
> +++ b/gcc/config/xtensa/xtensa.md
> @@ -190,6 +190,32 @@
> (set_attr "mode""SI")
> (set_attr "length"  "3")])
>
> +(define_expand "adddi3"
> +  [(set (match_operand:DI 0 "register_operand")
> +   (plus:DI (match_operand:DI 1 "register_operand")
> +(match_operand:DI 2 "register_operand")))]
> +  ""
> +{
> +  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
> +  rtx_code_label *label;
> +  lo_dest = gen_lowpart (SImode, operands[0]);
> +  hi_dest = gen_highpart (SImode, operands[0]);
> +  lo_op0 = gen_lowpart (SImode, operands[1]);
> +  hi_op0 = gen_highpart (SImode, operands[1]);
> +  lo_op1 = gen_lowpart (SImode, operands[2]);
> +  hi_op1 = gen_highpart (SImode, operands[2]);
> +  if (rtx_equal_p (lo_dest, lo_op1))
> +FAIL;

With this condition I see the following source

unsigned long long foo(unsigned long long a, unsigned long long b)
{
   return a + b;
}

turns to (expected)

   .global foo
   .type   foo, @function
foo:
   add.n   a2, a2, a4
   add.n   a3, a3, a5
   bgeu    a2, a4, .L2
   addi.n  a3, a3, 1
.L2:
   ret.n

but

unsigned long long foo(unsigned long long a, unsigned long long b)
{
   return b + a;
}

has an extra instruction:

   .global foo
   .type   foo, @function
foo:
   mov.n   a9, a2
   add.n   a2, a4, a2
   add.n   a3, a5, a3
   bgeu    a2, a9, .L2
   addi.n  a3, a3, 1
.L2:
   ret.n

I thought that maybe the following would help (plus using
lo_cmp in the emit_cmp_and_jump_insns below):

  if (!rtx_equal_p (lo_dest, lo_op0))
   lo_cmp = lo_op0;
 else if (!rtx_equal_p (lo_dest, lo_op1))
   lo_cmp = lo_op1;
 else
   FAIL;

but to my surprise it doesn't.

> +  emit_clobber (operands[0]);

Why is this clobber needed?

> +  emit_insn (gen_addsi3 (lo_dest, lo_op0, lo_op1));
> +  emit_insn (gen_addsi3 (hi_dest, hi_op0, hi_op1));
> +  emit_cmp_and_jump_insns (lo_dest, lo_op1, GEU, const0_rtx,
> +  SImode, true, label = gen_label_rtx ());
> +  emit_insn (gen_addsi3 (hi_dest, hi_dest, const1_rtx));
> +  emit_label (label);
> +  DONE;
> +})
> +
>  (define_insn "addsf3"
>[(set (match_operand:SF 0 "register_operand" "=f")
> (plus:SF (match_operand:SF 1 "register_operand" "%f")
> @@ -237,6 +263,32 @@
>   (const_int 5)
>   (const_int 6)))])
>
> +(define_expand "subdi3"
> +  [(set (match_operand:DI 0 "register_operand")
> +   (minus:DI (match_operand:DI 1 "register_operand")
> + (match_operand:DI 2 "register_operand")))]
> +  ""
> +{
> +  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
> +  rtx_code_label *label;
> +  lo_dest = gen_lowpart (SImode, operands[0]);
> +  hi_dest = gen_highpart (SImode, operands[0]);
> +  lo_op0 = gen_lowpart (SImode, operands[1]);
> +  hi_op0 = gen_highpart (SImode, operands[1]);
> +  lo_op1 = gen_lowpart (SImode, operands[2]);
> +  hi_op1 = gen_highpart (SImode, operands[2]);
> +  if (rtx_equal_p (lo_op0, lo_op1))
> +FAIL;

I believe that for the emit_cmp_and_jump_insns below
the check here should look like this:

if (rtx_equal_p (lo_dest, lo_op0) || rtx_equal_p (lo_dest, lo_op1))

But maybe drop this check and use the following instead?

 emit_insn (gen_subsi3 (hi_dest, hi_op0, hi_op1));
 emit_cmp_and_jump_insns (lo_op0, lo_op1, GEU, const0_rtx,
  SImode, true, label = gen_label_rtx ());
 emit_insn (gen_addsi3 (hi_dest, hi_dest, constm1_rtx));
 emit_label (label);
 emit_insn (gen_subsi3 (lo_dest, lo_op0, lo_op1));

> +  emit_clobber (operands[0]);

Why is this clobber needed?

> +  emit_insn (gen_subsi3 (lo_dest, lo_op0, lo_op1));
> +  emit_insn (gen_subsi3 (hi_dest, hi_op0, hi_op1));
> +  emit_cmp_and_jump_insns (lo_op0, lo_op1, GEU, const0_rtx,
> +  SImode, true, label = gen_label_rtx ());
> +  emit_insn (gen_addsi3 (hi_dest, hi_dest, constm1_rtx));
> +  emit_label (label);
> +  DONE;
> +})
> +
>  (define_insn "subsf3"
>[(set (match_operand:SF 0 "register_operand" "=f")
> (minus:SF (match_operand:SF 1 "register_operand" "f")
> --
> 2.30.2

-- 
Thanks.
-- Max


Re: [PATCH 3/3 v2] xtensa: Optimize 'cstoresi4' insn pattern

2023-05-30 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Tue, May 30, 2023 at 2:51 AM Takayuki 'January June' Suwa
 wrote:
>
> Resubmitting the correct one due to a mistake in merging order of fixes.
> ---
> This patch introduces more optimized implementations for the 6 cstoresi4
> insn comparison methods (eq/ne/lt/le/gt/ge, however, required TARGET_NSA
> for eq).
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_expand_scc):
> Add dedicated optimization code for cstoresi4 (eq/ne/gt/ge/lt/le).
> * config/xtensa/xtensa.md (xtensa_ge_zero):
> Rename from '*signed_ge_zero', because it had to be called from
> 'xtensa_expand_scc()'.
> ---
>  gcc/config/xtensa/xtensa.cc | 106 
>  gcc/config/xtensa/xtensa.md |   2 +-
>  2 files changed, 96 insertions(+), 12 deletions(-)

This change introduces a bunch of testsuite failures:

+FAIL: gcc.c-torture/execute/20070623-1.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20070623-1.c   -O1  execution test
+FAIL: gcc.c-torture/execute/20070623-1.c   -O2  execution test
+FAIL: gcc.c-torture/execute/20070623-1.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/20070623-1.c   -Os  execution test
+FAIL: gcc.c-torture/execute/20070623-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/20070623-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test
+FAIL: gcc.c-torture/execute/920612-1.c   -O0  execution test
+FAIL: gcc.c-torture/execute/920612-1.c   -O1  execution test
+FAIL: gcc.c-torture/execute/920612-1.c   -O2  execution test
+FAIL: gcc.c-torture/execute/920612-1.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/920612-1.c   -Os  execution test
+FAIL: gcc.c-torture/execute/920612-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/int-compare.c   -O0  execution test
+FAIL: gcc.c-torture/execute/int-compare.c   -O1  execution test
+FAIL: gcc.c-torture/execute/int-compare.c   -O2  execution test
+FAIL: gcc.c-torture/execute/int-compare.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/int-compare.c   -Os  execution test
+FAIL: gcc.c-torture/execute/int-compare.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/pr28651.c   -O0  execution test
+FAIL: gcc.c-torture/execute/pr28651.c   -O1  execution test
+FAIL: gcc.c-torture/execute/pr28651.c   -O2  execution test
+FAIL: gcc.c-torture/execute/pr28651.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/pr28651.c   -Os  execution test
+FAIL: gcc.c-torture/execute/pr28651.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/pr55137.c   -O0  execution test
+FAIL: gcc.c-torture/execute/pr55137.c   -O1  execution test
+FAIL: gcc.c-torture/execute/pr55137.c   -O2  execution test
+FAIL: gcc.c-torture/execute/pr55137.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/pr55137.c   -Os  execution test
+FAIL: gcc.c-torture/execute/pr55137.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.dg/pr61045.c execution test
+FAIL: gcc.dg/signbit-6.c execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-12.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-12.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-12.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-13.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-13.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-13.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-14.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-14.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-p-14.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-p-14.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.dg/torture/pr49958.c   -O0  execution test
+FAIL: gcc.dg/torture/pr49958.c   -O1  execution test
+FAIL: gcc.dg/torture/pr49958.c   -O2  execution test
+FAIL: gcc.dg/torture/pr49958.c   -O3 -g  execution test
+FAIL: gcc.dg/torture/pr49958.c   -Os  execution test
+FAIL: gcc.dg/torture/pr49958.c   -O2 -flto -fno-use-linker-plugin
-flto-partition=none  execution test
+FAIL: gcc.dg/tree-ssa/pr68714.c (internal compiler error: in
decompose, at rtl.h:2297)
+FAIL: gcc.dg/tree-ssa/pr68714.c (test for excess errors)
+FAIL: gcc.dg/tree-ssa/pr81346-4.c execution test

> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
> index 3b5d25b660a..64efd3d7287 100644
> --- 

Re: [PATCH 3/3] xtensa: Rework 'setmemsi' insn pattern

2023-05-26 Thread Max Filippov via Gcc-patches
On Thu, May 25, 2023 at 8:13 AM Takayuki 'January June' Suwa
 wrote:
>
> In order to reject voodoo estimation logic with lots of magic numbers,
> this patch revises the code to measure the costs of the three memset
> methods based on the actual emission size of the insn sequence
> corresponding to each method and choose the smallest one.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa-protos.h
> (xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): Remove.
> (xtensa_expand_block_set): New prototype.
> * config/xtensa/xtensa.cc
> (xtensa_expand_block_set_libcall): New subfunction.
> (xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): Rewrite as subfunctions.
> (xtensa_expand_block_set): New function that calls the above
> subfunctions.
> * config/xtensa/xtensa.md (memsetsi): Change to invoke only
> xtensa_expand_block_set().
> ---
>  gcc/config/xtensa/xtensa-protos.h |   3 +-
>  gcc/config/xtensa/xtensa.cc   | 319 --
>  gcc/config/xtensa/xtensa.md   |   4 +-
>  3 files changed, 172 insertions(+), 154 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

--
Thanks.
-- Max


Re: [PATCH 2/3] xtensa: Add 'subtraction from constant' insn pattern

2023-05-26 Thread Max Filippov via Gcc-patches
On Thu, May 25, 2023 at 8:13 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch tries to eliminate the use of a temporary pseudo for
> '(minus:SI (const_int) (reg:SI))' if the addition of negative constant
> value can be emitted in a single machine instruction.
>
> /* example */
> int test0(int x) {
>   return 1 - x;
> }
> int test1(int x) {
>   return 100 - x;
> }
> int test2(int x) {
>   return 25600 - x;
> }
>
> ;; before
> test0:
> movi.n  a9, 1
> sub     a2, a9, a2
> ret.n
> test1:
> movi    a9, 0x64
> sub     a2, a9, a2
> ret.n
> test2:
> movi.n  a9, 0x19
> slli    a9, a9, 10
> sub     a2, a9, a2
> ret.n
>
> ;; after
> test0:
> addi.n  a2, a2, -1
> neg     a2, a2
> ret.n
> test1:
> addi    a2, a2, -100
> neg     a2, a2
> ret.n
> test2:
> addmi   a2, a2, -0x6400
> neg     a2, a2
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa-protos.h (xtensa_m1_or_1_thru_15):
> New prototype.
> * config/xtensa/xtensa.cc (xtensa_m1_or_1_thru_15):
> New function.
> * config/xtensa/constraints.md (O):
> Change to use the above function.
> * config/xtensa/xtensa.md (*subsi3_from_const):
> New insn_and_split pattern.
> ---
>  gcc/config/xtensa/constraints.md  |  2 +-
>  gcc/config/xtensa/xtensa-protos.h |  1 +
>  gcc/config/xtensa/xtensa.cc   |  7 +++
>  gcc/config/xtensa/xtensa.md   | 24 
>  4 files changed, 33 insertions(+), 1 deletion(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 1/3] xtensa: Addendum of the commit e33d2dcb463161a110ac345a451132ce8b2b23d9

2023-05-26 Thread Max Filippov via Gcc-patches
On Thu, May 25, 2023 at 8:13 AM Takayuki 'January June' Suwa
 wrote:
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3):
> Retract excessive line folding, and correct the value of
> the "length" insn attribute related to TARGET_DENSITY.
> (*extzvsi-1bit_addsubx): Ditto.
> ---
>  gcc/config/xtensa/xtensa.md | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master with a more readable subject line.

-- 
Thanks.
-- Max


Re: [PATCH 2/2] xtensa: Merge '*addx' and '*subx' insn patterns into one

2023-05-23 Thread Max Filippov via Gcc-patches
On Mon, May 22, 2023 at 12:06 AM Takayuki 'January June' Suwa
 wrote:
>
> By making use of the 'addsub_operator' added in the last patch.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*addsubx): Rename from '*addx',
> and change to also accept '*subx' pattern.
> (*subx): Remove.
> ---
>  gcc/config/xtensa/xtensa.md | 31 +--
>  1 file changed, 13 insertions(+), 18 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH v2] xtensa: Optimize '(x & CST1_POW2) != 0 ? CST2_POW2 : 0'

2023-05-23 Thread Max Filippov via Gcc-patches
On Mon, May 22, 2023 at 10:48 PM Takayuki 'January June' Suwa
 wrote:
>
> On 2023/05/23 11:27, Max Filippov wrote:
> > Hi Suwa-san,
>
> Hi!
>
> > This change introduces a bunch of test failures on big endian configuration.
> > I believe that's because the starting bit position for zero_extract is 
> > counted
> > from different ends depending on the endianness.
>
> Oops, what a stupid mistake... X(
>
> ===
> This patch removes one machine instruction from the "single bit extraction
> with shifting" operation, and tries to eliminate the conditional
> branch if CST2_POW2 doesn't fit into signed 12 bits with the help
> of ifcvt optimization.
>
> /* example #1 */
> int test0(int x) {
>   return (x & 1048576) != 0 ? 1024 : 0;
> }
> extern int foo(void);
> int test1(void) {
>   return (foo() & 1048576) != 0 ? 16777216 : 0;
> }
>
> ;; before
> test0:
> movi    a9, 0x400
> srai    a2, a2, 10
> and     a2, a2, a9
> ret.n
> test1:
> addi    sp, sp, -16
> s32i.n  a0, sp, 12
> call0   foo
> extui   a2, a2, 20, 1
> slli    a2, a2, 20
> beqz.n  a2, .L2
> movi.n  a2, 1
> slli    a2, a2, 24
> .L2:
> l32i.n  a0, sp, 12
> addi    sp, sp, 16
> ret.n
>
> ;; after
> test0:
> extui   a2, a2, 20, 1
> slli    a2, a2, 10
> ret.n
> test1:
> addi    sp, sp, -16
> s32i.n  a0, sp, 12
> call0   foo
> l32i.n  a0, sp, 12
> extui   a2, a2, 20, 1
> slli    a2, a2, 24
> addi    sp, sp, 16
> ret.n
>
> In addition, if the left shift amount ('exact_log2(CST2_POW2)') is
> between 1 and 3 and either an addition or a subtraction with another
> register follows, emit an ADDX[248] or SUBX[248] machine instruction
> instead of separate left shift and add/subtract ones.
>
> /* example #2 */
> int test2(int x, int y) {
>   return ((x & 1048576) != 0 ? 4 : 0) + y;
> }
> int test3(int x, int y) {
>   return ((x & 2) != 0 ? 8 : 0) - y;
> }
>
> ;; before
> test2:
> movi.n  a9, 4
> srai    a2, a2, 18
> and     a2, a2, a9
> add.n   a2, a2, a3
> ret.n
> test3:
> movi.n  a9, 8
> slli    a2, a2, 2
> and     a2, a2, a9
> sub     a2, a2, a3
> ret.n
>
> ;; after
> test2:
> extui   a2, a2, 20, 1
> addx4   a2, a2, a3
> ret.n
> test3:
> extui   a2, a2, 1, 1
> subx8   a2, a2, a3
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/predicates.md (addsub_operator): New.
> * config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3,
> *extzvsi-1bit_addsubx): New insn_and_split patterns.
> * config/xtensa/xtensa.cc (xtensa_rtx_costs):
> Add a special case about ifcvt 'noce_try_cmove()' to handle
> constant loads that do not fit into signed 12 bits in the
> patterns added above.
> ---
>  gcc/config/xtensa/predicates.md |  3 ++
>  gcc/config/xtensa/xtensa.cc |  3 +-
>  gcc/config/xtensa/xtensa.md | 83 +
>  3 files changed, 88 insertions(+), 1 deletion(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 1/2] xtensa: Optimize '(x & CST1_POW2) != 0 ? CST2_POW2 : 0'

2023-05-22 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, May 22, 2023 at 12:06 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch removes one machine instruction from the "single bit extraction
> with shifting" operation, and tries to eliminate the conditional
> branch if CST2_POW2 doesn't fit into signed 12 bits with the help
> of ifcvt optimization.
>
> /* example #1 */
> int test0(int x) {
>   return (x & 1048576) != 0 ? 1024 : 0;
> }
> extern int foo(void);
> int test1(void) {
>   return (foo() & 1048576) != 0 ? 16777216 : 0;
> }
>
> ;; before
> test0:
> movi    a9, 0x400
> srai    a2, a2, 10
> and     a2, a2, a9
> ret.n
> test1:
> addi    sp, sp, -16
> s32i.n  a0, sp, 12
> call0   foo
> extui   a2, a2, 20, 1
> slli    a2, a2, 20
> beqz.n  a2, .L2
> movi.n  a2, 1
> slli    a2, a2, 24
> .L2:
> l32i.n  a0, sp, 12
> addi    sp, sp, 16
> ret.n
>
> ;; after
> test0:
> extui   a2, a2, 20, 1
> slli    a2, a2, 10
> ret.n
> test1:
> addi    sp, sp, -16
> s32i.n  a0, sp, 12
> call0   foo
> l32i.n  a0, sp, 12
> extui   a2, a2, 20, 1
> slli    a2, a2, 24
> addi    sp, sp, 16
> ret.n
>
> In addition, if the left shift amount ('exact_log2(CST2_POW2)') is
> between 1 and 3 and either an addition or a subtraction with another
> register follows, emit an ADDX[248] or SUBX[248] machine instruction
> instead of separate left shift and add/subtract ones.
>
> /* example #2 */
> int test2(int x, int y) {
>   return ((x & 1048576) != 0 ? 4 : 0) + y;
> }
> int test3(int x, int y) {
>   return ((x & 2) != 0 ? 8 : 0) - y;
> }
>
> ;; before
> test2:
> movi.n  a9, 4
> srai    a2, a2, 18
> and     a2, a2, a9
> add.n   a2, a2, a3
> ret.n
> test3:
> movi.n  a9, 8
> slli    a2, a2, 2
> and     a2, a2, a9
> sub     a2, a2, a3
> ret.n
>
> ;; after
> test2:
> extui   a2, a2, 20, 1
> addx4   a2, a2, a3
> ret.n
> test3:
> extui   a2, a2, 1, 1
> subx8   a2, a2, a3
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/predicates.md (addsub_operator): New.
> * config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3,
> *extzvsi-1bit_addsubx): New insn_and_split patterns.
> * config/xtensa/xtensa.cc (xtensa_rtx_costs):
> Add a special case about ifcvt 'noce_try_cmove()' to handle
> constant loads that do not fit into signed 12 bits in the
> patterns added above.
> ---
>  gcc/config/xtensa/predicates.md |  3 ++
>  gcc/config/xtensa/xtensa.cc |  3 +-
>  gcc/config/xtensa/xtensa.md | 75 +
>  3 files changed, 80 insertions(+), 1 deletion(-)

This change introduces a bunch of test failures on big endian configuration.
I believe that's because the starting bit position for zero_extract is counted
from different ends depending on the endianness.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Make full transition to LRA

2023-05-10 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, May 8, 2023 at 6:38 AM Takayuki 'January June' Suwa
 wrote:
>
> gcc/ChangeLog:
>
> * config/xtensa/constraints.md (R, T, U):
> Change define_constraint to define_memory_constraint.
> * config/xtensa/xtensa.cc
> (xtensa_lra_p, TARGET_LRA_P): Remove.
> (xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
> clause as it can no longer be true.
> (xtensa_output_integer_literal_parts): Consider 16-bit wide
> constants.
> (xtensa_legitimate_constant_p): Add short-circuit path for
> integer load instructions.
> * config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
> rather than reload_in_progress and reload_completed.
> * config/xtensa/xtensa.opt (mlra): Remove.
> ---
>  gcc/config/xtensa/constraints.md | 26 --
>  gcc/config/xtensa/xtensa.cc  | 26 +-
>  gcc/config/xtensa/xtensa.md  |  2 +-
>  gcc/config/xtensa/xtensa.opt |  4 
>  4 files changed, 14 insertions(+), 44 deletions(-)

That's impressive.
This version introduces a few execution failures in the testsuite on
little endian targets and a bunch more (but not all, some execution
tests still pass) on big endian.
I'm traveling this week and likely won't be able to take a deep look
into it until 5/15.

New LE failures:

+FAIL: gcc.c-torture/execute/pr56866.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gcc.dg/torture/pr45764.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gcc.dg/torture/pr45764.c   -O3 -g  execution test

+FAIL: gfortran.dg/c-interop/section-2.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/c-interop/section-2p.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/c-interop/section-3.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/c-interop/section-3p.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/bind-c-contiguous-3.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/bind-c-contiguous-3.f90   -O3 -g  execution test
+FAIL: gfortran.dg/check_bits_2.f90   -O1  output pattern test
+FAIL: gfortran.dg/coarray_ptr_comp_1.f08   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.dg/coarray_ptr_comp_1.f08   -O3 -g  execution test
+FAIL: gfortran.dg/loc_2.f90   -O2  execution test
+FAIL: gfortran.dg/loc_2.f90   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  execution test
+FAIL: gfortran.dg/loc_2.f90   -O3 -g  execution test
+FAIL: gfortran.dg/loc_2.f90   -Os  execution test
+FAIL: gfortran.dg/sizeof_6.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gfortran.fortran-torture/execute/forall_7.f90 execution,  -O2
-fbounds-check

New BE failures:

+FAIL: gcc.c-torture/execute/builtins/memset-chk.c execution,  -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions
+FAIL: gcc.c-torture/execute/builtins/memset-chk.c execution,  -O3 -g
+FAIL: gcc.c-torture/execute/20000412-3.c   -O2  execution test
+FAIL: gcc.c-torture/execute/20000412-3.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/20000412-3.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/20020201-1.c   -O2  execution test
+FAIL: gcc.c-torture/execute/20020201-1.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/20020201-1.c   -Os  execution test
+FAIL: gcc.c-torture/execute/20020201-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/20030224-2.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20040629-1.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20040629-1.c   -O1  execution test
+FAIL: gcc.c-torture/execute/20040705-1.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20040705-1.c   -O1  execution test
+FAIL: gcc.c-torture/execute/20040705-2.c   -O0  execution test
+FAIL: gcc.c-torture/execute/20040705-2.c   -O1  execution test
+FAIL: gcc.c-torture/execute/930603-3.c   -O2  execution test
+FAIL: gcc.c-torture/execute/930603-3.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/930603-3.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/931004-10.c   -O2  execution test
+FAIL: gcc.c-torture/execute/931004-10.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
+FAIL: gcc.c-torture/execute/931004-10.c   -O3 -g  execution 

[COMMITTED 2/2] gcc: xtensa: add -m[no-]strict-align option

2023-04-19 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa-opts.h: New header.
* config/xtensa/xtensa.h (STRICT_ALIGNMENT): Redefine as
xtensa_strict_align.
* config/xtensa/xtensa.cc (xtensa_option_override): When
-m[no-]strict-align is not specified in the command line set
xtensa_strict_align to 0 if the hardware supports both unaligned
loads and stores or to 1 otherwise.
* config/xtensa/xtensa.opt (mstrict-align): New option.
* doc/invoke.texi (Xtensa Options): Document -m[no-]strict-align.
---
 gcc/config/xtensa/xtensa-opts.h | 28 
 gcc/config/xtensa/xtensa.cc |  4 
 gcc/config/xtensa/xtensa.h  |  2 +-
 gcc/config/xtensa/xtensa.opt|  7 +++
 gcc/doc/invoke.texi | 14 +-
 5 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/xtensa/xtensa-opts.h

diff --git a/gcc/config/xtensa/xtensa-opts.h b/gcc/config/xtensa/xtensa-opts.h
new file mode 100644
index 000000000000..f0b8f5b3bfe7
--- /dev/null
+++ b/gcc/config/xtensa/xtensa-opts.h
@@ -0,0 +1,28 @@
+/* Definitions for option handling for Xtensa.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef XTENSA_OPTS_H
+#define XTENSA_OPTS_H
+
+/* Undefined state for the -mstrict-alignment option  */
+enum xtensa_strict_alignment_setting {
+  XTENSA_STRICT_ALIGNMENT_UNDEFINED = -1,
+};
+
+#endif
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 7287aa7a258a..9e5d314e143e 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2792,6 +2792,10 @@ xtensa_option_override (void)
   if (xtensa_windowed_abi == -1)
 xtensa_windowed_abi = TARGET_WINDOWED_ABI_DEFAULT;
 
+  if (xtensa_strict_alignment == XTENSA_STRICT_ALIGNMENT_UNDEFINED)
+xtensa_strict_alignment = !XCHAL_UNALIGNED_LOAD_HW
+  || !XCHAL_UNALIGNED_STORE_HW;
+
   if (! TARGET_THREADPTR)
 targetm.have_tls = false;
 
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 8ebf37cab33a..34e06afcff48 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -143,7 +143,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* Set this nonzero if move instructions will actually fail to work
when given unaligned data.  */
-#define STRICT_ALIGNMENT 1
+#define STRICT_ALIGNMENT (xtensa_strict_alignment)
 
 /* Promote integer modes smaller than a word to SImode.  Set UNSIGNEDP
for QImode, because there is no 8-bit load from memory with sign
diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt
index 3a129a4c0393..f16b53bf409f 100644
--- a/gcc/config/xtensa/xtensa.opt
+++ b/gcc/config/xtensa/xtensa.opt
@@ -18,6 +18,9 @@
 ; along with GCC; see the file COPYING3.  If not see
 ; <http://www.gnu.org/licenses/>.
 
+HeaderInclude
+config/xtensa/xtensa-opts.h
+
 mconst16
 Target Mask(CONST16)
 Use CONST16 instruction to load constants.
@@ -64,3 +67,7 @@ Use call0 ABI.
 mabi=windowed
 Target RejectNegative Var(xtensa_windowed_abi, 1)
 Use windowed registers ABI.
+
+mstrict-align
+Target Var(xtensa_strict_alignment) Init(XTENSA_STRICT_ALIGNMENT_UNDEFINED)
+Do not use unaligned memory references.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 57fb170ca4cc..54dcccbc148c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1474,7 +1474,8 @@ See RS/6000 and PowerPC Options.
 -mtarget-align  -mno-target-align
 -mlongcalls  -mno-longcalls
 -mabi=@var{abi-type}
--mextra-l32r-costs=@var{cycles}}
+-mextra-l32r-costs=@var{cycles}
+-mstrict-align  -mno-strict-align}
 
 @emph{zSeries Options}
 See S/390 and zSeries Options.
@@ -34401,6 +34402,17 @@ instructions, in clock cycles.  This affects, when 
optimizing for speed,
 whether loading a constant from literal pool using @code{L32R} or
 synthesizing the constant from a small one with a couple of arithmetic
 instructions.  The default value is 0.
+
+@opindex mstrict-align
+@opindex mno-strict-align
+@item -mstrict-align
+@itemx -mno-strict-align
+Avoid or allow generating memory accesses that may not be aligned on a natural
+object boundary as described in the architecture specification.
+The default is @option{-mno-strict-align} for cores that support both
+unaligned loads and stores in hardware and @option{-mstrict-align} for all
+other cores.
+
 @end table
 
 

[COMMITTED 1/2] gcc: xtensa: add data alignment properties to dynconfig

2023-04-19 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa-dynconfig.cc (xtensa_get_config_v4): New
function.

include/
* xtensa-dynconfig.h (xtensa_config_v4): New struct.
(XCHAL_DATA_WIDTH, XCHAL_UNALIGNED_LOAD_EXCEPTION)
(XCHAL_UNALIGNED_STORE_EXCEPTION, XCHAL_UNALIGNED_LOAD_HW)
(XCHAL_UNALIGNED_STORE_HW, XTENSA_CONFIG_V4_ENTRY_LIST): New
definitions.
(XTENSA_CONFIG_INSTANCE_LIST): Add xtensa_config_v4 instance.
(XTENSA_CONFIG_ENTRY_LIST): Add XTENSA_CONFIG_V4_ENTRY_LIST.
---
 gcc/config/xtensa/xtensa-dynconfig.cc | 18 
 include/xtensa-dynconfig.h| 59 ++-
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa-dynconfig.cc 
b/gcc/config/xtensa/xtensa-dynconfig.cc
index 9aea9f253c25..12dce4d1b2aa 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.cc
+++ b/gcc/config/xtensa/xtensa-dynconfig.cc
@@ -182,6 +182,24 @@ const struct xtensa_config_v3 *xtensa_get_config_v3 (void)
   return config;
 }
 
+const struct xtensa_config_v4 *xtensa_get_config_v4 (void)
+{
+  static const struct xtensa_config_v4 *config;
+  static const struct xtensa_config_v4 def = {
+  16, /* xchal_data_width */
+  1,  /* xchal_unaligned_load_exception */
+  1,  /* xchal_unaligned_store_exception */
+  0,  /* xchal_unaligned_load_hw */
+  0,  /* xchal_unaligned_store_hw */
+  };
+
+  if (!config)
+    config = (const struct xtensa_config_v4 *) xtensa_load_config ("xtensa_config_v4",
+                                                                   &xtensa_config_v4,
+                                                                   &def);
+  return config;
+}
+
 const char * const *xtensa_get_config_strings (void)
 {
   static const char * const *config_strings;
diff --git a/include/xtensa-dynconfig.h b/include/xtensa-dynconfig.h
index 2cc15cc99112..48877ebb6b61 100644
--- a/include/xtensa-dynconfig.h
+++ b/include/xtensa-dynconfig.h
@@ -112,6 +112,15 @@ struct xtensa_config_v3
   int xchal_have_xea3;
 };
 
+struct xtensa_config_v4
+{
+  int xchal_data_width;
+  int xchal_unaligned_load_exception;
+  int xchal_unaligned_store_exception;
+  int xchal_unaligned_load_hw;
+  int xchal_unaligned_store_hw;
+};
+
 typedef struct xtensa_isa_internal_struct xtensa_isa_internal;
 
 extern const void *xtensa_load_config (const char *name,
@@ -120,6 +129,7 @@ extern const void *xtensa_load_config (const char *name,
 extern const struct xtensa_config_v1 *xtensa_get_config_v1 (void);
 extern const struct xtensa_config_v2 *xtensa_get_config_v2 (void);
 extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
+extern const struct xtensa_config_v4 *xtensa_get_config_v4 (void);
 
 #ifdef XTENSA_CONFIG_DEFINITION
 
@@ -207,6 +217,26 @@ extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
 #define XCHAL_HAVE_XEA3 0
 #endif
 
+#ifndef XCHAL_DATA_WIDTH
+#define XCHAL_DATA_WIDTH 16
+#endif
+
+#ifndef XCHAL_UNALIGNED_LOAD_EXCEPTION
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1
+#endif
+
+#ifndef XCHAL_UNALIGNED_STORE_EXCEPTION
+#define XCHAL_UNALIGNED_STORE_EXCEPTION 1
+#endif
+
+#ifndef XCHAL_UNALIGNED_LOAD_HW
+#define XCHAL_UNALIGNED_LOAD_HW 0
+#endif
+
+#ifndef XCHAL_UNALIGNED_STORE_HW
+#define XCHAL_UNALIGNED_STORE_HW 0
+#endif
+
 #define XTENSA_CONFIG_ENTRY(a) a
 
 #define XTENSA_CONFIG_V1_ENTRY_LIST \
@@ -276,6 +306,13 @@ extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
 XTENSA_CONFIG_ENTRY(XCHAL_HAVE_EXCLUSIVE), \
 XTENSA_CONFIG_ENTRY(XCHAL_HAVE_XEA3)
 
+#define XTENSA_CONFIG_V4_ENTRY_LIST \
+XTENSA_CONFIG_ENTRY(XCHAL_DATA_WIDTH), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_LOAD_EXCEPTION), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_STORE_EXCEPTION), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_LOAD_HW), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_STORE_HW)
+
 #define XTENSA_CONFIG_INSTANCE_LIST \
 const struct xtensa_config_v1 xtensa_config_v1 = { \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
@@ -285,12 +322,16 @@ const struct xtensa_config_v2 xtensa_config_v2 = { \
 }; \
 const struct xtensa_config_v3 xtensa_config_v3 = { \
 XTENSA_CONFIG_V3_ENTRY_LIST, \
+}; \
+const struct xtensa_config_v4 xtensa_config_v4 = { \
+XTENSA_CONFIG_V4_ENTRY_LIST, \
 }
 
 #define XTENSA_CONFIG_ENTRY_LIST \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
 XTENSA_CONFIG_V2_ENTRY_LIST, \
-XTENSA_CONFIG_V3_ENTRY_LIST
+XTENSA_CONFIG_V3_ENTRY_LIST, \
+XTENSA_CONFIG_V4_ENTRY_LIST
 
 #else /* XTENSA_CONFIG_DEFINITION */
 
@@ -482,6 +523,22 @@ const struct xtensa_config_v3 xtensa_config_v3 = { \
 #undef XCHAL_HAVE_XEA3
 #define XCHAL_HAVE_XEA3 (xtensa_get_config_v3 ()->xchal_have_xea3)
 
+
+#undef XCHAL_DATA_WIDTH
+#define XCHAL_DATA_WIDTH   (xtensa_get_config_v4 ()->xchal_data_width)
+
+#undef XCHAL_UNALIGNED_LOAD_EXCEPTION
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION (xtensa_get_config_v4 ()->xchal_unaligned_load_exception)
+
+#undef XCHAL_UNALIGNED_STORE_EXCEPTION

Re: [PATCH] xtensa: Fix for enabling LRA

2023-03-13 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Tue, Mar 7, 2023 at 10:04 PM Takayuki 'January June' Suwa
 wrote:
>
> This patch makes LRA work well, with some exceptions
> (e.g. MI thunk generation due to pretending reload_completed).
>
> gcc/ChangeLog:
>
> * config/xtensa/constraints.md (R, T, U):
> Change define_constraint to define_memory_constraint.
> * config/xtensa/xtensa.cc (xtensa_legitimate_constant_p):
> Add short-circuit path for integer load instructions when
> lra_in_progress.
> * config/xtensa/xtensa.md (movsf):
> Use can_create_pseudo_p() rather than reload_in_progress and
> reload_completed.
> ---
>  gcc/config/xtensa/constraints.md | 26 --
>  gcc/config/xtensa/xtensa.cc  |  4 
>  gcc/config/xtensa/xtensa.md  |  2 +-
>  3 files changed, 13 insertions(+), 19 deletions(-)

this change introduces the following build- and runtime regressions
when tested without -mlra:

+FAIL: gcc.c-torture/execute/builtins/sprintf.c execution,  -O1
+FAIL: gcc.c-torture/execute/builtins/sprintf.c execution,  -Og -g
+FAIL: gcc.c-torture/execute/builtins/vsnprintf-chk.c execution,  -O1
+FAIL: gcc.c-torture/execute/builtins/vsprintf-chk.c execution,  -O1
+FAIL: gcc.c-torture/execute/stdarg-1.c   -O2  execution test
+FAIL: gcc.c-torture/execute/stdarg-1.c   -O3 -g  execution test
+FAIL: gcc.c-torture/execute/stdarg-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: gcc.c-torture/execute/strct-stdarg-1.c   -O2  (internal
compiler error: in xtensa_output_integer_literal_parts, at
config/xtensa/xtensa.cc:3201)
+FAIL: gcc.c-torture/execute/strct-stdarg-1.c   -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions  (internal compiler error: in
xtensa_output_integer_literal_parts, at config/xtensa/xtensa.cc:3201)
+FAIL: gcc.c-torture/execute/strct-stdarg-1.c   -O3 -g  (internal
compiler error: in xtensa_output_integer_literal_parts, at
config/xtensa/xtensa.cc:3201)
+FAIL: gcc.c-torture/execute/strct-stdarg-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  (internal compiler error:
in xtensa_output_integer_literal_parts, at
config/xtensa/xtensa.cc:3201)
+FAIL: gcc.c-torture/execute/strct-stdarg-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  (internal compiler error: in
xtensa_output_integer_literal_parts, at config/xtensa/xtensa.cc:3201)
+FAIL: gcc.c-torture/execute/va-arg-pack-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test
+FAIL: gcc.dg/atomic/c11-atomic-exec-4.c   -O1  execution test
+FAIL: gcc.dg/strcmpopt_6.c execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-14.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-14.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-3.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-3.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-p-14.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-p-14.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-shufflevector-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test
+FAIL: gcc.dg/torture/vec-cvt-1.c   -O2 -flto -fuse-linker-plugin
-fno-fat-lto-objects  execution test
+FAIL: gcc.dg/tree-ssa/forwprop-39.c (test for excess errors)
+FAIL: gcc.dg/tree-ssa/forwprop-39.c scan-tree-dump-not forwprop1 "COMPLEX_EXPR"
+FAIL: gcc.dg/tree-ssa/forwprop-39.c scan-tree-dump-not optimized
"REALPART_EXPR"
+FAIL: c-c++-common/torture/builtin-arith-overflow-14.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-14.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-3.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-3.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-p-14.c   -O2  execution test
+FAIL: c-c++-common/torture/builtin-arith-overflow-p-14.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
+FAIL: c-c++-common/torture/builtin-shufflevector-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test

-- 
Thanks.
-- Max


[COMMITTED] xtensa: add .note.GNU-stack section on linux

2023-03-13 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/linux.h (TARGET_ASM_FILE_END): New macro.

libgcc/
* config/xtensa/crti.S: Add .note.GNU-stack section on linux.
* config/xtensa/crtn.S: Likewise.
* config/xtensa/lib1funcs.S: Likewise.
* config/xtensa/lib2funcs.S: Likewise.
---
 gcc/config/xtensa/linux.h| 2 ++
 libgcc/config/xtensa/crti.S  | 6 ++
 libgcc/config/xtensa/crtn.S  | 6 ++
 libgcc/config/xtensa/lib1funcs.S | 6 ++
 libgcc/config/xtensa/lib2funcs.S | 6 ++
 5 files changed, 26 insertions(+)

diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h
index dc4d547fc5fd..e684e7deebf9 100644
--- a/gcc/config/xtensa/linux.h
+++ b/gcc/config/xtensa/linux.h
@@ -69,3 +69,5 @@ along with GCC; see the file COPYING3.  If not see
 #define XTENSA_ALWAYS_PIC 1
 
 #undef DEBUGGER_REGNO
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
diff --git a/libgcc/config/xtensa/crti.S b/libgcc/config/xtensa/crti.S
index b88df50207b6..637203500c41 100644
--- a/libgcc/config/xtensa/crti.S
+++ b/libgcc/config/xtensa/crti.S
@@ -26,6 +26,12 @@
 
 #include "xtensa-config-builtin.h"
 
+/* An executable stack is *not* required for these functions.  */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
.section .init
.globl _init
.type _init,@function
diff --git a/libgcc/config/xtensa/crtn.S b/libgcc/config/xtensa/crtn.S
index 7f271660f942..15c13ea03d0d 100644
--- a/libgcc/config/xtensa/crtn.S
+++ b/libgcc/config/xtensa/crtn.S
@@ -27,6 +27,12 @@
 
 #include "xtensa-config-builtin.h"
 
+/* An executable stack is *not* required for these functions.  */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
.section .init
 #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
retw
diff --git a/libgcc/config/xtensa/lib1funcs.S b/libgcc/config/xtensa/lib1funcs.S
index a3f17b6b2cbc..5baf2df1d20e 100644
--- a/libgcc/config/xtensa/lib1funcs.S
+++ b/libgcc/config/xtensa/lib1funcs.S
@@ -25,6 +25,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #include "xtensa-config-builtin.h"
 
+/* An executable stack is *not* required for these functions.  */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
 /* Define macros for the ABS and ADDX* instructions to handle cases
where they are not included in the Xtensa processor configuration.  */
 
diff --git a/libgcc/config/xtensa/lib2funcs.S b/libgcc/config/xtensa/lib2funcs.S
index e038e41eb6f1..992f712238c4 100644
--- a/libgcc/config/xtensa/lib2funcs.S
+++ b/libgcc/config/xtensa/lib2funcs.S
@@ -25,6 +25,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #include "xtensa-config-builtin.h"
 
+/* An executable stack is *not* required for these functions.  */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
 /* __xtensa_libgcc_window_spill: This function flushes out all but the
current register window.  This is used to set up the stack so that
arbitrary frames can be accessed.  */
-- 
2.30.2



Re: [PATCH] xtensa: Remove REG_OK_STRICT and its derivatives

2023-03-13 Thread Max Filippov via Gcc-patches
On Sun, Mar 12, 2023 at 5:37 PM Takayuki 'January June' Suwa
 wrote:
>
> Because GO_IF_LEGITIMATE_ADDRESS was deprecated a long time ago
> (see commit c6c3dba931548987c78719180e30ebc863404b89).
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.h (REG_OK_STRICT, REG_OK_FOR_INDEX_P,
> REG_OK_FOR_BASE_P): Remove.
> ---
>  gcc/config/xtensa/xtensa.h | 11 +--
>  1 file changed, 1 insertion(+), 10 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


[PATCH 1/2] gcc: xtensa: add data alignment properties to dynconfig

2023-02-28 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa-dynconfig.cc (xtensa_get_config_v4): New
function.

include/
* xtensa-dynconfig.h (xtensa_config_v4): New struct.
(XCHAL_DATA_WIDTH, XCHAL_UNALIGNED_LOAD_EXCEPTION)
(XCHAL_UNALIGNED_STORE_EXCEPTION, XCHAL_UNALIGNED_LOAD_HW)
(XCHAL_UNALIGNED_STORE_HW, XTENSA_CONFIG_V4_ENTRY_LIST): New
definitions.
(XTENSA_CONFIG_INSTANCE_LIST): Add xtensa_config_v4 instance.
(XTENSA_CONFIG_ENTRY_LIST): Add XTENSA_CONFIG_V4_ENTRY_LIST.
---
 gcc/config/xtensa/xtensa-dynconfig.cc | 18 
 include/xtensa-dynconfig.h| 59 ++-
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa-dynconfig.cc b/gcc/config/xtensa/xtensa-dynconfig.cc
index 9aea9f253c25..12dce4d1b2aa 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.cc
+++ b/gcc/config/xtensa/xtensa-dynconfig.cc
@@ -182,6 +182,24 @@ const struct xtensa_config_v3 *xtensa_get_config_v3 (void)
   return config;
 }
 
+const struct xtensa_config_v4 *xtensa_get_config_v4 (void)
+{
+  static const struct xtensa_config_v4 *config;
+  static const struct xtensa_config_v4 def = {
+  16, /* xchal_data_width */
+  1,  /* xchal_unaligned_load_exception */
+  1,  /* xchal_unaligned_store_exception */
+  0,  /* xchal_unaligned_load_hw */
+  0,  /* xchal_unaligned_store_hw */
+  };
+
+  if (!config)
+    config = (const struct xtensa_config_v4 *) xtensa_load_config ("xtensa_config_v4",
+                                                                   &xtensa_config_v4,
+                                                                   &def);
+  return config;
+}
+
 const char * const *xtensa_get_config_strings (void)
 {
   static const char * const *config_strings;
diff --git a/include/xtensa-dynconfig.h b/include/xtensa-dynconfig.h
index 2cc15cc99112..48877ebb6b61 100644
--- a/include/xtensa-dynconfig.h
+++ b/include/xtensa-dynconfig.h
@@ -112,6 +112,15 @@ struct xtensa_config_v3
   int xchal_have_xea3;
 };
 
+struct xtensa_config_v4
+{
+  int xchal_data_width;
+  int xchal_unaligned_load_exception;
+  int xchal_unaligned_store_exception;
+  int xchal_unaligned_load_hw;
+  int xchal_unaligned_store_hw;
+};
+
 typedef struct xtensa_isa_internal_struct xtensa_isa_internal;
 
 extern const void *xtensa_load_config (const char *name,
@@ -120,6 +129,7 @@ extern const void *xtensa_load_config (const char *name,
 extern const struct xtensa_config_v1 *xtensa_get_config_v1 (void);
 extern const struct xtensa_config_v2 *xtensa_get_config_v2 (void);
 extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
+extern const struct xtensa_config_v4 *xtensa_get_config_v4 (void);
 
 #ifdef XTENSA_CONFIG_DEFINITION
 
@@ -207,6 +217,26 @@ extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
 #define XCHAL_HAVE_XEA3 0
 #endif
 
+#ifndef XCHAL_DATA_WIDTH
+#define XCHAL_DATA_WIDTH 16
+#endif
+
+#ifndef XCHAL_UNALIGNED_LOAD_EXCEPTION
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1
+#endif
+
+#ifndef XCHAL_UNALIGNED_STORE_EXCEPTION
+#define XCHAL_UNALIGNED_STORE_EXCEPTION 1
+#endif
+
+#ifndef XCHAL_UNALIGNED_LOAD_HW
+#define XCHAL_UNALIGNED_LOAD_HW 0
+#endif
+
+#ifndef XCHAL_UNALIGNED_STORE_HW
+#define XCHAL_UNALIGNED_STORE_HW 0
+#endif
+
 #define XTENSA_CONFIG_ENTRY(a) a
 
 #define XTENSA_CONFIG_V1_ENTRY_LIST \
@@ -276,6 +306,13 @@ extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
 XTENSA_CONFIG_ENTRY(XCHAL_HAVE_EXCLUSIVE), \
 XTENSA_CONFIG_ENTRY(XCHAL_HAVE_XEA3)
 
+#define XTENSA_CONFIG_V4_ENTRY_LIST \
+XTENSA_CONFIG_ENTRY(XCHAL_DATA_WIDTH), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_LOAD_EXCEPTION), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_STORE_EXCEPTION), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_LOAD_HW), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_STORE_HW)
+
 #define XTENSA_CONFIG_INSTANCE_LIST \
 const struct xtensa_config_v1 xtensa_config_v1 = { \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
@@ -285,12 +322,16 @@ const struct xtensa_config_v2 xtensa_config_v2 = { \
 }; \
 const struct xtensa_config_v3 xtensa_config_v3 = { \
 XTENSA_CONFIG_V3_ENTRY_LIST, \
+}; \
+const struct xtensa_config_v4 xtensa_config_v4 = { \
+XTENSA_CONFIG_V4_ENTRY_LIST, \
 }
 
 #define XTENSA_CONFIG_ENTRY_LIST \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
 XTENSA_CONFIG_V2_ENTRY_LIST, \
-XTENSA_CONFIG_V3_ENTRY_LIST
+XTENSA_CONFIG_V3_ENTRY_LIST, \
+XTENSA_CONFIG_V4_ENTRY_LIST
 
 #else /* XTENSA_CONFIG_DEFINITION */
 
@@ -482,6 +523,22 @@ const struct xtensa_config_v3 xtensa_config_v3 = { \
 #undef XCHAL_HAVE_XEA3
 #define XCHAL_HAVE_XEA3 (xtensa_get_config_v3 ()->xchal_have_xea3)
 
+
+#undef XCHAL_DATA_WIDTH
+#define XCHAL_DATA_WIDTH   (xtensa_get_config_v4 ()->xchal_data_width)
+
+#undef XCHAL_UNALIGNED_LOAD_EXCEPTION
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION (xtensa_get_config_v4 ()->xchal_unaligned_load_exception)
+
+#undef XCHAL_UNALIGNED_STORE_EXCEPTION

[PATCH 2/2] gcc: xtensa: adjust STRICT_ALIGNMENT per hardware capabilities

2023-02-28 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa.h (STRICT_ALIGNMENT): Make it 0 when the
hardware supports both unaligned loads and stores.
---
 gcc/config/xtensa/xtensa.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 058602e44ee2..49ec9147b543 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -143,7 +143,8 @@ along with GCC; see the file COPYING3.  If not see
 
 /* Set this nonzero if move instructions will actually fail to work
when given unaligned data.  */
-#define STRICT_ALIGNMENT 1
+#define STRICT_ALIGNMENT (!XCHAL_UNALIGNED_LOAD_HW \
+ || !XCHAL_UNALIGNED_STORE_HW)
 
 /* Promote integer modes smaller than a word to SImode.  Set UNSIGNEDP
for QImode, because there is no 8-bit load from memory with sign
-- 
2.30.2



Re: [PATCH] xtensa: Make use of CLAMPS instruction if configured

2023-02-27 Thread Max Filippov via Gcc-patches
On Sun, Feb 26, 2023 at 9:27 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch introduces the use of CLAMPS instruction when the instruction
> is configured.
>
> /* example */
> int test(int a) {
>   if (a < -512)
> return -512;
>   if (a > 511)
> return 511;
>   return a;
> }
>
> ;; prereq: TARGET_CLAMPS
> test:
> clamps  a2, a2, 9
> ret.n
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa-protos.h (xtensa_match_CLAMPS_imms_p):
> New prototype.
> * config/xtensa/xtensa.cc (xtensa_match_CLAMPS_imms_p):
> New function.
> * config/xtensa/xtensa.h (TARGET_CLAMPS): New macro definition.
> * config/xtensa/xtensa.md (*xtensa_clamps): New insn pattern.
> ---
>  gcc/config/xtensa/xtensa-protos.h |  1 +
>  gcc/config/xtensa/xtensa.cc   | 13 +++
>  gcc/config/xtensa/xtensa.h|  1 +
>  gcc/config/xtensa/xtensa.md   | 37 +++
>  4 files changed, 52 insertions(+)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


[PATCH] gcc: xtensa: add XCHAL_HAVE_{CLAMPS, DEPBITS, EXCLUSIVE, XEA3} to dynconfig

2023-02-27 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa-dynconfig.cc (xtensa_get_config_v2)
(xtensa_get_config_v3): New functions.

include/
* xtensa-dynconfig.h (xtensa_config_v3): New struct.
(xtensa_get_config_v3): New declaration.
(XCHAL_HAVE_CLAMPS, XCHAL_HAVE_DEPBITS, XCHAL_HAVE_EXCLUSIVE)
(XCHAL_HAVE_XEA3, XTENSA_CONFIG_V3_ENTRY_LIST): New definitions.
(XTENSA_CONFIG_INSTANCE_LIST): Add xtensa_config_v3 instance.
(XTENSA_CONFIG_ENTRY_LIST): Add XTENSA_CONFIG_V3_ENTRY_LIST.
---
 gcc/config/xtensa/xtensa-dynconfig.cc | 24 +
 include/xtensa-dynconfig.h| 50 ++-
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa-dynconfig.cc b/gcc/config/xtensa/xtensa-dynconfig.cc
index db8ff43c498b..9aea9f253c25 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.cc
+++ b/gcc/config/xtensa/xtensa-dynconfig.cc
@@ -158,6 +158,30 @@ const struct xtensa_config_v1 *xtensa_get_config_v1 (void)
   return config;
 }
 
+const struct xtensa_config_v2 *xtensa_get_config_v2 (void)
+{
+  static const struct xtensa_config_v2 *config;
+  static struct xtensa_config_v2 def;
+
+  if (!config)
+    config = (const struct xtensa_config_v2 *) xtensa_load_config ("xtensa_config_v2",
+                                                                   &xtensa_config_v2,
+                                                                   &def);
+  return config;
+}
+
+const struct xtensa_config_v3 *xtensa_get_config_v3 (void)
+{
+  static const struct xtensa_config_v3 *config;
+  static struct xtensa_config_v3 def;
+
+  if (!config)
+    config = (const struct xtensa_config_v3 *) xtensa_load_config ("xtensa_config_v3",
+                                                                   &xtensa_config_v3,
+                                                                   &def);
+  return config;
+}
+
 const char * const *xtensa_get_config_strings (void)
 {
   static const char * const *config_strings;
diff --git a/include/xtensa-dynconfig.h b/include/xtensa-dynconfig.h
index bb72d6ab22d7..2cc15cc99112 100644
--- a/include/xtensa-dynconfig.h
+++ b/include/xtensa-dynconfig.h
@@ -104,6 +104,14 @@ struct xtensa_config_v2
   int xtensa_march_earliest;
 };
 
+struct xtensa_config_v3
+{
+  int xchal_have_clamps;
+  int xchal_have_depbits;
+  int xchal_have_exclusive;
+  int xchal_have_xea3;
+};
+
 typedef struct xtensa_isa_internal_struct xtensa_isa_internal;
 
 extern const void *xtensa_load_config (const char *name,
@@ -111,6 +119,7 @@ extern const void *xtensa_load_config (const char *name,
   const void *no_name_def);
 extern const struct xtensa_config_v1 *xtensa_get_config_v1 (void);
 extern const struct xtensa_config_v2 *xtensa_get_config_v2 (void);
+extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
 
 #ifdef XTENSA_CONFIG_DEFINITION
 
@@ -182,6 +191,22 @@ extern const struct xtensa_config_v2 *xtensa_get_config_v2 (void);
 #define XTENSA_MARCH_EARLIEST 0
 #endif
 
+#ifndef XCHAL_HAVE_CLAMPS
+#define XCHAL_HAVE_CLAMPS 0
+#endif
+
+#ifndef XCHAL_HAVE_DEPBITS
+#define XCHAL_HAVE_DEPBITS 0
+#endif
+
+#ifndef XCHAL_HAVE_EXCLUSIVE
+#define XCHAL_HAVE_EXCLUSIVE 0
+#endif
+
+#ifndef XCHAL_HAVE_XEA3
+#define XCHAL_HAVE_XEA3 0
+#endif
+
 #define XTENSA_CONFIG_ENTRY(a) a
 
 #define XTENSA_CONFIG_V1_ENTRY_LIST \
@@ -245,17 +270,27 @@ extern const struct xtensa_config_v2 *xtensa_get_config_v2 (void);
 XTENSA_CONFIG_ENTRY(XTENSA_MARCH_LATEST), \
 XTENSA_CONFIG_ENTRY(XTENSA_MARCH_EARLIEST)
 
+#define XTENSA_CONFIG_V3_ENTRY_LIST \
+XTENSA_CONFIG_ENTRY(XCHAL_HAVE_CLAMPS), \
+XTENSA_CONFIG_ENTRY(XCHAL_HAVE_DEPBITS), \
+XTENSA_CONFIG_ENTRY(XCHAL_HAVE_EXCLUSIVE), \
+XTENSA_CONFIG_ENTRY(XCHAL_HAVE_XEA3)
+
 #define XTENSA_CONFIG_INSTANCE_LIST \
 const struct xtensa_config_v1 xtensa_config_v1 = { \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
 }; \
 const struct xtensa_config_v2 xtensa_config_v2 = { \
 XTENSA_CONFIG_V2_ENTRY_LIST, \
+}; \
+const struct xtensa_config_v3 xtensa_config_v3 = { \
+XTENSA_CONFIG_V3_ENTRY_LIST, \
 }
 
 #define XTENSA_CONFIG_ENTRY_LIST \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
-XTENSA_CONFIG_V2_ENTRY_LIST
+XTENSA_CONFIG_V2_ENTRY_LIST, \
+XTENSA_CONFIG_V3_ENTRY_LIST
 
 #else /* XTENSA_CONFIG_DEFINITION */
 
@@ -434,6 +469,19 @@ const struct xtensa_config_v2 xtensa_config_v2 = { \
 #undef XTENSA_MARCH_EARLIEST
 #define XTENSA_MARCH_EARLIEST  (xtensa_get_config_v2 ()->xtensa_march_earliest)
 
+
+#undef XCHAL_HAVE_CLAMPS
+#define XCHAL_HAVE_CLAMPS  (xtensa_get_config_v3 ()->xchal_have_clamps)
+
+#undef XCHAL_HAVE_DEPBITS
+#define XCHAL_HAVE_DEPBITS (xtensa_get_config_v3 ()->xchal_have_depbits)
+
+#undef XCHAL_HAVE_EXCLUSIVE
+#define XCHAL_HAVE_EXCLUSIVE   (xtensa_get_config_v3 ()->xchal_have_exclusive)
+
+#undef XCHAL_HAVE_XEA3
+#define XCHAL_HAVE_XEA3 (xtensa_get_config_v3 ()->xchal_have_xea3)
+
 

Re: [PATCH] xtensa: Make use of CLAMPS instruction if configured

2023-02-26 Thread Max Filippov via Gcc-patches
On Sun, Feb 26, 2023 at 9:27 AM Takayuki 'January June' Suwa
 wrote:
> This patch introduces the use of CLAMPS instruction when the instruction
> is configured.

Testing.

> (Totally off-topic, but do you know anything about the SALT/SALTU 
> instructions?
> I see them in the "Core Architecture Instructions" in a recent Cadence 
> document
> but not in slightly older Tensilica one...)

Yes, they are a part of the core xtensa instruction set since the
release RG-2015.0.
I believe this translates to the test (XTENSA_MARCH_EARLIEST >= 26).
There's a chapter "Added instructions" at the end of the xtensa ISA book with
the list of such opcodes.

-- 
Thanks.
-- Max


[COMMITTED] gcc: xtensa: fix PR target/108919

2023-02-25 Thread Max Filippov via Gcc-patches
gcc/
PR target/108919

* config/xtensa/xtensa-protos.h
(xtensa_prepare_expand_call): Rename to xtensa_expand_call.
* config/xtensa/xtensa.cc (xtensa_prepare_expand_call): Rename
to xtensa_expand_call.
(xtensa_expand_call): Emit the call and add a clobber expression
for the static chain to it in case of windowed ABI.
* config/xtensa/xtensa.md (call, call_value, sibcall)
(sibcall_value): Call xtensa_expand_call and complete expansion
right after that call.

gcc/testsuite/
* gcc.target/xtensa/pr108919.c: New test.
---
 gcc/config/xtensa/xtensa-protos.h  |  2 +-
 gcc/config/xtensa/xtensa.cc| 25 +++-
 gcc/config/xtensa/xtensa.md| 12 --
 gcc/testsuite/gcc.target/xtensa/pr108919.c | 46 ++
 4 files changed, 79 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/pr108919.c

diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index ecd0f0c8d108..c81cf94323ac 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -53,7 +53,7 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool);
 extern void xtensa_emit_loop_end (rtx_insn *, rtx *);
 extern char *xtensa_emit_branch (bool, rtx *);
 extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
-extern void xtensa_prepare_expand_call (int, rtx *);
+extern void xtensa_expand_call (int, rtx *);
 extern char *xtensa_emit_call (int, rtx *);
 extern char *xtensa_emit_sibcall (int, rtx *);
 extern bool xtensa_tls_referenced_p (rtx);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index e52fba082550..5044bc25c2fe 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2183,8 +2183,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands)
 
 
 void
-xtensa_prepare_expand_call (int callop, rtx *operands)
+xtensa_expand_call (int callop, rtx *operands)
 {
+  rtx call;
+  rtx_insn *call_insn;
   rtx addr = XEXP (operands[callop], 0);
 
   if (flag_pic && SYMBOL_REF_P (addr)
@@ -2202,6 +2204,27 @@ xtensa_prepare_expand_call (int callop, rtx *operands)
 Pmode);
   XEXP (operands[callop], 0) = reg;
 }
+
+  call = gen_rtx_CALL (VOIDmode, operands[callop], operands[callop + 1]);
+
+  if (callop)
+call = gen_rtx_SET (operands[0], call);
+
+  call_insn = emit_call_insn (call);
+
+  if (TARGET_WINDOWED_ABI)
+{
+  /*
+   * Windowed xtensa ABI specifies that static chain pointer is passed
+   * in memory below the caller's stack pointer, which means that the
+   * callee may clobber it if it's a non-leaf function.
+   * Add the clobber expression for the static chain to the function call
+   * expression list so that it is not assumed to be live across the call.
+   */
+  rtx clob = gen_rtx_CLOBBER (Pmode, xtensa_static_chain (NULL, false));
+  CALL_INSN_FUNCTION_USAGE (call_insn) =
+   gen_rtx_EXPR_LIST (Pmode, clob, CALL_INSN_FUNCTION_USAGE (call_insn));
+}
 }
 
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index cf25beb83d54..b60dec2447f3 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2333,7 +2333,8 @@
 (match_operand 1 "" ""))]
   ""
 {
-  xtensa_prepare_expand_call (0, operands);
+  xtensa_expand_call (0, operands);
+  DONE;
 })
 
 (define_insn "call_internal"
@@ -2353,7 +2354,8 @@
  (match_operand 2 "" "")))]
   ""
 {
-  xtensa_prepare_expand_call (1, operands);
+  xtensa_expand_call (1, operands);
+  DONE;
 })
 
 (define_insn "call_value_internal"
@@ -2373,7 +2375,8 @@
 (match_operand 1 "" ""))]
   "!TARGET_WINDOWED_ABI"
 {
-  xtensa_prepare_expand_call (0, operands);
+  xtensa_expand_call (0, operands);
+  DONE;
 })
 
 (define_insn "sibcall_internal"
@@ -2393,7 +2396,8 @@
  (match_operand 2 "" "")))]
   "!TARGET_WINDOWED_ABI"
 {
-  xtensa_prepare_expand_call (1, operands);
+  xtensa_expand_call (1, operands);
+  DONE;
 })
 
 (define_insn "sibcall_value_internal"
diff --git a/gcc/testsuite/gcc.target/xtensa/pr108919.c b/gcc/testsuite/gcc.target/xtensa/pr108919.c
new file mode 100644
index 000000000000..300b6fd10a99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/pr108919.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+
+#ifdef __XTENSA_CALL0_ABI__
+void __xtensa_libgcc_window_spill (void)
+{
+}
+#else
+void __xtensa_libgcc_window_spill (void);
+#endif
+
+__attribute__((noinline)) void h (void)
+{
+  __xtensa_libgcc_window_spill ();
+}
+
+int f (int u, int v)
+{
+  int a = u;
+  int s;
+
+  __attribute__((noinline,pure)) int nested1 (int b)
+  {
+  h();
+  return a + b;
+  }
+
+  __attribute__((noinline,pure)) int nested2 (int b)
+  {
+  h();
+  return a - b;
+  }
+
+  s = nested1 (v);
+  s += nested2 (v);
+  return s;

Re: [PATCH] gcc: xtensa: fix PR target/108919

2023-02-25 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Sat, Feb 25, 2023 at 3:33 AM Takayuki 'January June' Suwa
 wrote:
> On 2023/02/25 19:01, Max Filippov wrote:
> > diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
> > index e52fba082550..babe7f0ebd68 100644
> > --- a/gcc/config/xtensa/xtensa.cc
> > +++ b/gcc/config/xtensa/xtensa.cc
> > @@ -2183,8 +2183,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool 
> > isbool, rtx *operands)
> >
> >
> >  void
> > -xtensa_prepare_expand_call (int callop, rtx *operands)
> > +xtensa_expand_call (int callop, rtx *operands)
> >  {
> > +  rtx call;
> > +  rtx call_insn;
>
> ;; This should be rtx_insn* rather than rtx,
> -  rtx call_insn;
> +  rtx_insn *call_insn;
>
> >rtx addr = XEXP (operands[callop], 0);
> >
> >if (flag_pic && SYMBOL_REF_P (addr)
> > @@ -2202,6 +2204,27 @@ xtensa_prepare_expand_call (int callop, rtx 
> > *operands)
> >Pmode);
> >XEXP (operands[callop], 0) = reg;
> >  }
> > +
> > +  call = gen_rtx_CALL (VOIDmode, operands[callop], operands[callop + 1]);
> > +
> > +  if (callop)
> > +call_insn = emit_call_insn (gen_rtx_SET (operands[0], call));
> > +  else
> > +call_insn = emit_call_insn (call);
>
> ;; Simpler,
>call = gen_rtx_CALL (VOIDmode, operands[callop], operands[callop + 1]);
> -
>if (callop)
> -call_insn = emit_call_insn (gen_rtx_SET (operands[0], call));
> -  else
> -call_insn = emit_call_insn (call);
> +call = gen_rtx_SET (operands[0], call);
> +  call_insn = emit_call_insn (call);

Thank you for the review!

> (I had just removed "WIP" from the set of backported patches to the 
> esp8266/Arduino toolchain,
> so I am worried that it was a bit premature, but relieved soon to find that 
> it has nothing to do
> with the CALL0 ABI...)

Unrelated indeed, just popped up during the recent testing.

-- 
Thanks.
-- Max


[PATCH] gcc: xtensa: fix PR target/108919

2023-02-25 Thread Max Filippov via Gcc-patches
gcc/
PR target/108919

* config/xtensa/xtensa-protos.h
(xtensa_prepare_expand_call): Rename to xtensa_expand_call.
* config/xtensa/xtensa.cc (xtensa_prepare_expand_call): Rename
to xtensa_expand_call.
(xtensa_expand_call): Emit the call and add a clobber expression
for the static chain to it in case of windowed ABI.
* config/xtensa/xtensa.md (call, call_value, sibcall)
(sibcall_value): Call xtensa_expand_call and complete expansion
right after that call.

gcc/testsuite/
* gcc.target/xtensa/pr108919.c: New test.
---
 gcc/config/xtensa/xtensa-protos.h  |  2 +-
 gcc/config/xtensa/xtensa.cc| 25 +++-
 gcc/config/xtensa/xtensa.md| 12 --
 gcc/testsuite/gcc.target/xtensa/pr108919.c | 46 ++
 4 files changed, 79 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/pr108919.c

diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index ecd0f0c8d108..c81cf94323ac 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -53,7 +53,7 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, 
rtx, bool);
 extern void xtensa_emit_loop_end (rtx_insn *, rtx *);
 extern char *xtensa_emit_branch (bool, rtx *);
 extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
-extern void xtensa_prepare_expand_call (int, rtx *);
+extern void xtensa_expand_call (int, rtx *);
 extern char *xtensa_emit_call (int, rtx *);
 extern char *xtensa_emit_sibcall (int, rtx *);
 extern bool xtensa_tls_referenced_p (rtx);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index e52fba082550..babe7f0ebd68 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2183,8 +2183,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool 
isbool, rtx *operands)
 
 
 void
-xtensa_prepare_expand_call (int callop, rtx *operands)
+xtensa_expand_call (int callop, rtx *operands)
 {
+  rtx call;
+  rtx call_insn;
   rtx addr = XEXP (operands[callop], 0);
 
   if (flag_pic && SYMBOL_REF_P (addr)
@@ -2202,6 +2204,27 @@ xtensa_prepare_expand_call (int callop, rtx *operands)
 Pmode);
   XEXP (operands[callop], 0) = reg;
 }
+
+  call = gen_rtx_CALL (VOIDmode, operands[callop], operands[callop + 1]);
+
+  if (callop)
+call_insn = emit_call_insn (gen_rtx_SET (operands[0], call));
+  else
+call_insn = emit_call_insn (call);
+
+  if (TARGET_WINDOWED_ABI)
+{
+  /*
+   * Windowed xtensa ABI specifies that static chain pointer is passed
+   * in memory below the caller stack pointer, which means that the callee
+   * will likely clobber it if it's a non-leaf function.
+   * Add the clobber expression for the static chain to the function call
+   * expression list so that it is not assumed to be live across the call.
+   */
+  rtx clob = gen_rtx_CLOBBER (Pmode, xtensa_static_chain (NULL, false));
+  CALL_INSN_FUNCTION_USAGE (call_insn) =
+   gen_rtx_EXPR_LIST (Pmode, clob, CALL_INSN_FUNCTION_USAGE (call_insn));
+}
 }
 
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index cf25beb83d54..b60dec2447f3 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2333,7 +2333,8 @@
 (match_operand 1 "" ""))]
   ""
 {
-  xtensa_prepare_expand_call (0, operands);
+  xtensa_expand_call (0, operands);
+  DONE;
 })
 
 (define_insn "call_internal"
@@ -2353,7 +2354,8 @@
  (match_operand 2 "" "")))]
   ""
 {
-  xtensa_prepare_expand_call (1, operands);
+  xtensa_expand_call (1, operands);
+  DONE;
 })
 
 (define_insn "call_value_internal"
@@ -2373,7 +2375,8 @@
 (match_operand 1 "" ""))]
   "!TARGET_WINDOWED_ABI"
 {
-  xtensa_prepare_expand_call (0, operands);
+  xtensa_expand_call (0, operands);
+  DONE;
 })
 
 (define_insn "sibcall_internal"
@@ -2393,7 +2396,8 @@
  (match_operand 2 "" "")))]
   "!TARGET_WINDOWED_ABI"
 {
-  xtensa_prepare_expand_call (1, operands);
+  xtensa_expand_call (1, operands);
+  DONE;
 })
 
 (define_insn "sibcall_value_internal"
diff --git a/gcc/testsuite/gcc.target/xtensa/pr108919.c 
b/gcc/testsuite/gcc.target/xtensa/pr108919.c
new file mode 100644
index 000000000000..300b6fd10a99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/pr108919.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+
+#ifdef __XTENSA_CALL0_ABI__
+void __xtensa_libgcc_window_spill (void)
+{
+}
+#else
+void __xtensa_libgcc_window_spill (void);
+#endif
+
+__attribute__((noinline)) void h (void)
+{
+  __xtensa_libgcc_window_spill ();
+}
+
+int f (int u, int v)
+{
+  int a = u;
+  int s;
+
+  __attribute__((noinline,pure)) int nested1 (int b)
+  {
+  h();
+  return a + b;
+  }
+
+  __attribute__((noinline,pure)) int nested2 (int b)
+  {
+  h();
+  return a - b;
+  }
+
+  s = nested1 (v);
+  

[COMMITTED 1/2] gcc: xtensa: rename xtensa-dynconfig.c and update its build rule

2023-02-23 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/t-xtensa (xtensa-dynconfig.o): Use $(COMPILE)
and $(POSTCOMPILE) instead of manual dependency listing.
* config/xtensa/xtensa-dynconfig.c: Rename to ...
* config/xtensa/xtensa-dynconfig.cc: ... this.
---
 gcc/config/xtensa/t-xtensa | 7 +++
 .../xtensa/{xtensa-dynconfig.c => xtensa-dynconfig.cc} | 0
 2 files changed, 3 insertions(+), 4 deletions(-)
 rename gcc/config/xtensa/{xtensa-dynconfig.c => xtensa-dynconfig.cc} (100%)

diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa
index d95bd6f15462..cf6574be0353 100644
--- a/gcc/config/xtensa/t-xtensa
+++ b/gcc/config/xtensa/t-xtensa
@@ -20,7 +20,6 @@ TM_H += $(srcdir)/../include/xtensa-config.h \
$(srcdir)/../include/xtensa-dynconfig.h
 $(out_object_file): gt-xtensa.h
 
-xtensa-dynconfig.o: $(srcdir)/config/xtensa/xtensa-dynconfig.c \
-  $(CONFIG_H) $(SYSTEM_H) $(srcdir)/../include/xtensa-dynconfig.h \
-  $(srcdir)/../include/xtensa-config.h
-   $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $<
+xtensa-dynconfig.o: $(srcdir)/config/xtensa/xtensa-dynconfig.cc
+   $(COMPILE) $<
+   $(POSTCOMPILE)
diff --git a/gcc/config/xtensa/xtensa-dynconfig.c 
b/gcc/config/xtensa/xtensa-dynconfig.cc
similarity index 100%
rename from gcc/config/xtensa/xtensa-dynconfig.c
rename to gcc/config/xtensa/xtensa-dynconfig.cc
-- 
2.30.2



[COMMITTED 2/2] gcc: xtensa: update include style in xtensa-dynconfig.cc

2023-02-23 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa-dynconfig.cc (config.h, system.h)
(coretypes.h, diagnostic.h, intl.h): Use "..." instead of <...>
for the gcc-internal headers.
---
 gcc/config/xtensa/xtensa-dynconfig.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/xtensa/xtensa-dynconfig.cc 
b/gcc/config/xtensa/xtensa-dynconfig.cc
index e0091f3e6669..db8ff43c498b 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.cc
+++ b/gcc/config/xtensa/xtensa-dynconfig.cc
@@ -17,11 +17,11 @@
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
 
-#include <config.h>
-#include <system.h>
-#include <coretypes.h>
-#include <diagnostic.h>
-#include <intl.h>
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic.h"
+#include "intl.h"
 #define XTENSA_CONFIG_DEFINITION
 #include "xtensa-config.h"
 #include "xtensa-dynconfig.h"
-- 
2.30.2



Re: [PATCH 1/2] xtensa: Fix non-fatal regression introduced by b2ef02e8cbbaf95fee98be255f697f47193960ec

2023-02-23 Thread Max Filippov via Gcc-patches
On Thu, Feb 23, 2023 at 1:35 AM Max Filippov  wrote:
>
> On Wed, Feb 22, 2023 at 7:42 PM Takayuki 'January June' Suwa
>  wrote:
> >
> > In commit b2ef02e8cbbaf95fee98be255f697f47193960ec, the sibling call
> > insn included (use (reg:SI A0_REG)) to fix the problem, which added
> > a USE chain unconditionally to the data flow of register A0 during
> > the sibling call.
> >
> > As a result, df_regs_ever_live_p (A0_REG) returns true, so even if
> > register A0 is not used outside of the sibling call insn, saves and
> > restores to stack slots are emitted in pro/epilogue, and finally
> > code size increases.
> > (This is why I never included (use A0) in sibling calls)
> >
> > /* example */
> > extern int foo(int);
> > int test(int a) {
> >   return foo(a * 3 + 1);
> > }
> >
> > ;; before
> > test:
> > addi    sp, sp, -16 ;; unneeded stack frame allocation (induced)
> > s32i.n  a0, sp, 12  ;; unneeded saving of register A0
> > l32i.n  a0, sp, 12  ;; unneeded restoration of register A0
> > addx2   a2, a2, a2
> > addi.n  a2, a2, 1
> > addi    sp, sp, 16  ;; unneeded stack frame freeing (induced)
> > j.l foo, a9 ;; sibling call (truly needs register A0)
> >
> > The essential cause is that we emit (use A0) *before* the insns that
> > do the stack pointer adjustment during epilogue expansion, so the
> > liveness of register A0 ends early, so register A0 is reused afterwards.
> >
> > This patch fixes the problem and avoids such regression by doing the
> > emit of (use A0) in the sibling call epilogue expansion at the end.
> >
> > ;; after
> > test:
> > addx2   a2, a2, a2
> > addi.n  a2, a2, 1
> > j.l foo, a9
> >
> > From RTL-pass "315r.rnreg" by
> > "gfortran -O3 -funroll-loops -mabi=call0 -S -da 
> > gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":
> >
> > ;; Function selector_init (__selectors_MOD_selector_init, funcdef_no=2, 
> > decl_uid=987, cgraph_uid=3, symbol_order=4)
> > ...
> > (insn 3807 3806 3808 121 (set (reg:SI 15 a15)
> > (mem/c:SI (plus:SI (reg/f:SI 1 sp)
> > (const_int 268 [0x10c])) [31  S4 A32])) 
> > "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 
> > {movsi_internal}
> >  (nil))
> > (insn 3808 3807 3809 121 (set (reg:SI 7 a7)
> > (const_int 288 [0x120])) 
> > "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 
> > {movsi_internal}
> >  (nil))
> > (insn 3809 3808 3810 121 (set (reg/f:SI 1 sp)
> > (plus:SI (reg/f:SI 1 sp)
> > (reg:SI 7 a7))) 
> > "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 1 
> > {addsi3}
> >  (expr_list:REG_DEAD (reg:SI 9 a9)
> > (nil)))
> > (insn 3810 3809 721 121 (use (reg:SI 0 a0)) 
> > "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 -1
> >  (expr_list:REG_DEAD (reg:SI 0 a0)
> > (nil)))
> > (call_insn/j 721 3810 722 121 (call (mem:SI (symbol_ref:SI ("free") 
> > [flags 0x41]  ) [0 
> > __builtin_free S4 A32])
> > (const_int 0 [0])) 
> > "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 
> > discrim 1 106 {sibcall_internal}
> >  (expr_list:REG_DEAD (reg:SI 2 a2)
> > (expr_list:REG_CALL_DECL (symbol_ref:SI ("free") [flags 0x41]  
> > )
> > (expr_list:REG_EH_REGION (const_int 0 [0])
> > (nil
> > (expr_list:SI (use (reg:SI 2 a2))
> > (nil)))
> >
> > (IMHO the "rnreg" pass doesn't take REG_ALLOC_ORDER into account;
> > it just seems to allocate registers in fixed_regs index order,
> > which may have hurt register A0 that became allocatable in the recent
> > patch)
> >
> > gcc/ChangeLog:
> >
> > * config/xtensa/xtensa.cc (xtensa_expand_epilogue):
> > Emit (use (reg:SI A0_REG)) at the end in the sibling call
> > (i.e. the same place as (return) in the normal call).
> > * config/xtensa/xtensa.md
> > (sibcall, sibcall_internal, sibcall_value, sibcall_value_internal):
> > Revert changes by the previous patch.
> > ---
> >  gcc/config/xtensa/xtensa.cc |  4 +++-
> >  gcc/config/xtensa/xtensa.md | 20 +++-
> >  2 files changed, 10 insertions(+), 14 deletions(-)
>
> I've reverted my fix and committed this fix minus the revert.

Sorry, I've messed up the patch authorship in the rebase ):

-- 
Thanks.
-- Max


Re: [PATCH v7] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-23 Thread Max Filippov via Gcc-patches
On Thu, Feb 16, 2023 at 11:54 PM Takayuki 'January June' Suwa
 wrote:
>
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move (with two exceptions; i. the register saved to/restored
> from is the stack pointer, ii. the function needs an additional stack
> pointer adjustment to grow the stack).
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
> ; prologue {
>   ...
> s32i.n  a14, sp, 16
>   ...   ;; no frame pointer needed
> ;; no additional stack growth
> ; } prologue
>   ...
> mov.n   a14, a6 ;; A6 is not SP
>   ...
> call0   foo
>   ...
> mov.n   a8, a14 ;; A8 is not SP
>   ...
> ; epilogue {
>   ...
> l32i.n  a14, sp, 16
>   ...
> ; } epilogue
>
> It can be possible like this:
>
> ;; after
> ; prologue {
>   ...
> (no save needed)
>   ...
> ; } prologue
>   ...
> s32i.n  a6, sp, 16  ;; replaced with A14's slot
>   ...
> call0   foo
>   ...
> l32i.n  a8, sp, 16  ;; through SP
>   ...
> ; epilogue {
>   ...
> (no restoration needed)
>   ...
> ; } epilogue
>
> This patch adds the abovementioned logic to the function prologue/epilogue
> RTL expander code.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (machine_function): Add new member
> 'eliminated_callee_saved_regs'.
> (xtensa_can_eliminate_callee_saved_reg_p): New function to
> determine whether the register can be eliminated or not.
> (xtensa_expand_prologue): Invoke the above function and
> eliminate the use of the callee-saved register by using its stack
> slot through the stack pointer (or the frame pointer if needed)
> directly.
> (xtensa_expand_prologue): Modify to not emit register restoration
> insn from its stack slot if the register is already eliminated.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/xtensa/elim_callee_saved.c: New.
> ---
>  gcc/config/xtensa/xtensa.cc   | 134 ++
>  .../gcc.target/xtensa/elim_callee_saved.c |  37 +
>  2 files changed, 146 insertions(+), 25 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH v5] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-02-23 Thread Max Filippov via Gcc-patches
On Fri, Feb 17, 2023 at 8:43 PM Takayuki 'January June' Suwa
 wrote:
>
> Register-register move instructions that can be easily seen as
> unnecessary by the human eye may remain in the compiled result.
> For example:
>
> /* example */
> double test(double a, double b) {
>   return __builtin_copysign(a, b);
> }
>
> test:
> add.n   a3, a3, a3
> extui   a5, a5, 31, 1
> ssai    1
> ;; Be in the same BB
> src a7, a5, a3  ;; Replacing the destination doesn't
> ;;   violate any constraints of the
> ;;   operands
> ;; No CALL insns in this span
> ;; Both A3 and A7 are irrelevant to
> ;;   insns in this span
> mov.n   a3, a7  ;; An unnecessary reg-reg move
> ;; A7 is not used after this
> ret.n
>
> The last two instructions above, excluding the return instruction,
> could be done like this:
>
> src a3, a5, a3
>
> This symptom often occurs when handling DI/DFmode values with SImode
> instructions.  This patch solves the above problem using peephole2
> pattern.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the occurrence of general-purpose register used only once and for
> transferring intermediate value.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/xtensa/elim_GP_regmove_[01].c: New.
> ---
>  gcc/config/xtensa/xtensa.md   | 46 +++
>  .../gcc.target/xtensa/elim_GP_regmove_0.c | 23 ++
>  .../gcc.target/xtensa/elim_GP_regmove_1.c | 10 
>  3 files changed, 79 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Fix up fatal_error message strings in xtensa-dynconfig.c [PR108890]

2023-02-23 Thread Max Filippov via Gcc-patches
Hi Jakub,

On Thu, Feb 23, 2023 at 2:34 AM Jakub Jelinek  wrote:
> The translation PR complains that these 4 messages from xtensa-dynconfig.c
> are marked in po/gcc.pot as c-format (which doesn't allow %qs) while they
> should be gcc-internal-format.
>
> The problem is in the manual translation of the strings with _(),
> that should be both unnecessary because fatal_error invokes _() on its
> argument already, but also incorrect for the above reason, for
> gcc-internal-format strings one should use G_("...") instead if really
> needed.
>
> The following patch drops those _("..."), tested by regenerating po/gcc.pot
> to see they are now gcc-internal-format, but not really tested on xtensa
> target.
>
> Ok for trunk?

Ok.

> BTW, why is the file using .c extension rather than .cc?

It was initially developed when backend code was still in .c
files and I failed to update this part during forward porting.
I'll fix it.

>  Why isn't t-xtensa using $(COMPILE) and $(POSTCOMPILE)
> to compile it like for most other extra_objs on other targets?
>  And, why does that file use <> style includes of gcc internal
> headers rather than "" style which is used everywhere else
> in gcc?

No real reason for either. I'll fix it.
Thanks for your review.

-- Max


[COMMITTED 2/2] xtensa: fix PR target/108876

2023-02-23 Thread Max Filippov via Gcc-patches
In commit b2ef02e8cbbaf95fee98be255f697f47193960ec, the sibling call
insn included (use (reg:SI A0_REG)) to fix the problem, which added
a USE chain unconditionally to the data flow of register A0 during
the sibling call.

As a result, df_regs_ever_live_p (A0_REG) returns true, so even if
register A0 is not used outside of the sibling call insn, saves and
restores to stack slots are emitted in pro/epilogue, and finally
code size increases.
(This is why I never included (use A0) in sibling calls)

/* example */
extern int foo(int);
int test(int a) {
  return foo(a * 3 + 1);
}

;; before
test:
addi    sp, sp, -16 ;; unneeded stack frame allocation (induced)
s32i.n  a0, sp, 12  ;; unneeded saving of register A0
l32i.n  a0, sp, 12  ;; unneeded restoration of register A0
addx2   a2, a2, a2
addi.n  a2, a2, 1
addi    sp, sp, 16  ;; unneeded stack frame freeing (induced)
j.l foo, a9 ;; sibling call (truly needs register A0)

The essential cause is that we emit (use A0) *before* the insns that
do the stack pointer adjustment during epilogue expansion, so the
liveness of register A0 ends early, so register A0 is reused afterwards.

This patch fixes the problem and avoids such regression by doing the
emit of (use A0) in the sibling call epilogue expansion at the end.

;; after
test:
addx2   a2, a2, a2
addi.n  a2, a2, 1
j.l foo, a9

From RTL-pass "315r.rnreg" by
"gfortran -O3 -funroll-loops -mabi=call0 -S -da 
gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":

;; Function selector_init (__selectors_MOD_selector_init, funcdef_no=2, 
decl_uid=987, cgraph_uid=3, symbol_order=4)
...
(insn 3807 3806 3808 121 (set (reg:SI 15 a15)
(mem/c:SI (plus:SI (reg/f:SI 1 sp)
(const_int 268 [0x10c])) [31  S4 A32])) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 
{movsi_internal}
 (nil))
(insn 3808 3807 3809 121 (set (reg:SI 7 a7)
(const_int 288 [0x120])) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 
{movsi_internal}
 (nil))
(insn 3809 3808 3810 121 (set (reg/f:SI 1 sp)
(plus:SI (reg/f:SI 1 sp)
(reg:SI 7 a7))) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 1 {addsi3}
 (expr_list:REG_DEAD (reg:SI 9 a9)
(nil)))
(insn 3810 3809 721 121 (use (reg:SI 0 a0)) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 -1
 (expr_list:REG_DEAD (reg:SI 0 a0)
(nil)))
(call_insn/j 721 3810 722 121 (call (mem:SI (symbol_ref:SI ("free") [flags 
0x41]  ) [0 __builtin_free S4 A32])
(const_int 0 [0])) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 discrim 1 
106 {sibcall_internal}
 (expr_list:REG_DEAD (reg:SI 2 a2)
(expr_list:REG_CALL_DECL (symbol_ref:SI ("free") [flags 0x41]  
)
(expr_list:REG_EH_REGION (const_int 0 [0])
(nil
(expr_list:SI (use (reg:SI 2 a2))
(nil)))

(IMHO the "rnreg" pass doesn't take REG_ALLOC_ORDER into account;
it just seems to allocate registers in fixed_regs index order,
which may have hurt register A0 that became allocatable in the recent
patch)

gcc/ChangeLog:
PR target/108876

* config/xtensa/xtensa.cc (xtensa_expand_epilogue):
Emit (use (reg:SI A0_REG)) at the end in the sibling call
(i.e. the same place as (return) in the normal call).
---
 gcc/config/xtensa/xtensa.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index d0320efe21d4..b80eef5c19ef 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3548,8 +3548,6 @@ xtensa_expand_epilogue (bool sibcall_p)
  gen_frame_mem (SImode, x));
}
}
-  if (sibcall_p)
-   emit_use (gen_rtx_REG (SImode, A0_REG));
 
   if (cfun->machine->current_frame_size > 0)
{
@@ -3575,7 +3573,9 @@ xtensa_expand_epilogue (bool sibcall_p)
  EH_RETURN_STACKADJ_RTX));
 }
   cfun->machine->epilogue_done = true;
-  if (!sibcall_p)
+  if (sibcall_p)
+emit_use (gen_rtx_REG (SImode, A0_REG));
+  else
 emit_jump_insn (gen_return ());
 }
 
-- 
2.30.2



[COMMITTED 1/2] Revert "gcc: xtensa: fix PR target/108876"

2023-02-23 Thread Max Filippov via Gcc-patches
This reverts commit b2ef02e8cbbaf95fee98be255f697f47193960ec.
---
 gcc/config/xtensa/xtensa.cc |  2 ++
 gcc/config/xtensa/xtensa.md | 20 +++-
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 5c1c713e122d..d0320efe21d4 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3548,6 +3548,8 @@ xtensa_expand_epilogue (bool sibcall_p)
  gen_frame_mem (SImode, x));
}
}
+  if (sibcall_p)
+   emit_use (gen_rtx_REG (SImode, A0_REG));
 
   if (cfun->machine->current_frame_size > 0)
{
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index b8a8aaf97640..d3996b26cb5c 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2369,10 +2369,8 @@
(set_attr "length"  "3")])
 
 (define_expand "sibcall"
-  [(parallel [
-(call (match_operand 0 "memory_operand" "")
- (match_operand 1 "" ""))
-(use (reg:SI A0_REG))])]
+  [(call (match_operand 0 "memory_operand" "")
+(match_operand 1 "" ""))]
   "!TARGET_WINDOWED_ABI"
 {
   xtensa_prepare_expand_call (0, operands);
@@ -2380,8 +2378,7 @@
 
 (define_insn "sibcall_internal"
   [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nic"))
-(match_operand 1 "" "i"))
-   (use (reg:SI A0_REG))]
+(match_operand 1 "" "i"))]
   "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
 {
   return xtensa_emit_sibcall (0, operands);
@@ -2391,11 +2388,9 @@
(set_attr "length"  "3")])
 
 (define_expand "sibcall_value"
-  [(parallel [
-(set (match_operand 0 "register_operand" "")
-(call (match_operand 1 "memory_operand" "")
-  (match_operand 2 "" "")))
-(use (reg:SI A0_REG))])]
+  [(set (match_operand 0 "register_operand" "")
+   (call (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")))]
   "!TARGET_WINDOWED_ABI"
 {
   xtensa_prepare_expand_call (1, operands);
@@ -2404,8 +2399,7 @@
 (define_insn "sibcall_value_internal"
   [(set (match_operand 0 "register_operand" "=a")
(call (mem:SI (match_operand:SI 1 "call_insn_operand" "nic"))
- (match_operand 2 "" "i")))
-   (use (reg:SI A0_REG))]
+ (match_operand 2 "" "i")))]
   "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
 {
   return xtensa_emit_sibcall (1, operands);
-- 
2.30.2



Re: [PATCH 1/2] xtensa: Fix non-fatal regression introduced by b2ef02e8cbbaf95fee98be255f697f47193960ec

2023-02-23 Thread Max Filippov via Gcc-patches
On Wed, Feb 22, 2023 at 7:42 PM Takayuki 'January June' Suwa
 wrote:
>
> In commit b2ef02e8cbbaf95fee98be255f697f47193960ec, the sibling call
> insn included (use (reg:SI A0_REG)) to fix the problem, which added
> a USE chain unconditionally to the data flow of register A0 during
> the sibling call.
>
> As a result, df_regs_ever_live_p (A0_REG) returns true, so even if
> register A0 is not used outside of the sibling call insn, saves and
> restores to stack slots are emitted in pro/epilogue, and finally
> code size increases.
> (This is why I never included (use A0) in sibling calls)
>
> /* example */
> extern int foo(int);
> int test(int a) {
>   return foo(a * 3 + 1);
> }
>
> ;; before
> test:
> addi    sp, sp, -16 ;; unneeded stack frame allocation (induced)
> s32i.n  a0, sp, 12  ;; unneeded saving of register A0
> l32i.n  a0, sp, 12  ;; unneeded restoration of register A0
> addx2   a2, a2, a2
> addi.n  a2, a2, 1
> addi    sp, sp, 16  ;; unneeded stack frame freeing (induced)
> j.l foo, a9 ;; sibling call (truly needs register A0)
>
> The essential cause is that we emit (use A0) *before* the insns that
> do the stack pointer adjustment during epilogue expansion, so the
> liveness of register A0 ends early, so register A0 is reused afterwards.
>
> This patch fixes the problem and avoids such regression by doing the
> emit of (use A0) in the sibling call epilogue expansion at the end.
>
> ;; after
> test:
> addx2   a2, a2, a2
> addi.n  a2, a2, 1
> j.l foo, a9
>
> From RTL-pass "315r.rnreg" by
> "gfortran -O3 -funroll-loops -mabi=call0 -S -da 
> gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":
>
> ;; Function selector_init (__selectors_MOD_selector_init, funcdef_no=2, 
> decl_uid=987, cgraph_uid=3, symbol_order=4)
> ...
> (insn 3807 3806 3808 121 (set (reg:SI 15 a15)
> (mem/c:SI (plus:SI (reg/f:SI 1 sp)
> (const_int 268 [0x10c])) [31  S4 A32])) 
> "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 
> {movsi_internal}
>  (nil))
> (insn 3808 3807 3809 121 (set (reg:SI 7 a7)
> (const_int 288 [0x120])) 
> "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 
> {movsi_internal}
>  (nil))
> (insn 3809 3808 3810 121 (set (reg/f:SI 1 sp)
> (plus:SI (reg/f:SI 1 sp)
> (reg:SI 7 a7))) 
> "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 1 
> {addsi3}
>  (expr_list:REG_DEAD (reg:SI 9 a9)
> (nil)))
> (insn 3810 3809 721 121 (use (reg:SI 0 a0)) 
> "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 -1
>  (expr_list:REG_DEAD (reg:SI 0 a0)
> (nil)))
> (call_insn/j 721 3810 722 121 (call (mem:SI (symbol_ref:SI ("free") 
> [flags 0x41]  ) [0 
> __builtin_free S4 A32])
> (const_int 0 [0])) 
> "gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 discrim 
> 1 106 {sibcall_internal}
>  (expr_list:REG_DEAD (reg:SI 2 a2)
> (expr_list:REG_CALL_DECL (symbol_ref:SI ("free") [flags 0x41]  
> )
> (expr_list:REG_EH_REGION (const_int 0 [0])
> (nil
> (expr_list:SI (use (reg:SI 2 a2))
> (nil)))
>
> (IMHO the "rnreg" pass doesn't take REG_ALLOC_ORDER into account;
> it just seems to allocate registers in fixed_regs index order,
> which may have hurt register A0 that became allocatable in the recent
> patch)
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_expand_epilogue):
> Emit (use (reg:SI A0_REG)) at the end in the sibling call
> (i.e. the same place as (return) in the normal call).
> * config/xtensa/xtensa.md
> (sibcall, sibcall_internal, sibcall_value, sibcall_value_internal):
> Revert changes by the previous patch.
> ---
>  gcc/config/xtensa/xtensa.cc |  4 +++-
>  gcc/config/xtensa/xtensa.md | 20 +++-
>  2 files changed, 10 insertions(+), 14 deletions(-)

I've reverted my fix and committed this fix minus the revert.

-- 
Thanks.
-- Max


Re: [PATCH 2/2] xtensa: Fix missing mode warnings in machine description

2023-02-23 Thread Max Filippov via Gcc-patches
On Wed, Feb 22, 2023 at 7:42 PM Takayuki 'January June' Suwa
 wrote:
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md
> (zero_cost_loop_start, zero_cost_loop_end, loop_end):
> Add missing "SI:" to PLUS RTXes.
> ---
>  gcc/config/xtensa/xtensa.md | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


[COMMITTED] gcc: xtensa: fix PR target/108876

2023-02-21 Thread Max Filippov via Gcc-patches
gcc/
PR target/108876
* config/xtensa/xtensa.cc (xtensa_expand_epilogue): Drop emit_use
for A0_REG.
* config/xtensa/xtensa.md (sibcall, sibcall_internal)
(sibcall_value, sibcall_value_internal): Add 'use' expression
for A0_REG.
---
 gcc/config/xtensa/xtensa.cc |  2 --
 gcc/config/xtensa/xtensa.md | 20 +---
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index d0320efe21d4..5c1c713e122d 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3548,8 +3548,6 @@ xtensa_expand_epilogue (bool sibcall_p)
  gen_frame_mem (SImode, x));
}
}
-  if (sibcall_p)
-   emit_use (gen_rtx_REG (SImode, A0_REG));
 
   if (cfun->machine->current_frame_size > 0)
{
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index d3996b26cb5c..b8a8aaf97640 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2369,8 +2369,10 @@
(set_attr "length"  "3")])
 
 (define_expand "sibcall"
-  [(call (match_operand 0 "memory_operand" "")
-(match_operand 1 "" ""))]
+  [(parallel [
+(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "" ""))
+(use (reg:SI A0_REG))])]
   "!TARGET_WINDOWED_ABI"
 {
   xtensa_prepare_expand_call (0, operands);
@@ -2378,7 +2380,8 @@
 
 (define_insn "sibcall_internal"
   [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nic"))
-(match_operand 1 "" "i"))]
+(match_operand 1 "" "i"))
+   (use (reg:SI A0_REG))]
   "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
 {
   return xtensa_emit_sibcall (0, operands);
@@ -2388,9 +2391,11 @@
(set_attr "length"  "3")])
 
 (define_expand "sibcall_value"
-  [(set (match_operand 0 "register_operand" "")
-   (call (match_operand 1 "memory_operand" "")
- (match_operand 2 "" "")))]
+  [(parallel [
+(set (match_operand 0 "register_operand" "")
+(call (match_operand 1 "memory_operand" "")
+  (match_operand 2 "" "")))
+(use (reg:SI A0_REG))])]
   "!TARGET_WINDOWED_ABI"
 {
   xtensa_prepare_expand_call (1, operands);
@@ -2399,7 +2404,8 @@
 (define_insn "sibcall_value_internal"
   [(set (match_operand 0 "register_operand" "=a")
(call (mem:SI (match_operand:SI 1 "call_insn_operand" "nic"))
- (match_operand 2 "" "i")))]
+ (match_operand 2 "" "i")))
+   (use (reg:SI A0_REG))]
   "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
 {
   return xtensa_emit_sibcall (1, operands);
-- 
2.30.2



Re: [PATCH] xtensa: Enforce return address saving when -Og is specified

2023-02-20 Thread Max Filippov via Gcc-patches
On Fri, Feb 17, 2023 at 8:54 PM Takayuki 'January June' Suwa
 wrote:
>
> Leaf function often omits saving its return address to the stack slot,
> and this feature often makes debugging very confusing, especially for
> stack dump analysis.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_call_save_reg): Change to return
> true for register A0 (return address register) when -Og is specified.
> ---
>  gcc/config/xtensa/xtensa.cc | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH v6] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-15 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Thu, Jan 26, 2023 at 7:17 PM Takayuki 'January June' Suwa
 wrote:
>
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move (with two exceptions; i. the register saved to/restored
> from is the stack pointer, ii. the function needs an additional stack
> pointer adjustment to grow the stack).
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
> ; prologue {
>   ...
> s32i.n  a14, sp, 16
>   ...   ;; no frame pointer needed
> ;; no additional stack growth
> ; } prologue
>   ...
> mov.n   a14, a6 ;; A6 is not SP
>   ...
> call0   foo
>   ...
> mov.n   a8, a14 ;; A8 is not SP
>   ...
> ; epilogue {
>   ...
> l32i.n  a14, sp, 16
>   ...
> ; } epilogue
>
> It can be possible like this:
>
> ;; after
> ; prologue {
>   ...
> (no save needed)
>   ...
> ; } prologue
>   ...
> s32i.n  a6, sp, 16  ;; replaced with A14's slot
>   ...
> call0   foo
>   ...
> l32i.n  a8, sp, 16  ;; through SP
>   ...
> ; epilogue {
>   ...
> (no restoration needed)
>   ...
> ; } epilogue
>
> This patch adds the abovementioned logic to the function prologue/epilogue
> RTL expander code.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (machine_function): Add new member
> 'eliminated_callee_saved_bmp'.
> (xtensa_can_eliminate_callee_saved_reg_p): New function to
> determine whether the register can be eliminated or not.
> (xtensa_expand_prologue): Invoke the above function and
> eliminate the use of the callee-saved register by using its stack
> slot through the stack pointer (or the frame pointer if needed)
> directly.
> (xtensa_expand_epilogue): Modify to not emit register restoration
> insn from its stack slot if the register is already eliminated.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/xtensa/elim_callee_saved.c: New.
> ---
>  gcc/config/xtensa/xtensa.cc   | 132 ++
>  .../gcc.target/xtensa/elim_callee_saved.c |  38 +
>  2 files changed, 145 insertions(+), 25 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

This version passes regression tests, but I still have a couple questions.

> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
> index 3e2e22d4cbe..ff59c933d4d 100644
> --- a/gcc/config/xtensa/xtensa.cc
> +++ b/gcc/config/xtensa/xtensa.cc
> @@ -105,6 +105,7 @@ struct GTY(()) machine_function
>bool epilogue_done;
>bool inhibit_logues_a1_adjusts;
>rtx last_logues_a9_content;
> +  HOST_WIDE_INT eliminated_callee_saved_bmp;
>  };
>
>  static void xtensa_option_override (void);
> @@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
>  cfun->machine->last_logues_a9_content = GEN_INT (offset);
>  }
>
> +static bool
> +xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
> +rtx_insn **p_insnS,
> +rtx_insn **p_insnR)
> +{
> +  df_ref ref;
> +  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
> +  rtx pattern;
> +
> +  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
> +return false;
> +
> +  for (ref = DF_REG_DEF_CHAIN (regno);
> +   ref; ref = DF_REF_NEXT_REG (ref))
> +if (DF_REF_CLASS (ref) != DF_REF_REGULAR
> +   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
> +  continue;
> +else if (GET_CODE (pattern = PATTERN (insn)) == SET
> +&& REG_P (SET_DEST (pattern))
> +&& REGNO (SET_DEST (pattern)) == regno
> +&& REG_NREGS (SET_DEST (pattern)) == 1
> +&& REG_P (SET_SRC (pattern))
> +&& REGNO (SET_SRC (pattern)) != A1_REG)

Do I understand correctly that the check for A1 here and below is
for the case when regno is a hard frame pointer and the function
needs the frame pointer? If so, wouldn't it be better to check
for it explicitly in the beginning?

> +  {
> +   if (insnS)
> + return false;
> +   insnS = insn;
> +   continue;
> +  }
> +else
> +  return false;
> +
> +  for (ref = DF_REG_USE_CHAIN (regno);
> +   ref; ref = DF_REF_NEXT_REG (ref))
> +if (DF_REF_CLASS (ref) != DF_REF_REGULAR
> +   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
> +  continue;
> +else if (GET_CODE (pattern = PATTERN (insn)) == SET
> +&& REG_P (SET_SRC (pattern))
> +&& REGNO (SET_SRC (pattern)) == regno
> +&& REG_NREGS (SET_SRC 

Re: [PATCH v4] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-01-24 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, Jan 23, 2023 at 7:43 PM Takayuki 'January June' Suwa
 wrote:
>
> Register-register move instructions that can be easily seen as
> unnecessary by the human eye may remain in the compiled result.
> For example:
>
> /* example */
> double test(double a, double b) {
>   return __builtin_copysign(a, b);
> }
>
> test:
> add.n   a3, a3, a3
> extui   a5, a5, 31, 1
> ssai    1
> ;; be in the same BB
> src a7, a5, a3  ;; No '0' in the source constraints
> ;; The destination replaced is
> ;;   irrelevant to the sources if the
> ;;   destination constraint has '&'
> ;; No CALL insns in this span
> ;; Both A3 and A7 are irrelevant to
> ;;   insns in this span
> mov.n   a3, a7  ;; An unnecessary reg-reg move
> ;; A7 is not used after this
> ret.n
>
> The last two instructions above, excluding the return instruction,
> could be done like this:
>
> src a3, a5, a3
>
> This symptom often occurs when handling DI/DFmode values with SImode
> instructions.  This patch solves the above problem using peephole2
> pattern.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the occurrence of general-purpose register used only once and for
> transferring intermediate value.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/xtensa/elim_GP_regmove.c: New.
> ---
>  gcc/config/xtensa/xtensa.md   | 49 +++
>  .../gcc.target/xtensa/elim_GP_regmove.c   | 23 +
>  2 files changed, 72 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove.c

This change breaks windowed builds by the following ICEs:

libgcc/libgcov-interface.c: In function ‘__gcov_execlp’:
libgcc/libgcov-interface.c:263:1: error: insn does not satisfy its constraints:
 263 | }
 | ^
(insn 96 95 98 11 (set (reg/f:SI 1 sp)
   (minus:SI (reg/f:SI 1 sp)
   (reg:SI 8 a8 [85]))) "libgcc/libgcov-interface.c":253:20 4 {subsi3}
(expr_list:REG_DEAD (reg:SI 8 a8 [85])
   (nil)))
during RTL pass: cprop_hardreg
libgcc/libgcov-interface.c:263:1: internal compiler error: in
extract_constrain_insn, at recog.cc:2692

It also introduces at least one regression in executable tests
in call0 build, I haven't tracked it down yet.

-- 
Thanks.
-- Max


Re: [PATCH v5] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-24 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, Jan 23, 2023 at 7:43 PM Takayuki 'January June' Suwa
 wrote:
>
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move (with two exceptions; i. the register saved to/restored
> from is the stack pointer, ii. the function needs an additional stack
> pointer adjustment to grow the stack).
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
> ; prologue {
>   ...
> s32i.n  a14, sp, 16
>   ...   ;; no frame pointer needed
> ;; no additional stack growth
> ; } prologue
>   ...
> mov.n   a14, a6 ;; A6 is not SP
>   ...
> call0   foo
>   ...
> mov.n   a8, a14 ;; A8 is not SP
>   ...
> ; epilogue {
>   ...
> l32i.n  a14, sp, 16
>   ...
> ; } epilogue
>
> It can be possible like this:
>
> ;; after
> ; prologue {
>   ...
> (no save needed)
>   ...
> ; } prologue
>   ...
> s32i.n  a6, sp, 16  ;; replaced with A14's slot
>   ...
> call0   foo
>   ...
> l32i.n  a8, sp, 16  ;; through SP
>   ...
> ; epilogue {
>   ...
> (no restoration needed)
>   ...
> ; } epilogue
>
> This patch adds the abovementioned logic to the function prologue/epilogue
> RTL expander code.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (machine_function): Add new member
> 'eliminated_callee_saved_bmp'.
> (xtensa_can_eliminate_callee_saved_reg_p): New function to
> determine whether the register can be eliminated or not.
> (xtensa_expand_prologue): Invoke the above function and
> eliminate the use of the callee-saved register by using its stack
> slot through the stack pointer (or the frame pointer if needed)
> directly.
> (xtensa_expand_epilogue): Modify to not emit register restoration
> insn from its stack slot if the register is already eliminated.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/xtensa/elim_callee_saved.c: New.
> ---
>  gcc/config/xtensa/xtensa.cc   | 130 ++
>  .../gcc.target/xtensa/elim_callee_saved.c |  32 +
>  2 files changed, 137 insertions(+), 25 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

This version results in multiple ICEs with the following stack traces:

gcc/testsuite/gcc.c-torture/compile/2127-1.c:7:1: error: could not
split insn
(insn 7 6 8 2 (set (mem/c:SI (plus:SI (reg/f:SI 15 a15)
   (const_int 24 [0x18])) [3  S4 A32])
   (reg:DF 2 a2))
"gcc/testsuite/gcc.c-torture/compile/2127-1.c":4:10 61
{movdf_internal}
(nil))
during RTL pass: final
gcc/testsuite/gcc.c-torture/compile/2127-1.c:7:1: internal
compiler error: in final_scan_insn_1, at final.cc:2808
0x6cb090 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
   gcc/rtl-error.cc:108
0x639b17 final_scan_insn_1
   gcc/final.cc:2808
0xb1817b final_scan_insn(rtx_insn*, _IO_FILE*, int, int, int*)
   gcc/final.cc:2887
0xb18266 final_1
   gcc/final.cc:1979
0xb18f94 rest_of_handle_final
   gcc/final.cc:4240
0xb18f94 execute
   gcc/final.cc:4318



gcc/testsuite/gcc.c-torture/compile/950729-1.c:39:1: error: could not split insn
(insn 91 93 228 13 (set (reg:DI 4 a4)
   (mem/c:SI (plus:SI (reg/f:SI 15 a15)
   (const_int 72 [0x48])) [5  S4 A32]))
"gcc/testsuite/gcc.c-torture/compile/950729-1.c":28:55 52
{movdi_internal}
(nil))
during RTL pass: final
gcc/testsuite/gcc.c-torture/compile/950729-1.c:39:1: internal compiler
error: in final_scan_insn_1, at final.cc:2808
0x6cb090 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
   gcc/rtl-error.cc:108
0x639b17 final_scan_insn_1
   gcc/final.cc:2808
0xb1817b final_scan_insn(rtx_insn*, _IO_FILE*, int, int, int*)
   gcc/final.cc:2887
0xb18266 final_1
   gcc/final.cc:1979
0xb18f94 rest_of_handle_final
   gcc/final.cc:4240
0xb18f94 execute
   gcc/final.cc:4318

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Revise 89afb2e86fcb29c559b2957fdcbea0d01740c49b

2023-01-24 Thread Max Filippov via Gcc-patches
On Thu, Jan 19, 2023 at 7:33 PM Takayuki 'January June' Suwa
 wrote:
>
> In the previously posted patch
> "xtensa: Make complex hard register clobber elimination more robust and 
> accurate",
> the check code for insns that refer to the [DS]Cmode hard register before
> it is overwritten after it is clobbered is incomplete.  Fortunately such
> insns are seldom emitted, so it didn't matter.
>
> This patch fixes that for the sake of completeness.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md:
> Fix exit from loops detecting references before overwriting in the
> split pattern.
> ---
>  gcc/config/xtensa/xtensa.md | 72 +++--
>  1 file changed, 37 insertions(+), 35 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master with more human readable subject line.

-- 
Thanks.
-- Max


Re: [PATCH v4] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-22 Thread Max Filippov via Gcc-patches
On Fri, Jan 20, 2023 at 8:39 PM Takayuki 'January June' Suwa
 wrote:
> On 2023/01/21 0:14, Max Filippov wrote:
> > After having this many attempts and getting to the issues that are
> > really hard to detect I wonder if the target backend is the right place
> > for this optimization?
> >
> I guess they are not hard to detect

I mean, on the testing side. check-gcc testsuite passed without new
regressions with this change, linux kernel smoke test passed, I was
almost convinced that it's ok to commit.

> but just issues I didn't anticipate (and I just need a little more work).

Looking at other peephole2 patterns I see that their code transformations
are much more compact and they don't need to track additional properties
of unrelated instructions.

> And where else should it be done?  What about implementing a
> target-specific pass just for one-point optimization?

I don't even understand what's target-specific in this optimization?
It looks very generic to me.

-- 
Thanks.
-- Max


Re: [PATCH v3] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-01-20 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Wed, Jan 18, 2023 at 9:06 PM Takayuki 'January June' Suwa
 wrote:
>
> Register-register move instructions that can be easily seen as
> unnecessary by the human eye may remain in the compiled result.
> For example:
>
> /* example */
> double test(double a, double b) {
>   return __builtin_copysign(a, b);
> }
>
> test:
> add.n   a3, a3, a3
> extui   a5, a5, 31, 1
> ssai    1
> ;; be in the same BB
> src a7, a5, a3  ;; No '0' in the source constraints
> ;; No CALL insns in this span
> ;; Both A3 and A7 are irrelevant to
> ;;   insns in this span
> mov.n   a3, a7  ;; An unnecessary reg-reg move
> ;; A7 is not used after this
> ret.n
>
> The last two instructions above, excluding the return instruction,
> could be done like this:
>
> src a3, a5, a3
>
> This symptom often occurs when handling DI/DFmode values with SImode
> instructions.  This patch solves the above problem using peephole2
> pattern.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the occurrence of general-purpose register used only once and for
> transferring intermediate value.
> ---
>  gcc/config/xtensa/xtensa.md | 45 +
>  1 file changed, 45 insertions(+)

With this change I see the following ICEs:

in the libgcc build:

gcc/libgcc/libgcov-interface.c: In function ‘__gcov_execl’:
gcc/libgcc/libgcov-interface.c:228:1: error: insn does not satisfy its
constraints:
 228 | }
 | ^
(insn 96 95 98 11 (set (reg/f:SI 1 sp)
   (minus:SI (reg/f:SI 1 sp)
   (reg:SI 8 a8 [85])))
"gcc/libgcc/libgcov-interface.c":218:20 4 {subsi3}
(expr_list:REG_DEAD (reg:SI 8 a8 [85])
   (nil)))
during RTL pass: cprop_hardreg


in the linux kernel build:

linux/lib/find_bit.c: In function ‘_find_next_bit’:
linux/lib/find_bit.c:70:1: error: unrecognizable insn:
  70 | }
 | ^
(insn 74 72 75 16 (set (reg:SI 10 a10)
   (asm_operands:SI ("ssai 8
   srli %0, %1, 16
   src  %0, %0, %1
   src  %0, %0, %0
   src  %0, %1, %0
") ("=") 0 [
   (reg/v:SI 10 a10 [orig:59 res ] [59])
   ]
[
   (asm_input:SI ("a") linux/arch/xtensa/include/uapi/asm/swab.h:24)
   ]
[] linux/arch/xtensa/include/uapi/asm/swab.h:24))
"linux/arch/xtensa/include/uapi/asm/swab.h":24:5 -1
(nil))
during RTL pass: cprop_hardreg
linux/lib/find_bit.c:70:1: internal compiler error: in
extract_constrain_insn, at recog.cc:2692
0x6c3214 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
   gcc/gcc/rtl-error.cc:108
0x6c3297 _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
   gcc/gcc/rtl-error.cc:116
0x6b4735 extract_constrain_insn(rtx_insn*)
   gcc/gcc/recog.cc:2692
0xe1f67e copyprop_hardreg_forward_1
   gcc/gcc/regcprop.cc:826
0xe20a0f execute
   gcc/gcc/regcprop.cc:1408

-- 
Thanks.
-- Max


Re: [PATCH v4] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-20 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Wed, Jan 18, 2023 at 7:50 PM Takayuki 'January June' Suwa
 wrote:
>
> In the previous patch, if insn is JUMP_INSN or CALL_INSN, it bypasses the reg 
> check (possibly FAIL).
>
> =
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move (the frame pointer is needed to recover the stack pointer
> and must be excluded).
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
> ; prologue {
>   ...
> s32i.n  a14, sp, 16
>   ...
> ; } prologue
>   ...
> mov.n   a14, a6
>   ...
> call0   foo
>   ...
> mov.n   a8, a14
>   ...
> ; epilogue {
>   ...
> l32i.n  a14, sp, 16
>   ...
> ; } epilogue
>
> It can be possible like this:
>
> ;; after
> ; prologue {
>   ...
> (deleted)
>   ...
> ; } prologue
>   ...
> s32i.n  a6, sp, 16
>   ...
> call0   foo
>   ...
> l32i.n  a8, sp, 16
>   ...
> ; epilogue {
>   ...
> (deleted)
>   ...
> ; } epilogue
>
> This patch introduces a new peephole2 pattern that implements the above.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the use of callee-saved register that saves and restores only once
> for other register, by using its stack slot directly.
> ---
>  gcc/config/xtensa/xtensa.md | 62 +
>  1 file changed, 62 insertions(+)

There are still issues with this change in the libgomp:

FAIL: libgomp.c/examples-4/target-1.c execution test
FAIL: libgomp.c/examples-4/target-2.c execution test

They come from the following function:

code produced before the change:
   .literal_position
   .literal .LC8, init@PLT
   .literal .LC9, 40
   .literal .LC10, 10
   .literal .LC11, -80
   .literal .LC12, 80
   .align  4
   .global vec_mult_ref
   .type   vec_mult_ref, @function
vec_mult_ref:
   l32r    a9, .LC11
   addi    sp, sp, -16
   l32r    a10, .LC9
   s32i.n  a12, sp, 8
   s32i.n  a13, sp, 4
   s32i.n  a0, sp, 12
   add.n   sp, sp, a9
   add.n   a12, sp, a10
   l32r    a9, .LC8
   mov.n   a13, a2
   mov.n   a3, sp
   mov.n   a2, a12
   callx0  a9
   l32r    a7, .LC10
   mov.n   a10, a12
   mov.n   a11, sp
   mov.n   a2, a13
   loop    a7, .L17_LEND
.L17:
   l32i.n  a9, a10, 0
   l32i.n  a6, a11, 0
   addi.n  a10, a10, 4
   mull    a9, a9, a6
   addi.n  a11, a11, 4
   s32i.n  a9, a2, 0
   addi.n  a2, a2, 4
   .L17_LEND:
   l32r    a9, .LC12
   add.n   sp, sp, a9
   l32i.n  a0, sp, 12
   l32i.n  a12, sp, 8
   l32i.n  a13, sp, 4
   addi    sp, sp, 16
   ret.n



with the change:
   .literal_position
   .literal .LC8, init@PLT
   .literal .LC9, 40
   .literal .LC10, 10
   .literal .LC11, -80
   .literal .LC12, 80
   .align  4
   .global vec_mult_ref
   .type   vec_mult_ref, @function
vec_mult_ref:
   l32r    a9, .LC11
   l32r    a10, .LC9
   addi    sp, sp, -16
   s32i.n  a12, sp, 8
   s32i.n  a0, sp, 12
   add.n   sp, sp, a9
   add.n   a12, sp, a10
   l32r    a9, .LC8
   s32i.n  a2, sp, 4
   mov.n   a3, sp
   mov.n   a2, a12
   callx0  a9
   l32r    a7, .LC10
   l32i.n  a2, sp, 4
   mov.n   a10, a12
   mov.n   a11, sp
   loop    a7, .L17_LEND
.L17:
   l32i.n  a9, a10, 0
   l32i.n  a6, a11, 0
   addi.n  a10, a10, 4
   mull    a9, a9, a6
   addi.n  a11, a11, 4
   s32i.n  a9, a2, 0
   addi.n  a2, a2, 4
   .L17_LEND:
   l32r    a9, .LC12
   add.n   sp, sp, a9
   l32i.n  a0, sp, 12
   l32i.n  a12, sp, 8
   addi    sp, sp, 16
   ret.n

the stack pointer is modified after saving callee-saved registers,
but the stack offset where a2 is stored and reloaded does not take
this into account.

After having this many attempts and getting to the issues that are
really hard to detect I wonder if the target backend is the right place
for this optimization?

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Optimize inversion of the MSB

2023-01-18 Thread Max Filippov via Gcc-patches
On Tue, Jan 17, 2023 at 9:43 PM Takayuki 'January June' Suwa
 wrote:
>
> Such operation can be done either bitwise-XOR or addition with -2147483648,
> but the latter is one byte less if TARGET_DENSITY.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (xorsi3_internal):
> Rename from the original of "xorsi3".
> (xorsi3): New expansion pattern that emits addition rather than
> bitwise-XOR when the second source is a constant of -2147483648
> if TARGET_DENSITY.
> ---
>  gcc/config/xtensa/xtensa.md | 26 +-
>  1 file changed, 25 insertions(+), 1 deletion(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH v2] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-01-18 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Tue, Jan 17, 2023 at 9:25 PM Takayuki 'January June' Suwa
 wrote:
>
> Register-register move instructions that can be easily seen as
> unnecessary by the human eye may remain in the compiled result.
> For example:
>
> /* example */
> double test(double a, double b) {
>   return __builtin_copysign(a, b);
> }
>
> test:
> add.n   a3, a3, a3
> extui   a5, a5, 31, 1
> ssai    1
> ;; be in the same BB
> src a7, a5, a3  ;; No '0' in the source constraints
> ;; No CALL insns in this span
> ;; Both A3 and A7 are irrelevant to
> ;;   insns in this span
> mov.n   a3, a7  ;; An unnecessary reg-reg move
> ;; A7 is not used after this
> ret.n
>
> The last two instructions above, excluding the return instruction,
> could be done like this:
>
> src a3, a5, a3
>
> This symptom often occurs when handling DI/DFmode values with SImode
> instructions.  This patch solves the above problem using peephole2
> pattern.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the occurrence of general-purpose register used only once and for
> transferring intermediate value.
> ---
>  gcc/config/xtensa/xtensa.md | 43 +
>  1 file changed, 43 insertions(+)

This still generates ICE, this time while building libstdc++:

during RTL pass: ce3
In file included from
build/xtensa-buildroot-linux-uclibc/libstdc++-v3/include/bits/locale_facets.h:2687,
from
build/xtensa-buildroot-linux-uclibc/libstdc++-v3/include/locale:42,
from gcc/libstdc++-v3/src/c++11/locale-inst.cc:38,
from gcc/libstdc++-v3/src/c++11/wlocale-inst.cc:35:
build/xtensa-buildroot-linux-uclibc/libstdc++-v3/include/bits/locale_facets.tcc:
In member function ‘_InIter std::num_get<_CharT,
_InIter>::do_get(iter_type, iter_type, std::ios_base&,
std::ios_base::iostate&, bool&) const [with _CharT = wchar_t; _InIter
= std::istreambuf_iterator >]’:
build/xtensa-buildroot-linux-uclibc/libstdc++-v3/include/bits/locale_facets.tcc:686:5:
internal compiler error: in df_refs_verify, at df-scan.cc:4009
 686 | }
 | ^
0x6eb0dc df_refs_verify
   gcc/gcc/df-scan.cc:4009
0xd19a74 df_insn_refs_verify
   gcc/gcc/df-scan.cc:4092
0xd1b94c df_bb_verify
   gcc/gcc/df-scan.cc:4125
0xd1bd77 df_scan_verify()
   gcc/gcc/df-scan.cc:4246
0xd06ca7 df_verify()
   gcc/gcc/df-core.cc:1818
0xd06ca7 df_analyze_1
   gcc/gcc/df-core.cc:1214
0x1a7287c if_convert
   gcc/gcc/ifcvt.cc:5858
0x1a73ddd execute
   gcc/gcc/ifcvt.cc:6026

-- 
Thanks.
-- Max


Re: [PATCH v3] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-18 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Tue, Jan 17, 2023 at 8:23 PM Takayuki 'January June' Suwa
 wrote:
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move (the frame pointer is needed to recover the stack pointer
> and must be excluded).
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
> ; prologue {
>   ...
> s32i.n  a14, sp, 16
>   ...
> ; } prologue
>   ...
> mov.n   a14, a6
>   ...
> call0   foo
>   ...
> mov.n   a8, a14
>   ...
> ; epilogue {
>   ...
> l32i.n  a14, sp, 16
>   ...
> ; } epilogue
>
> It can be possible like this:
>
> ;; after
> ; prologue {
>   ...
> (deleted)
>   ...
> ; } prologue
>   ...
> s32i.n  a6, sp, 16
>   ...
> call0   foo
>   ...
> l32i.n  a8, sp, 16
>   ...
> ; epilogue {
>   ...
> (deleted)
>   ...
> ; } epilogue
>
> This patch introduces a new peephole2 pattern that implements the above.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the use of callee-saved register that saves and restores only once
> for other register, by using its stack slot directly.
> ---
>  gcc/config/xtensa/xtensa.md | 62 +
>  1 file changed, 62 insertions(+)

This change introduces a bunch of different test failures:

FAIL: gcc.c-torture/execute/builtins/strpbrk.c execution,  -O2
FAIL: gcc.c-torture/execute/builtins/strpbrk.c execution,  -O3 -g
FAIL: gcc.c-torture/execute/builtins/strpbrk.c execution,  -Os
FAIL: gcc.c-torture/execute/builtins/strpbrk.c execution,  -O2 -flto
-fno-use-linker-plugin -flto-partition=none
FAIL: gcc.c-torture/execute/builtins/strstr-asm.c execution,  -Os
FAIL: gcc.c-torture/execute/20001130-1.c   -Os  execution test
FAIL: gcc.c-torture/execute/20040311-1.c   -O2  execution test
FAIL: gcc.c-torture/execute/20040311-1.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/20040311-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/20121108-1.c   -O2  execution test
FAIL: gcc.c-torture/execute/20121108-1.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: gcc.c-torture/execute/20121108-1.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/20121108-1.c   -Os  execution test
FAIL: gcc.c-torture/execute/20121108-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/20121108-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test
FAIL: gcc.c-torture/execute/20140622-1.c   -O2  execution test
FAIL: gcc.c-torture/execute/20140622-1.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/20140622-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/20141022-1.c   -O2  execution test
FAIL: gcc.c-torture/execute/20141022-1.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: gcc.c-torture/execute/20141022-1.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/20141022-1.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/20141022-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test
FAIL: gcc.c-torture/execute/20141107-1.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  execution test
FAIL: gcc.c-torture/execute/961213-1.c   -Os  execution test
FAIL: gcc.c-torture/execute/builtin-bitops-1.c   -Os  execution test
FAIL: gcc.c-torture/execute/cvt-1.c   -O2  execution test
FAIL: gcc.c-torture/execute/cvt-1.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/cvt-1.c   -Os  execution test
FAIL: gcc.c-torture/execute/cvt-1.c   -O2 -flto -fno-use-linker-plugin
-flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr40747.c   -O2  execution test
FAIL: gcc.c-torture/execute/pr40747.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/pr40747.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr60960.c   -O2  execution test
FAIL: gcc.c-torture/execute/pr60960.c   -O3 -g  execution test
FAIL: gcc.c-torture/execute/pr60960.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.c-torture/execute/pr60960.c   -O2 -flto -fuse-linker-plugin
-fno-fat-lto-objects  execution test
FAIL: gcc.c-torture/execute/ieee/fp-cmp-5.c execution,  -O2
FAIL: gcc.c-torture/execute/ieee/fp-cmp-5.c execution,  -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions
FAIL: gcc.c-torture/execute/ieee/fp-cmp-5.c execution,  -O3 -g

Re: [PATCH v2] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-17 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, Jan 16, 2023 at 8:12 PM Takayuki 'January June' Suwa
 wrote:
>
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move (the frame pointer is needed to recover the stack pointer
> and must be excluded).
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
> ; prologue {
>   ...
> s32i.n  a14, sp, 16
>   ...
> ; } prologue
>   ...
> mov.n   a14, a6
>   ...
> call0   foo
>   ...
> mov.n   a8, a14
>   ...
> ; epilogue {
>   ...
> l32i.n  a14, sp, 16
>   ...
> ; } epilogue
>
> It can be possible like this:
>
> ;; after
> ; prologue {
>   ...
> (deleted)
>   ...
> ; } prologue
>   ...
> s32i.n  a6, sp, 16
>   ...
> call0   foo
>   ...
> l32i.n  a8, sp, 16
>   ...
> ; epilogue {
>   ...
> (deleted)
>   ...
> ; } epilogue
>
> This patch introduces a new peephole2 pattern that implements the above.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the use of callee-saved register that saves and restores only once
> for other register, by using its stack slot directly.
> ---
>  gcc/config/xtensa/xtensa.md | 60 +
>  1 file changed, 60 insertions(+)

There are still a few regressions in tests with -fcompare-debug because
code generated with -g and without it is different:

+FAIL: gcc.dg/pr41241.c (test for excess errors)
+FAIL: gcc.dg/pr48159-1.c (test for excess errors)
+FAIL: gcc.dg/pr65521.c (test for excess errors)
+FAIL: gcc.dg/torture/pr42878-1.c   -O2  (test for excess errors)
+FAIL: gcc.dg/torture/pr42878-1.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
+FAIL: gcc.dg/torture/pr42878-1.c   -O3 -g  (test for excess errors)
+FAIL: gcc.dg/torture/pr42878-1.c   -Os  (test for excess errors)
+FAIL: gcc.dg/torture/pr42878-1.c   -O2 -flto -fno-use-linker-plugin
-flto-partition=none  (test for excess errors)

E.g. check the following test with -g0 and -g:

gcc/cc1 gcc/testsuite/gcc.dg/torture/pr42878-1.c -mlongcalls
-mtext-section-literals -fdiagnostics-plain-output -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-01-17 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Mon, Jan 16, 2023 at 8:54 PM Takayuki 'January June' Suwa
 wrote:
>
> Register-register move instructions that can be easily seen as
> unnecessary by the human eye may remain in the compiled result.
> For example:
>
> /* example */
> double test(double a, double b) {
>   return __builtin_copysign(a, b);
> }
>
> test:
> add.n   a3, a3, a3
> extui   a5, a5, 31, 1
> ssai    1
> ;; be in the same BB
> src a7, a5, a3  ;; No '0' in the source constraints
> ;; No CALL insns in this span
> ;; Both A3 and A7 are irrelevant to
> ;;   insns in this span
> mov.n   a3, a7  ;; An unnecessary reg-reg move
> ;; A7 is not used after this
> ret.n
>
> The last two instructions above, excluding the return instruction,
> could be done like this:
>
> src a3, a5, a3
>
> This symptom often occurs when handling DI/DFmode values with SImode
> instructions.  This patch solves the above problem using peephole2
> pattern.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the occurrence of general-purpose register used only once and for
> transferring intermediate value.
> ---
>  gcc/config/xtensa/xtensa.md | 44 +
>  1 file changed, 44 insertions(+)

This change results in a bunch of ICEs with the following backtrace:

gcc/libgcc/unwind-dw2.c: In function ‘execute_cfa_program_specialized’:
gcc/libgcc/unwind-dw2.c:972:1: internal compiler error: RTL check:
expected elt 2 type 'B', have '0' (rtx barrier) in BLOCK_FOR_INSN, at
rtl.h:1493
 972 | }
 | ^
0x6c3334 rtl_check_failed_type1(rtx_def const*, int, int, char const*,
int, char const*)
   gcc/gcc/rtl.cc:897
0x7bf285 BLOCK_FOR_INSN(rtx_def*)
   gcc/gcc/rtl.h:1493
0x7c448d BLOCK_FOR_INSN(rtx_def*)
   gcc/gcc/rtl.h:1509
0x7c448d gen_peephole2_4(rtx_insn*, rtx_def**)
   gcc/gcc/config/xtensa/xtensa.md:3102
0xe1cce2 peephole2_optimize
   gcc/gcc/recog.cc:4180
0xe1cce2 rest_of_handle_peephole2
   gcc/gcc/recog.cc:4331
0xe1cce2 execute
   gcc/gcc/recog.cc:4368

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-16 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Sun, Jan 15, 2023 at 6:53 PM Takayuki 'January June' Suwa
 wrote:
>
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move.
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
> ; prologue {
>   ...
> s32i.n  a14, sp, 16
>   ...
> ; } prologue
>   ...
> mov.n   a14, a6
>   ...
> call0   foo
>   ...
> mov.n   a8, a14
>   ...
> ; epilogue {
>   ...
> l32i.n  a14, sp, 16
>   ...
> ; } epilogue
>
> It can be possible like this:
>
> ;; after
> ; prologue {
>   ...
> (deleted)
>   ...
> ; } prologue
>   ...
> s32i.n  a6, sp, 16
>   ...
> call0   foo
>   ...
> l32i.n  a8, sp, 16
>   ...
> ; epilogue {
>   ...
> (deleted)
>   ...
> ; } epilogue
>
> This patch introduces a new peephole2 pattern that implements the above.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
> the use of callee-saved register that saves and restores only once
> for other register, by using its stack slot directly.
> ---
>  gcc/config/xtensa/xtensa.md | 58 +
>  1 file changed, 58 insertions(+)

This change introduces a bunch of test failures in cases where alloca
or similar mechanisms are used in a function and a15 is used as a
stack frame pointer. E.g., gcc.c-torture/execute/pr82210.c has the
following diff:

@@ -20,9 +20,8 @@
   srli    a10, a10, 4
   slli    a10, a10, 4
   s32i.n  a12, sp, 8
-   s32i.n  a15, sp, 0
   s32i.n  a0, sp, 12
-   mov.n   a15, sp
+   s32i.n  sp, sp, 0
   sub sp, sp, a10
   mov.n   a6, sp
   mov.n   a12, a6
@@ -59,11 +58,10 @@
   addi.n  a2, a2, 4
   bne a12, a13, .L6
.L1:
-   mov.n   sp, a15
+   l32i.n  sp, sp, 0
   l32i.n  a0, sp, 12
   l32i.n  a12, sp, 8
   l32i.n  a13, sp, 4
-   l32i.n  a15, sp, 0
   addi    sp, sp, 16
   ret.n
   .size   foo, .-foo

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Remove old broken tweak for leaf function

2023-01-14 Thread Max Filippov via Gcc-patches
On Fri, Jan 13, 2023 at 9:03 PM Takayuki 'January June' Suwa
 wrote:
>
> In the before-IRA era, ORDER_REGS_FOR_LOCAL_ALLOC was called for each
> function in Xtensa, and there was register allocation table reordering
> for leaf functions to compensate for the poor performance of local-alloc.
>
> Today the adjustment hook is still called via its alternative
> ADJUST_REG_ALLOC_ORDER, but it is only called once at the start of the IRA,
> and leaf_function_p() erroneously returns true and also gives no argument
> count.
>
> That straightforwardly misleads register allocation that all functions are
> always leaves with no arguments, which leads to inefficiencies in allocation
> results.
>
> Fortunately, IRA is smarter than local-alloc and does not need such assistance.
>
> This patch does away with the antiquated tweak by removing the code that no
> longer works.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa-protos.h (order_regs_for_local_alloc):
>   Rename to xtensa_adjust_reg_alloc_order.
> * config/xtensa/xtensa.cc (xtensa_adjust_reg_alloc_order):
>   Ditto.  And also remove code to reorder register numbers for
>   leaf functions, rename the tables, and adjust the allocation
>   order for the call0 ABI to use register A0 more.
>   (xtensa_leaf_regs): Remove.
> * config/xtensa/xtensa.h (REG_ALLOC_ORDER): Cosmetics.
>   (order_regs_for_local_alloc): Rename as the above.
>   (LEAF_REGISTERS, LEAF_REG_REMAP, leaf_function): Remove.
> ---
>  gcc/config/xtensa/xtensa-protos.h |  2 +-
>  gcc/config/xtensa/xtensa.cc   | 77 +++
>  gcc/config/xtensa/xtensa.h| 51 ++--
>  3 files changed, 31 insertions(+), 99 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 2/2] xtensa: Optimize ctzsi2 and ffssi2 a bit

2023-01-12 Thread Max Filippov via Gcc-patches
On Wed, Jan 11, 2023 at 8:26 PM Takayuki 'January June' Suwa
 wrote:
>
> This patch saves one byte when the Code Density Option is enabled.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (ctzsi2, ffssi2):
> Rearrange the emitting codes.
> ---
>  gcc/config/xtensa/xtensa.md | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH 1/2] xtensa: Tune "*btrue" insn pattern

2023-01-12 Thread Max Filippov via Gcc-patches
On Wed, Jan 11, 2023 at 8:26 PM Takayuki 'January June' Suwa
 wrote:
>
> This branch instruction has short encoding if EQ/NE comparison against
> immediate zero when the Code Density Option is enabled, but its "length"
> attribute was only for normal encoding.  This patch fixes it.
>
> This patch also prevents the postreload pass from undesirably replacing the
> immediate zero compared by the instruction (short encoding, as mentioned
> above) with a register that holds the value zero (normal encoding).
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*btrue):
> Correct value of the attribute "length" that depends on
> TARGET_DENSITY and operands, and add '?' character to the register
> constraint of the compared operand.
> ---
>  gcc/config/xtensa/xtensa.md | 11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Make instruction cost estimation for size more accurate

2023-01-10 Thread Max Filippov via Gcc-patches
On Mon, Jan 9, 2023 at 7:34 PM Takayuki 'January June' Suwa
 wrote:
>
> Until now, we applied COSTS_N_INSNS() (multiplying by 4) after dividing
> the instruction length by 3, so length differences smaller than 3 bytes
> could not be expressed in the insn cost for size (e.g. 11 bytes and
> 12 bytes cost the same).
>
> This patch fixes that.
>
> ;; 2 bytes
> addi.n  a2, a2, -1  ; cost 3
>
> ;; 3 bytes
> addmi   a2, a2, 1024; cost 4
>
> ;; 4 bytes
> movi.n  a3, 80  ; cost 5
> bnez.n  a2, a3, .L4
>
> ;; 5 bytes
> srli    a2, a3, 1   ; cost 7
> add.n   a2, a2, a2
>
> ;; 6 bytes
> ssai    8   ; cost 8
> src a4, a2, a3
>
> ;; 3 + 4 bytes
> l32r    a2, .L5 ; cost 9
>
> ;; 11 bytes ; cost 15
> ;; 12 bytes ; cost 16
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_insn_cost):
> Let insn cost for size be obtained by applying COSTS_N_INSNS()
> to instruction length and then dividing by 3.
> ---
>  gcc/config/xtensa/xtensa.cc | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH v2] xtensa: Optimize bitwise splicing operation

2023-01-08 Thread Max Filippov via Gcc-patches
On Sat, Jan 7, 2023 at 9:04 PM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes the operation of cutting and splicing two register
> values at a specified bit position, in other words, combining (bitwise
> ORing) bits 0 through (C-1) of the register with bits C through 31
> of the other, where C is the specified immediate integer 17 through 31.
>
> This typically applies to copying the sign of a floating-point number and
> to __builtin_return_address() under the windowed register ABI, and saves
> one instruction compared to the four shifts and a bitwise OR produced by
> the default RTL combination pass.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*splice_bits):
> New insn_and_split pattern.
> ---
>  gcc/config/xtensa/xtensa.md | 47 +
>  1 file changed, 47 insertions(+)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Optimize bitwise splicing operation

2023-01-07 Thread Max Filippov via Gcc-patches
On Fri, Jan 6, 2023 at 6:55 PM Takayuki 'January June' Suwa
 wrote:
>
> This patch optimizes the operation of cutting and splicing two register
> values at a specified bit position, in other words, combining (bitwise
> ORing) bits 0 through (C-1) of the register with bits C through 31
> of the other, where C is the specified immediate integer 1 through 31.
>
> This typically applies to copying the sign of a floating-point number or
> to __builtin_return_address() under the windowed register ABI, and saves
> one instruction compared to the four shifts and a bitwise OR produced by
> the RTL generation pass.

While I indeed see this kind of change, e.g.:
-   extui   a3, a3, 27, 5
-   slli    a2, a2, 5
-   srli    a2, a2, 5
-   slli    a3, a3, 27
-   or  a2, a2, a3
+   slli    a2, a2, 5
+   extui   a3, a3, 27, 5
+   ssai    5
+   src a2, a3, a2

I also see the following:
-   movi.n  a6, -4
-   and a5, a5, a6
-   extui   a3, a3, 0, 2
-   or  a3, a3, a5
+   srli    a5, a5, 2
+   slli    a3, a3, 30
+   ssai    30
+   src a3, a5, a3

i.e. after the split there's the same number of instructions,
but the new sequence is one byte longer than the original one
because of the movi.n.

Looking at a bunch of linux builds I observe a slight code size
growth in call0 kernels and a slight code size reduction in
windowed kernels.

> gcc/ChangeLog:
>
> * config/xtensa/xtensa.md (*splice_bits):
> New insn_and_split pattern.
> ---
>  gcc/config/xtensa/xtensa.md | 47 +
>  1 file changed, 47 insertions(+)
>
> diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
> index 0a26d3dccf4..36ec1b1918e 100644
> --- a/gcc/config/xtensa/xtensa.md
> +++ b/gcc/config/xtensa/xtensa.md
> @@ -746,6 +746,53 @@
> (set_attr "mode" "SI")
> (set_attr "length"  "3")])
>
> +(define_insn_and_split "*splice_bits"
> +  [(set (match_operand:SI 0 "register_operand" "=a")
> +   (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
> +   (match_operand:SI 3 "const_int_operand" "i"))
> +   (and:SI (match_operand:SI 2 "register_operand" "r")
> +   (match_operand:SI 4 "const_int_operand" "i"))))]
> +
> +  "!optimize_debug && optimize
> +   && INTVAL (operands[3]) + INTVAL (operands[4]) == -1
> +   && (exact_log2 (INTVAL (operands[3]) + 1) > 0
> +   || exact_log2 (INTVAL (operands[4]) + 1) > 0)"
> +  "#"
> +  "&& can_create_pseudo_p ()"
> +  [(set (match_dup 5)
> +   (ashift:SI (match_dup 1)
> +  (match_dup 4)))
> +   (set (match_dup 6)
> +   (lshiftrt:SI (match_dup 2)
> +(match_dup 3)))
> +   (set (match_dup 0)
> +   (ior:SI (lshiftrt:SI (match_dup 5)
> +(match_dup 4))
> +   (ashift:SI (match_dup 6)
> +  (match_dup 3))))]
> +{
> +  int shift;
> +  if (INTVAL (operands[3]) < 0)
> +{
> +  rtx x;
> +  x = operands[1], operands[1] = operands[2], operands[2] = x;
> +  x = operands[3], operands[3] = operands[4], operands[4] = x;
> +}
> +  shift = floor_log2 (INTVAL (operands[3]) + 1);
> +  operands[3] = GEN_INT (shift);
> +  operands[4] = GEN_INT (32 - shift);
> +  operands[5] = gen_reg_rtx (SImode);
> +  operands[6] = gen_reg_rtx (SImode);
> +}
> +  [(set_attr "type" "arith")
> +   (set_attr "mode" "SI")
> +   (set (attr "length")
> +   (if_then_else (match_test "TARGET_DENSITY
> +  && (INTVAL (operands[3]) == 0x7FFFFFFF
> +  || INTVAL (operands[4]) == 0x7FFFFFFF)")
> + (const_int 11)
> + (const_int 12)))])

I wonder how the length could be 11 here? I always see 4 3-byte
instructions generated by this pattern.

-- 
Thanks.
-- Max


Re: [PATCH v2] xtensa: Optimize stack frame adjustment more

2023-01-07 Thread Max Filippov via Gcc-patches
On Fri, Jan 6, 2023 at 6:55 PM Takayuki 'January June' Suwa
 wrote:
>
> This patch introduces a convenient helper function for integer immediate
> addition with scratch register as needed, that splits and emits either
> up to two ADDI/ADDMI machine instructions or an addition by register
> following an integer immediate load (which may later be transformed by
> constantsynth).
>
> By using the helper function, it makes stack frame adjustment logic
> simplified and instruction count less in some cases.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc
> (xtensa_split_imm_two_addends, xtensa_emit_add_imm):
> New helper functions.
> (xtensa_set_return_address, xtensa_output_mi_thunk):
> Change to use the helper function.
> (xtensa_emit_adjust_stack_ptr): Ditto.
> And also change to try reusing the content of scratch register
> A9 if the register is not modified in the function body.
> ---
>  gcc/config/xtensa/xtensa.cc | 151 +---
>  1 file changed, 106 insertions(+), 45 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Optimize stack frame adjustment more

2023-01-06 Thread Max Filippov via Gcc-patches
On Thu, Jan 5, 2023 at 10:57 PM Takayuki 'January June' Suwa
 wrote:
> By using the helper function, it makes stack frame adjustment logic
> simplified and instruction count less in some cases.

I've built a couple linux configurations with and without this change and
I observe consistent code size growth, e.g.:

iss_defconfig without the change:
   text    data     bss     dec     hex filename
3014768  164016  115108 3293892  3242c4 vmlinux

iss_defconfig with the change:
   text    data     bss     dec     hex filename
3015296  164008  115108 3294412  3244cc vmlinux

virt_defconfig without the change:
   text    data     bss     dec     hex filename
5498881 2254360  291768 8045009  7ac1d1 vmlinux

virt_defconfig with the change:
   text    data     bss     dec     hex filename
5500389 2254360  291768 8046517  7ac7b5 vmlinux

generic_kc705_defconfig without the change:
   text    data     bss     dec     hex filename
7062530  635340  286400 7984270  79d48e vmlinux

generic_kc705_defconfig with the change:
   text    data     bss     dec     hex filename
7064078  635340  286400 7985818  79da9a vmlinux

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Optimize stack frame adjustment more

2023-01-05 Thread Max Filippov via Gcc-patches
On Thu, Jan 5, 2023 at 7:35 PM Takayuki 'January June' Suwa
 wrote:
> On second thought, it cannot be a good idea to split addition/subtraction to 
> the stack pointer.
>
> > -4aaf:  b0a192  movi    a9, 0x1b0
> > -4ab2:  1f9a    add.n   a1, a15, a9
>
> > +4aaf:  02df12  addmi   a1, a15, 0x200
> > +4ab2:  b0c112  addi    a1, a1, -80
>
> Because the former is atomic, but the latter is not. (may be interrupted 
> between the two add instructions)

Oh, right, there are two issues: one is interruption in the absence of
detailed stack tracking in the DWARF info, which can be fixed by emitting
a separate note for each a1 change, the other is interruption when
a1 is in the parent frame, which can be fixed by always moving a1
down first, e.g. with the following change:

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3b8a7bcda371..29cb91fa7de5 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2539,7 +2539,10 @@ xtensa_split_imm_two_addends (HOST_WIDE_INT
imm, HOST_WIDE_INT v[2])

  if (xtensa_simm8 (v1) || xtensa_simm8x256 (v1))
{
-  v[0] = v0, v[1] = v1;
+  if (v0 < 0)
+   v[0] = v0, v[1] = v1;
+  else
+   v[0] = v1, v[1] = v0;
  return true;
}

Or both can be fixed by using a scratch register in the middle of the
addi/addmi sequence.

> I'll wait for the results of your investigation, but it may be better to 
> withdraw the patch.

The issue was in the unwinding code in the libgcc_s.so. I haven't figured
out the exact mechanism, but found that emitting a separate note for each
a1 change fixes it.

-- 
Thanks.
-- Max


Re: [PATCH] xtensa: Optimize stack frame adjustment more

2023-01-05 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Thu, Jan 5, 2023 at 3:57 AM Takayuki 'January June' Suwa
 wrote:
>
> This patch introduces a convenient helper function for integer immediate
> addition with scratch register as needed, that splits and emits either
> up to two ADDI/ADDMI machine instructions or an addition by register
> following an immediate integer load (which may later be transformed by
> constantsynth).
>
> By using the helper function, it makes stack frame adjustment logic
> simplified and instruction count less in some cases.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc
> (xtensa_split_imm_two_addends, xtensa_emit_add_imm):
> New helper functions.
> (xtensa_emit_adjust_stack_ptr, xtensa_set_return_address,
> xtensa_output_mi_thunk): Change to use the helper function.
> ---
>  gcc/config/xtensa/xtensa.cc | 139 +++-
>  1 file changed, 88 insertions(+), 51 deletions(-)

This change introduces a bunch of failures in the g++ testsuite,
but the culprit is apparently somewhere in the libstdc++.so, I'm
still looking for it.

I see the following pattern change in the generated epilogue code:

-4aaf:  b0a192  movi    a9, 0x1b0
-4ab2:  1f9a    add.n   a1, a15, a9
...
-4abe:  20c112  addi    a1, a1, 32
-4ac1:  f00d    ret.n
+4aaf:  02df12  addmi   a1, a15, 0x200
+4ab2:  b0c112  addi    a1, a1, -80
...
+4abf:  20c112  addi    a1, a1, 32
+4ac2:  f00d    ret.n

I.e. a1 is first moved into the parent stack frame, then back to the right
spot. This does not look correct, especially for bare-metal targets.

-- 
Thanks.
-- Max


[COMMITTED] gcc: xtensa: use GP_RETURN_* instead of magic constant

2022-12-29 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa.cc (xtensa_return_in_memory): Use
GP_RETURN_* instead of magic constant.
---
 gcc/config/xtensa/xtensa.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index e726a115029f..ae44199bc988 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4516,7 +4516,7 @@ static bool
 xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
   return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)
- > 4 * UNITS_PER_WORD);
+ > (unsigned) (GP_RETURN_LAST - GP_RETURN_FIRST + 1) * UNITS_PER_WORD);
 }
 
 /* Worker function for TARGET_FUNCTION_VALUE.  */
-- 
2.30.2



Re: [PATCH] xtensa: Check DF availability before use

2022-12-29 Thread Max Filippov via Gcc-patches
On Thu, Dec 29, 2022 at 4:33 AM Takayuki 'January June' Suwa
 wrote:
>
> Perhaps no problem, but for safety.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_expand_prologue): Fix to check
> DF availability before use of DF_* macros.
> ---
>  gcc/config/xtensa/xtensa.cc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max


[COMMITTED 1/4] xtensa: Tabify, and trim trailing spaces

2022-12-27 Thread Max Filippov via Gcc-patches
From: Takayuki 'January June' Suwa 

Cosmetic and no functional changes.

gcc/ChangeLog:

* config/xtensa/elf.h: Tabify, and trim trailing spaces.
* config/xtensa/linux.h: Likewise.
* config/xtensa/uclinux.h: Likewise.
* config/xtensa/xtensa-dynconfig.c: Likewise.
* config/xtensa/xtensa.cc: Likewise.
* config/xtensa/xtensa.h: Likewise.
* config/xtensa/xtensa.md: Likewise.
---
 gcc/config/xtensa/elf.h  | 32 +
 gcc/config/xtensa/linux.h|  1 -
 gcc/config/xtensa/uclinux.h  |  1 -
 gcc/config/xtensa/xtensa-dynconfig.c |  6 +-
 gcc/config/xtensa/xtensa.cc  | 85 
 gcc/config/xtensa/xtensa.h   |  6 +-
 gcc/config/xtensa/xtensa.md  | 98 ++--
 7 files changed, 113 insertions(+), 116 deletions(-)

diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h
index fbdccc49c9b5..1edc1761d74d 100644
--- a/gcc/config/xtensa/elf.h
+++ b/gcc/config/xtensa/elf.h
@@ -59,7 +59,7 @@ along with GCC; see the file COPYING3.  If not see
   "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s"
 
 #undef ENDFILE_SPEC
-#define ENDFILE_SPEC "crtend%O%s crtn%O%s"  
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
 
 #undef LINK_SPEC
 #define LINK_SPEC \
@@ -86,19 +86,17 @@ along with GCC; see the file COPYING3.  If not see
 /* Search for headers in $tooldir/arch/include and for libraries and
startfiles in $tooldir/arch/lib.  */
 #define GCC_DRIVER_HOST_INITIALIZATION \
-do \
-{ \
-  char *tooldir, *archdir; \
-  tooldir = concat (tooldir_base_prefix, spec_machine, \
-   dir_separator_str, NULL); \
-  if (!IS_ABSOLUTE_PATH (tooldir)) \
-tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \
- spec_version, dir_separator_str, tooldir, NULL); \
-  archdir = concat (tooldir, "arch", dir_separator_str, NULL); \
-  add_prefix (_prefixes, \
- concat (archdir, "lib", dir_separator_str, NULL), \
- "GCC", PREFIX_PRIORITY_LAST, 0, 1); \
-  add_prefix (_prefixes, archdir, \
- "GCC", PREFIX_PRIORITY_LAST, 0, 0); \
-  } \
-while (0)
+  do { \
+char *tooldir, *archdir; \
+tooldir = concat (tooldir_base_prefix, spec_machine, \
+ dir_separator_str, NULL); \
+if (!IS_ABSOLUTE_PATH (tooldir)) \
+  tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, 
\
+   spec_version, dir_separator_str, tooldir, NULL); \
+archdir = concat (tooldir, "arch", dir_separator_str, NULL); \
+add_prefix (_prefixes, \
+   concat (archdir, "lib", dir_separator_str, NULL), \
+   "GCC", PREFIX_PRIORITY_LAST, 0, 1); \
+add_prefix (_prefixes, archdir, \
+   "GCC", PREFIX_PRIORITY_LAST, 0, 0); \
+  } while (0)
diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h
index bc7bee71517d..198edfe05531 100644
--- a/gcc/config/xtensa/linux.h
+++ b/gcc/config/xtensa/linux.h
@@ -69,4 +69,3 @@ along with GCC; see the file COPYING3.  If not see
 #define XTENSA_ALWAYS_PIC 1
 
 #undef DEBUGGER_REGNO
-
diff --git a/gcc/config/xtensa/uclinux.h b/gcc/config/xtensa/uclinux.h
index 5fcf639ccff4..5787b2f1ab95 100644
--- a/gcc/config/xtensa/uclinux.h
+++ b/gcc/config/xtensa/uclinux.h
@@ -71,4 +71,3 @@ along with GCC; see the file COPYING3.  If not see
 #define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function
 
 #undef DEBUGGER_REGNO
-
diff --git a/gcc/config/xtensa/xtensa-dynconfig.c 
b/gcc/config/xtensa/xtensa-dynconfig.c
index 056204ae9463..0a611fd14b03 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.c
+++ b/gcc/config/xtensa/xtensa-dynconfig.c
@@ -35,7 +35,7 @@
 
 #if !defined (HAVE_DLFCN_H) && defined (_WIN32)
 
-#define RTLD_LAZY 0  /* Dummy value.  */
+#define RTLD_LAZY 0/* Dummy value.  */
 
 static void *
 dlopen (const char *file, int mode ATTRIBUTE_UNUSED)
@@ -142,8 +142,8 @@ XTENSA_CONFIG_INSTANCE_LIST;
 #define XTENSA_CONFIG_ENTRY(a) "__" #a "=" STRINGIFY(a)
 
 static const char * const xtensa_config_strings[] = {
-XTENSA_CONFIG_ENTRY_LIST,
-NULL,
+  XTENSA_CONFIG_ENTRY_LIST,
+  NULL,
 };
 
 const struct xtensa_config_v1 *xtensa_get_config_v1 (void)
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 94a98c25f8c9..178d16a78462 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -176,7 +176,7 @@ static bool constantpool_address_p (const_rtx addr);
 static bool xtensa_legitimate_constant_p (machine_mode, rtx);
 static void xtensa_reorg (void);
 static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int &,
- unsigned int, bool);
+unsigned int, bool);
 static const char *xtensa_invalid_within_doloop (const rtx_insn *);
 
 static bool xtensa_member_type_forces_blk (const_tree,
@@ -2115,7 +2115,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx 

[COMMITTED] gcc: xtensa: use define_c_enums instead of define_constants

2022-12-27 Thread Max Filippov via Gcc-patches
This improves RTL dumps readability. No functional changes.

gcc/
* config/xtensa/xtensa.md (unspec): Extract UNSPEC_* constants
into this enum.
(unspecv): Extract UNSPECV_* constants into this enum.
---
 gcc/config/xtensa/xtensa.md | 46 -
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index e72fd9ac3f61..0a26d3dccf44 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -25,28 +25,32 @@
   (A7_REG  7)
   (A8_REG  8)
   (A9_REG  9)
+])
+
+(define_c_enum "unspec" [
+  UNSPEC_NOP
+  UNSPEC_PLT
+  UNSPEC_RET_ADDR
+  UNSPEC_TPOFF
+  UNSPEC_DTPOFF
+  UNSPEC_TLS_FUNC
+  UNSPEC_TLS_ARG
+  UNSPEC_TLS_CALL
+  UNSPEC_TP
+  UNSPEC_MEMW
+  UNSPEC_LSETUP_START
+  UNSPEC_LSETUP_END
+  UNSPEC_FRAME_BLOCKAGE
+])
 
-  (UNSPEC_NOP  2)
-  (UNSPEC_PLT  3)
-  (UNSPEC_RET_ADDR 4)
-  (UNSPEC_TPOFF 5)
-  (UNSPEC_DTPOFF   6)
-  (UNSPEC_TLS_FUNC 7)
-  (UNSPEC_TLS_ARG  8)
-  (UNSPEC_TLS_CALL 9)
-  (UNSPEC_TP   10)
-  (UNSPEC_MEMW 11)
-  (UNSPEC_LSETUP_START  12)
-  (UNSPEC_LSETUP_END13)
-  (UNSPEC_FRAME_BLOCKAGE 14)
-
-  (UNSPECV_SET_FP  1)
-  (UNSPECV_ENTRY   2)
-  (UNSPECV_S32RI   4)
-  (UNSPECV_S32C1I  5)
-  (UNSPECV_EH_RETURN   6)
-  (UNSPECV_SET_TP  7)
-  (UNSPECV_BLOCKAGE 8)
+(define_c_enum "unspecv" [
+  UNSPECV_SET_FP
+  UNSPECV_ENTRY
+  UNSPECV_S32RI
+  UNSPECV_S32C1I
+  UNSPECV_EH_RETURN
+  UNSPECV_SET_TP
+  UNSPECV_BLOCKAGE
 ])
 
 ;; This code iterator allows signed and unsigned widening multiplications
-- 
2.30.2



[COMMITTED 2/4] xtensa: Clean up xtensa_expand_prologue

2022-12-27 Thread Max Filippov via Gcc-patches
From: Takayuki 'January June' Suwa 

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_expand_prologue): Modify to
exit the inspection loops as soon as the necessity of stack
pointer is found.
---
 gcc/config/xtensa/xtensa.cc | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 178d16a78462..709b0d52d069 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3327,12 +3327,18 @@ xtensa_expand_prologue (void)
 ref; ref = DF_REF_NEXT_REG (ref))
  if (DF_REF_CLASS (ref) == DF_REF_REGULAR
  && NONJUMP_INSN_P (DF_REF_INSN (ref)))
-   stack_pointer_needed = true;
+   {
+ stack_pointer_needed = true;
+ break;
+   }
   /* Check if callee-saved registers really need saving to the stack.  */
   if (!stack_pointer_needed)
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
  if (xtensa_call_save_reg (regno))
-   stack_pointer_needed = true;
+   {
+ stack_pointer_needed = true;
+ break;
+   }
 
   cfun->machine->inhibit_logues_a1_adjusts = !stack_pointer_needed;
 
-- 
2.30.2



[COMMITTED 3/4] xtensa: Change GP_RETURN{, _REG_COUNT} to GP_RETURN_{FIRST, LAST}

2022-12-27 Thread Max Filippov via Gcc-patches
From: Takayuki 'January June' Suwa 

gcc/ChangeLog:

* config/xtensa/xtensa.h (GP_RETURN, GP_RETURN_REG_COUNT):
Change to GP_RETURN_FIRST and GP_RETURN_LAST, respectively.
* config/xtensa/xtensa.cc (xtensa_function_value,
xtensa_libcall_value, xtensa_function_value_regno_p): Ditto.
---
 gcc/config/xtensa/xtensa.cc | 10 +-
 gcc/config/xtensa/xtensa.h  |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 709b0d52d069..66e253495211 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4526,9 +4526,9 @@ xtensa_function_value (const_tree valtype, const_tree 
func ATTRIBUTE_UNUSED,
   bool outgoing)
 {
   return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype)
-  && TYPE_PRECISION (valtype) < BITS_PER_WORD)
- ? SImode : TYPE_MODE (valtype),
- outgoing ? GP_OUTGOING_RETURN : GP_RETURN);
+  && TYPE_PRECISION (valtype) < BITS_PER_WORD)
+ ? SImode : TYPE_MODE (valtype),
+ outgoing ? GP_OUTGOING_RETURN : GP_RETURN_FIRST);
 }
 
 /* Worker function for TARGET_LIBCALL_VALUE.  */
@@ -4538,7 +4538,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun 
ATTRIBUTE_UNUSED)
 {
   return gen_rtx_REG ((GET_MODE_CLASS (mode) == MODE_INT
   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
- ? SImode : mode, GP_RETURN);
+ ? SImode : mode, GP_RETURN_FIRST);
 }
 
 /* Worker function TARGET_FUNCTION_VALUE_REGNO_P.  */
@@ -4546,7 +4546,7 @@ xtensa_libcall_value (machine_mode mode, const_rtx fun 
ATTRIBUTE_UNUSED)
 static bool
 xtensa_function_value_regno_p (const unsigned int regno)
 {
-  return (regno >= GP_RETURN && regno < GP_RETURN + GP_RETURN_REG_COUNT);
+  return IN_RANGE (regno, GP_RETURN_FIRST, GP_RETURN_LAST);
 }
 
 /* The static chain is passed in memory.  Provide rtx giving 'mem'
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 60d3fdfbc706..a7c112b87538 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -474,9 +474,9 @@ enum reg_class
 
 /* Symbolic macros for the registers used to return integer, floating
point, and values of coprocessor and user-defined modes.  */
-#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE)
+#define GP_RETURN_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE)
+#define GP_RETURN_LAST  (GP_RETURN_FIRST + 3)
 #define GP_OUTGOING_RETURN (GP_REG_FIRST + 2)
-#define GP_RETURN_REG_COUNT 4
 
 /* Symbolic macros for the first/last argument registers.  */
 #define GP_ARG_FIRST (GP_REG_FIRST + 2)
-- 
2.30.2



[COMMITTED 4/4] xtensa: Generate density instructions in set_frame_ptr

2022-12-27 Thread Max Filippov via Gcc-patches
From: Takayuki 'January June' Suwa 

gcc/ChangeLog:

* config/xtensa/xtensa.md (set_frame_ptr): Fix to reflect
TARGET_DENSITY.
---
 gcc/config/xtensa/xtensa.md | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index a77d3285bad1..e72fd9ac3f61 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2562,12 +2562,15 @@
   ""
 {
   if (frame_pointer_needed)
-return "mov\ta7, sp";
+return (TARGET_DENSITY ? "mov.n\ta7, sp" : "mov\ta7, sp");
   return "";
 }
   [(set_attr "type" "move")
(set_attr "mode" "SI")
-   (set_attr "length"  "3")])
+   (set (attr "length")
+   (if_then_else (match_test "TARGET_DENSITY")
+ (const_int 2)
+ (const_int 3)))])
 
 ;; Post-reload splitter to remove fp assignment when it's not needed.
 (define_split
-- 
2.30.2



Re: [PATCH] xtensa: Apply a few minor fixes

2022-12-27 Thread Max Filippov via Gcc-patches
On Mon, Dec 26, 2022 at 10:30 PM Takayuki 'January June' Suwa
 wrote:
>
> Almost cosmetic and no functional changes.
>
> gcc/ChangeLog:
>
> * config/xtensa/*: Tabify, and trim trailing spaces.
> * config/xtensa/xtensa.h (GP_RETURN, GP_RETURN_REG_COUNT):
> Change to GP_RETURN_FIRST and GP_RETURN_LAST, respectively.
> * config/xtensa/xtensa.cc (xtensa_function_value,
> xtensa_libcall_value, xtensa_function_value_regno_p): Ditto.
> (xtensa_expand_prologue): Modify to exit the inspection loops
> as soon as the necessity of stack pointer is found.
> (xtensa_set_return_address): Change the style of brackets.
> * config/xtensa/xtensa.md (set_frame_ptr):
> Fix to reflect TARGET_DENSITY.
> ---
>  gcc/config/xtensa/elf.h  |  32 
>  gcc/config/xtensa/linux.h|   1 -
>  gcc/config/xtensa/uclinux.h  |   1 -
>  gcc/config/xtensa/xtensa-dynconfig.c |   6 +-
>  gcc/config/xtensa/xtensa.cc  | 105 ++-
>  gcc/config/xtensa/xtensa.h   |  10 +--
>  gcc/config/xtensa/xtensa.md  | 105 ++-
>  7 files changed, 133 insertions(+), 127 deletions(-)

I've split it into independent parts and applied to master.

-- 
Thanks.
-- Max


[COMMITTED] libgcc: xtensa: remove stray symbols from X*HAL macro definitions

2022-12-08 Thread Max Filippov via Gcc-patches
libgcc/
* config/xtensa/xtensa-config-builtin.h (XCHAL_NUM_AREGS)
(XCHAL_ICACHE_SIZE, XCHAL_DCACHE_SIZE, XCHAL_ICACHE_LINESIZE)
(XCHAL_DCACHE_LINESIZE, XCHAL_MMU_MIN_PTE_PAGE_SIZE)
(XSHAL_ABI): Remove stray symbols from macro definitions.
---
 libgcc/config/xtensa/xtensa-config-builtin.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libgcc/config/xtensa/xtensa-config-builtin.h 
b/libgcc/config/xtensa/xtensa-config-builtin.h
index 36d4d9db330b..47782a064523 100644
--- a/libgcc/config/xtensa/xtensa-config-builtin.h
+++ b/libgcc/config/xtensa/xtensa-config-builtin.h
@@ -130,7 +130,7 @@
 #define XCHAL_HAVE_WINDOWED __XCHAL_HAVE_WINDOWED
 
 #undef XCHAL_NUM_AREGS
-#define XCHAL_NUM_AREGS __XCHAL_NUM_AREGS2
+#define XCHAL_NUM_AREGS __XCHAL_NUM_AREGS
 
 #undef XCHAL_HAVE_WIDE_BRANCHES
 #define XCHAL_HAVE_WIDE_BRANCHES   __XCHAL_HAVE_WIDE_BRANCHES
@@ -140,16 +140,16 @@
 
 
 #undef XCHAL_ICACHE_SIZE
-#define XCHAL_ICACHE_SIZE  __XCHAL_ICACHE_SIZE6384
+#define XCHAL_ICACHE_SIZE  __XCHAL_ICACHE_SIZE
 
 #undef XCHAL_DCACHE_SIZE
-#define XCHAL_DCACHE_SIZE  __XCHAL_DCACHE_SIZE6384
+#define XCHAL_DCACHE_SIZE  __XCHAL_DCACHE_SIZE
 
 #undef XCHAL_ICACHE_LINESIZE
-#define XCHAL_ICACHE_LINESIZE  __XCHAL_ICACHE_LINESIZE2
+#define XCHAL_ICACHE_LINESIZE  __XCHAL_ICACHE_LINESIZE
 
 #undef XCHAL_DCACHE_LINESIZE
-#define XCHAL_DCACHE_LINESIZE  __XCHAL_DCACHE_LINESIZE2
+#define XCHAL_DCACHE_LINESIZE  __XCHAL_DCACHE_LINESIZE
 
 #undef XCHAL_ICACHE_LINEWIDTH
 #define XCHAL_ICACHE_LINEWIDTH __XCHAL_ICACHE_LINEWIDTH
@@ -165,7 +165,7 @@
 #define XCHAL_HAVE_MMU __XCHAL_HAVE_MMU
 
 #undef XCHAL_MMU_MIN_PTE_PAGE_SIZE
-#define XCHAL_MMU_MIN_PTE_PAGE_SIZE __XCHAL_MMU_MIN_PTE_PAGE_SIZE2
+#define XCHAL_MMU_MIN_PTE_PAGE_SIZE __XCHAL_MMU_MIN_PTE_PAGE_SIZE
 
 
 #undef XCHAL_HAVE_DEBUG
@@ -191,7 +191,7 @@
 #undef XSHAL_ABI
 #undef XTHAL_ABI_WINDOWED
 #undef XTHAL_ABI_CALL0
-#define XSHAL_ABI  __XSHAL_ABITHAL_ABI_WINDOWED
+#define XSHAL_ABI  __XSHAL_ABI
 #define XTHAL_ABI_WINDOWED __XTHAL_ABI_WINDOWED
 #define XTHAL_ABI_CALL0 __XTHAL_ABI_CALL0
 
-- 
2.30.2



  1   2   3   4   >