[PATCH 1/2] xtensa: Resurrect LEAF_REGISTERS and LEAF_REG_REMAP

2024-03-26 Thread Takayuki 'January June' Suwa
They were once mistakenly removed by the commit
"xtensa: Remove old broken tweak for leaf function", but their removal
caused unwanted register spills.

gcc/ChangeLog:

* config/xtensa/xtensa.h (LEAF_REGISTERS, LEAF_REG_REMAP):
Withdraw the removal.
(REG_ALLOC_ORDER): Cosmetics.
* config/xtensa/xtensa.cc (xtensa_leaf_regs): Withdraw the removal.
(xtensa_adjust_reg_alloc_order): Cosmetics.
---
 gcc/config/xtensa/xtensa.cc | 45 ++---
 gcc/config/xtensa/xtensa.h  | 29 ++--
 2 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 9beac932467..df888294556 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -110,6 +110,18 @@ struct GTY(()) machine_function
   HARD_REG_SET eliminated_callee_saved;
 };
 
+/* Vector, indexed by hard register number, which contains 1 for a
+   register that is allowable in a candidate for leaf function
+   treatment.  */
+
+const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] =
+{
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1
+};
+
 static void xtensa_option_override (void);
 static enum internal_test map_test_to_internal_test (enum rtx_code);
 static rtx gen_int_relational (enum rtx_code, rtx, rtx);
@@ -4314,15 +4326,32 @@ void
 xtensa_adjust_reg_alloc_order (void)
 {
   static const int reg_windowed_alloc_order[FIRST_PSEUDO_REGISTER] =
-   REG_ALLOC_ORDER;
+REG_ALLOC_ORDER;
   static const int reg_call0_alloc_order[FIRST_PSEUDO_REGISTER] =
-  {
- 9, 10, 11,  7,  6,  5,  4,  3,  2,  8,  0, 12, 13, 14, 15,
-18,
-19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
- 1, 16, 17,
-35,
-  };
+{
+  /*  a9 ... a11 : no special usage */
+   9, 10, 11,
+  /*  a7 ...  a2 : function arguments, in reverse order */
+   7,  6,  5,  4,  3,  2,
+  /*  a8: static chain */
+   8,
+  /*  a0: return address (also callee saved) */
+   0,
+  /* a12 ... a15 : callee saved */
+  12, 13, 14, 15,
+  /*  b0: boolean register for floating-point CC */
+  18,
+  /*  f0 ... f15 : floating-point registers */
+  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+  /*  sp: stack pointer */
+   1,
+  /*  fp: FRAME_POINTER (fake) */
+  16,
+  /* argp   : ARG_POINTER (fake) */
+  17,
+  /* acc: MAC16 accumulator */
+  35,
+};
 
   memcpy (reg_alloc_order, TARGET_WINDOWED_ABI ?
  reg_windowed_alloc_order : reg_call0_alloc_order,
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 9591b3d4b40..835cb4bbf3b 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -248,14 +248,39 @@ along with GCC; see the file COPYING3.  If not see
 
 #define REG_ALLOC_ORDER
\
 {  \
-   8,  9, 10, 11, 12, 13, 14, 15,  7,  6,  5,  4,  3,  2,  \
+  /*  a8 ... a15 : no special usage */ \
+   8,  9, 10, 11, 12, 13, 14, 15,  \
+  /*  a7 ...  a2 : incoming arguments, in reverse order */ \
+   7,  6,  5,  4,  3,  2,  \
+  /*  b0: boolean register for floating-point CC */\
   18,  \
+  /*  f0 ... f15 : floating-point registers */ \
   19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,  \
-   0,  1, 16, 17,  \
+  /*  a0: return address */\
+   0,  \
+  /*  sp: stack pointer */ \
+   1,  \
+  /*  fp: FRAME_POINTER (fake) */  \
+  16,  \
+  /* argp   : ARG_POINTER (fake) */\
+  17,  \
+  /* acc: MAC16 accumulator */ \
   35,  \
 }
 #define ADJUST_REG_ALLOC_ORDER xtensa_adjust_reg_alloc_order ()
 
+/* For Xtensa, the only point of this is to prevent GCC from otherwise
+   giving preference to call-used registers.  To minimize window
+   overflows for the AR registers, we want to give preference to the
+   lower-numbered AR registers.  For other register files, which are
+   not windowed, we still prefer call-used registers, if 

[PATCH 2/2] xtensa: Make use of std::swap where appropriate

2024-03-26 Thread Takayuki 'January June' Suwa
No functional changes.

gcc/ChangeLog:

* config/xtensa/xtensa.cc
(gen_int_relational, gen_float_relational): Replace tempvar-based
value-swapping code with std::swap.
* config/xtensa/xtensa.md (movdi_internal, movdf_internal):
Ditto.
---
 gcc/config/xtensa/xtensa.cc | 12 ++--
 gcc/config/xtensa/xtensa.md | 10 --
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index df888294556..38c6966cc31 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -799,11 +799,7 @@ gen_int_relational (enum rtx_code test_code, /* relational 
test (EQ, etc) */
 
 }
   else if (p_info->reverse_regs)
-{
-  rtx temp = cmp0;
-  cmp0 = cmp1;
-  cmp1 = temp;
-}
+std::swap (cmp0, cmp1);
 
   return gen_rtx_fmt_ee (invert ? reverse_condition (p_info->test_code)
: p_info->test_code,
@@ -847,11 +843,7 @@ gen_float_relational (enum rtx_code test_code, /* 
relational test (EQ, etc) */
 }
 
   if (reverse_regs)
-{
-  rtx temp = cmp0;
-  cmp0 = cmp1;
-  cmp1 = temp;
-}
+std::swap (cmp0, cmp1);
 
   brtmp = gen_rtx_REG (CCmode, FPCC_REGNUM);
   emit_insn (gen_fn (brtmp, cmp0, cmp1));
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index fbe40ec671a..93c3ee78a01 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1253,9 +1253,8 @@
   xtensa_split_operand_pair (operands, SImode);
   if (reg_overlap_mentioned_p (operands[0], operands[3]))
 {
-  rtx tmp;
-  tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
-  tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
+  std::swap (operands[0], operands[1]);
+  std::swap (operands[2], operands[3]);
 }
 })
 
@@ -1588,9 +1587,8 @@
   xtensa_split_operand_pair (operands, SFmode);
   if (reg_overlap_mentioned_p (operands[0], operands[3]))
 {
-  rtx tmp;
-  tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
-  tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
+  std::swap (operands[0], operands[1]);
+  std::swap (operands[2], operands[3]);
 }
 })
 
-- 
2.39.2


[PATCH] xtensa: Add supplementary split pattern for "*addsubx"

2024-03-21 Thread Takayuki 'January June' Suwa
int test(int a) {
   return a * 4 + 30000;
}

In the example above, since Xtensa has instructions to add a register value
scaled by 2, 4 or 8 (and corresponding define_insns), we would expect them
to be used, but they are not, because the expression is transformed before
reaching the RTL generation pass as below:

int test(int a) {
   return (a + 7500) * 4;
}

Fortunately, the RTL combination pass tries a splitting pattern that matches
the first example, so it is easy to solve by defining that pattern.

gcc/ChangeLog:

* config/xtensa/xtensa.md: Add new split pattern described above.
---
 gcc/config/xtensa/xtensa.md | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 5cdf4dffe70..fbe40ec671a 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -194,6 +194,20 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+   (plus:SI (ashift:SI (match_operand:SI 1 "register_operand")
+   (match_operand:SI 3 "addsubx_operand"))
+(match_operand:SI 2 "const_int_operand")))]
+  "TARGET_ADDX && can_create_pseudo_p ()"
+  [(set (match_dup 0)
+   (plus:SI (ashift:SI (match_dup 1)
+   (match_dup 3))
+(match_dup 2)))]
+{
+  operands[2] = force_reg (SImode, operands[2]);
+})
+
 (define_expand "adddi3"
   [(set (match_operand:DI 0 "register_operand")
(plus:DI (match_operand:DI 1 "register_operand")
-- 
2.39.2


[PATCH 1/2 v2] xtensa: Recover constant synthesis for HImode after LRA transition

2024-02-04 Thread Takayuki 'January June' Suwa
After LRA transition, HImode constants that don't fit into signed 12 bits
are no longer subject to constant synthesis:

/* example */
void test(void) {
  short foo = 32767;
  __asm__ ("" :: "r"(foo));
}

;; before
.literal_position
.literal .LC0, 32767
test:
l32r    a9, .LC0
ret.n

This patch fixes that:

;; after
test:
movi.n  a9, -1
extui   a9, a9, 17, 15
ret.n

gcc/ChangeLog:

* config/xtensa/xtensa.md (SHI): New mode iterator.
(2 split patterns related to constsynth):
Change to also accept HImode operands.
---
 gcc/config/xtensa/xtensa.md | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 13b8b57f1fc..1a2249b059a 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -87,6 +87,10 @@
 ;; the same template.
 (define_mode_iterator HQI [HI QI])
 
+;; This mode iterator allows the SI and HI patterns to be defined from
+;; the same template.
+(define_mode_iterator SHI [SI HI])
+
 
 ;; Attributes.
 
@@ -1291,28 +1295,30 @@
(set_attr "length"  "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")])
 
 (define_split
-  [(set (match_operand:SI 0 "register_operand")
-   (match_operand:SI 1 "const_int_operand"))]
+  [(set (match_operand:SHI 0 "register_operand")
+   (match_operand:SHI 1 "const_int_operand"))]
   "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS
&& ! xtensa_split1_finished_p ()
&& ! xtensa_simm12b (INTVAL (operands[1]))"
   [(set (match_dup 0)
(match_dup 1))]
 {
-  operands[1] = force_const_mem (SImode, operands[1]);
+  operands[1] = force_const_mem (mode, operands[1]);
 })
 
 (define_split
-  [(set (match_operand:SI 0 "register_operand")
-   (match_operand:SI 1 "constantpool_operand"))]
+  [(set (match_operand:SHI 0 "register_operand")
+   (match_operand:SHI 1 "constantpool_operand"))]
   "! optimize_debug && reload_completed"
   [(const_int 0)]
 {
-  rtx x = avoid_constant_pool_reference (operands[1]);
+  rtx x = avoid_constant_pool_reference (operands[1]), dst = operands[0];
   if (! CONST_INT_P (x))
 FAIL;
-  if (! xtensa_constantsynth (operands[0], INTVAL (x)))
-emit_move_insn (operands[0], x);
+  if (mode == HImode)
+dst = gen_rtx_REG (SImode, REGNO (dst));
+  if (! xtensa_constantsynth (dst, INTVAL (x)))
+emit_move_insn (dst, x);
   DONE;
 })
 
-- 
2.30.2


[PATCH 1/2] xtensa: Recover constant synthesis for HImode after LRA transition

2024-02-03 Thread Takayuki 'January June' Suwa
After LRA transition, HImode constants that don't fit into signed 12 bits
are no longer subject to constant synthesis:

/* example */
void test(void) {
  short foo = 32767;
  __asm__ ("" :: "r"(foo));
}

;; before
.literal_position
.literal .LC0, 32767
test:
l32r    a9, .LC0
ret.n

This patch fixes that:

;; after
test:
movi.n  a9, -1
extui   a9, a9, 17, 15
ret.n

gcc/ChangeLog:

* config/xtensa/xtensa.md (2 split patterns related to constsynth):
Change to also accept HImode operands.
---
 gcc/config/xtensa/xtensa.md | 30 +++---
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index f3953aa26b0..5242eb3c006 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1291,28 +1291,36 @@
(set_attr "length"  "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")])
 
 (define_split
-  [(set (match_operand:SI 0 "register_operand")
-   (match_operand:SI 1 "const_int_operand"))]
+  [(set (match_operand 0 "register_operand")
+   (match_operand 1 "const_int_operand"))]
   "!TARGET_CONST16 && !TARGET_AUTO_LITPOOLS
&& ! xtensa_split1_finished_p ()
-   && ! xtensa_simm12b (INTVAL (operands[1]))"
+   && ! xtensa_simm12b (INTVAL (operands[1]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])
+   && (GET_MODE (operands[0]) == SImode
+   || GET_MODE (operands[0]) == HImode)"
   [(set (match_dup 0)
(match_dup 1))]
 {
-  operands[1] = force_const_mem (SImode, operands[1]);
+  operands[1] = force_const_mem (GET_MODE (operands[0]), operands[1]);
 })
 
 (define_split
-  [(set (match_operand:SI 0 "register_operand")
-   (match_operand:SI 1 "constantpool_operand"))]
-  "! optimize_debug && reload_completed"
+  [(set (match_operand 0 "register_operand")
+   (match_operand 1 "constantpool_operand"))]
+  "! optimize_debug && reload_completed
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])
+   && (GET_MODE (operands[0]) == SImode
+   || GET_MODE (operands[0]) == HImode)"
   [(const_int 0)]
 {
-  rtx x = avoid_constant_pool_reference (operands[1]);
-  if (! CONST_INT_P (x))
+  rtx x, dst;
+  if (! CONST_INT_P (x = avoid_constant_pool_reference (operands[1])))
 FAIL;
-  if (! xtensa_constantsynth (operands[0], INTVAL (x)))
-emit_move_insn (operands[0], x);
+  if (GET_MODE (dst = operands[0]) == HImode)
+dst = gen_rtx_REG (SImode, REGNO (dst));
+  if (! xtensa_constantsynth (dst, INTVAL (x)))
+emit_move_insn (dst, x);
   DONE;
 })
 
-- 
2.30.2


[PATCH 2/2] xtensa: Fix missing mode warning in "*eqne_zero_masked_bits"

2024-02-03 Thread Takayuki 'January June' Suwa
gcc/ChangeLog:

* config/xtensa/xtensa.md (*eqne_zero_masked_bits):
Add missing ":SI" to the match_operator.
---
 gcc/config/xtensa/xtensa.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 5242eb3c006..1a031d79cf3 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3271,7 +3271,7 @@
 
 (define_insn_and_split "*eqne_zero_masked_bits"
   [(set (match_operand:SI 0 "register_operand" "=a")
-   (match_operator 3 "boolean_operator"
+   (match_operator:SI 3 "boolean_operator"
[(and:SI (match_operand:SI 1 "register_operand" "r")
 (match_operand:SI 2 "const_int_operand" "i"))
 (const_int 0)]))]
-- 
2.30.2


Re: [RFC] gcc: xtensa: use salt/saltu in xtensa_expand_scc

2023-09-08 Thread Takayuki 'January June' Suwa via Gcc-patches
Hi!

On 2023/09/07 23:22, Max Filippov wrote:
> gcc/
>   * config/xtensa/predicates.md (xtensa_cstoresi_operator): Add
>   unsigned comparisons.
>   * config/xtensa/xtensa.cc (xtensa_expand_scc): Add code
>   generation of salt/saltu instructions.
>   * config/xtensa/xtensa.h (TARGET_SALT): New macro.
>   * gcc/config/xtensa/xtensa.md (salt, saltu): New instruction
>   patterns.
> ---
> I've tested it both with configurations that have salt/saltu and that
> don't.
> The inversion of the result at the end looks wasteful. I've been reading
> gccint chapter about cstoreMODE4 and the following part left me with the
> question:
> 
>   The value stored for a true condition must have 1 as its low bit,
>   or else must be negative.
> 
> Does it mean that some variants of cstoreMODE4 may return 1 and some may
> return -1 for truth, as both have 1 as its low bit?

IMHO it is nothing more than the fact that there are two possible integer 
constants that represent 'true' (the result of !0), namely either 1 or -1.
And given a certain target and configuration, it must be consistently fixed to 
one value or the other; for Xtensa, it should be an integer constant of 1.

>  If that's true we
> could use 'addi dest, dest, -1' instead of two-instruction sequence
> 'movi tmp, 1; xor dest, dest, tmp'.

An alternative way to convert 1 to 0 and 0 to 1:

neg dest, dest
addi dest, dest, 1

This requires no temporary register.


[PATCH] xtensa: Optimize several boolean evaluations of EQ/NE against constant zero

2023-09-08 Thread Takayuki 'January June' Suwa via Gcc-patches
An idiomatic implementation of boolean evaluation of whether a register is
zero or not in Xtensa is to assign 1 and 0 to the temporary and destination
registers, respectively, and then issue the MOV[EQ/NE]Z machine instruction
(see 8.3.2 Instruction Idioms, Xtensa ISA refman., p.599):

;; A2 = (A3 != 0) ? 1 : 0;
movi.n  a9, 1
movi.n  a2, 0
movnez  a2, a9, a3  ;; if (A3 != 0) A2 = A9;

As you can see in the above idiom, if the source and destination are the
same register, a move instruction from the source to another temporary
register must be prepended:

;; A2 = (A2 == 0) ? 1 : 0;
mov.n   a10, a2
movi.n  a9, 1
movi.n  a2, 0
moveqz  a2, a9, a10  ;; if (A10 == 0) A2 = A9;

Fortunately, we can reduce the number of instructions and temporary
registers with a few tweaks:

;; A2 = (A3 != 0) ? 1 : 0;
movi.n  a2, 1
moveqz  a2, a3, a3  ;; if (A3 == 0) A2 = A3;

;; A2 = (A2 != 0) ? 1 : 0;
movi.n  a9, 1
movnez  a2, a9, a2  ;; if (A2 != 0) A2 = A9;

;; A2 = (A3 == 0) ? 1 : 0;
movi.n  a2, -1
moveqz  a2, a3, a3  ;; if (A3 == 0) A2 = A3;
addi.n  a2, a2, 1

;; A2 = (A2 == 0) ? 1 : 0;
movi.n  a9, -1
movnez  a2, a9, a2  ;; if (A2 != 0) A2 = A9;
addi.n  a2, a2, 1

Additionally, if TARGET_NSA is configured, the NSAU machine instruction
returns 32 if and only if its source is 0, and a value less than 32 otherwise;
this fact can be used in the boolean evaluation of an EQ comparison.

;; A2 = (A3 == 0) ? 1 : 0;
nsau    a2, a3  ;; Source and destination can be the same register
srli    a2, a2, 5

Furthermore, this patch also saves one instruction when determining whether
the result of ANDing with a mask value whose 1s are contiguous from the upper
or lower bit end (for example, 0xFFE0 or 0x003F) is 0 or not.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_expand_scc):
Revert the changes from the last patch, as the RTL expansion pass
runs too early for the physical registers to be known.
* config/xtensa/xtensa.md (*eqne_INT_MIN): Ditto.
(eq_zero_NSA, eqne_zero, *eqne_zero_masked_bits): New patterns.
---
 gcc/config/xtensa/xtensa.cc |  35 +--
 gcc/config/xtensa/xtensa.md | 112 
 2 files changed, 113 insertions(+), 34 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 1afaa1cc94e..2481b028ca1 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -994,41 +994,8 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
   rtx cmp;
   rtx one_tmp, zero_tmp;
   rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
-  enum rtx_code code = GET_CODE (operands[1]);
 
-  if (cmp_mode == SImode && CONST_INT_P (operands[3])
-  && (code == EQ || code == NE))
-switch (INTVAL (operands[3]))
-  {
-  case 0:
-   if (TARGET_MINMAX)
- {
-   one_tmp = force_reg (SImode, const1_rtx);
-   emit_insn (gen_uminsi3 (dest, operands[2], one_tmp));
-   if (code == EQ)
- emit_insn (gen_xorsi3 (dest, dest, one_tmp));
-   return 1;
- }
-   break;
-  case -2147483648:
-   if (TARGET_ABS)
- {
-   emit_insn (gen_abssi2 (dest, operands[2]));
-   if (code == EQ)
- emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (31)));
-   else
- {
-   emit_insn (gen_ashrsi3 (dest, dest, GEN_INT (31)));
-   emit_insn (gen_addsi3 (dest, dest, const1_rtx));
- }
-   return 1;
- }
-   break;
-  default:
-   break;
-  }
-
-  if (! (cmp = gen_conditional_move (code, cmp_mode,
+  if (! (cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
 operands[2], operands[3])))
 return 0;
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index d6505e7eb70..6476fdc395a 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3188,6 +3188,118 @@
  (const_int 5)
  (const_int 6)))])
 
+(define_insn_and_split "eq_zero_NSA"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (eq:SI (match_operand:SI 1 "register_operand" "r")
+  (const_int 0)))]
+  "TARGET_NSA"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (clz:SI (match_dup 1)))
+   (set (match_dup 0)
+   (lshiftrt:SI (match_dup 0)
+(const_int 5)))]
+  ""
+  [(set_attr "type""move")
+   (set_attr "mode""SI")
+   (set_attr "length"  "6")])
+
+(define_insn_and_split "eqne_zero"
+  [(set (match_operand:SI 0 "register_operand" "=a,&a")
+   (match_operator:SI 2 "boolean_operator"
+   [(match_operand:SI 1 "register_operand" "0,r")
+(const_int 0)]))
+   (clobber (match_scratch:SI 3 "=&a,X"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  enum rtx_code code = GET_CODE 

Re: [PATCH] xtensa: Optimize boolean evaluation when SImode EQ/NE to zero if TARGET_MINMAX

2023-09-05 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/09/06 8:01, Max Filippov wrote:
> Hi Suwa-san,
Hi!

> 
> On Tue, Sep 5, 2023 at 2:29 AM Takayuki 'January June' Suwa
>  wrote:
>>
>> This patch optimizes the boolean evaluation for equality to 0 in SImode
>> using the MINU (Minimum Value Unsigned) machine instruction available
>> when TARGET_MINMAX is configured, for example, (x != 0) to MINU(x, 1)
>> and (x == 0) to (MINU(x, 1) ^ 1).
>>
>> /* example */
>> int test0(int x) {
>>   return x == 0;
>> }
>> int test1(int x) {
>>   return x != 0;
>> }
>>
>> ;; before
>> test0:
>> mov.n   a10, a2
>> movi.n  a9, 1
>> movi.n  a2, 0
>> moveqz  a2, a9, a10
>> ret.n
>> test1:
>> mov.n   a10, a2
>> movi.n  a9, 1
>> movi.n  a2, 0
>> movnez  a2, a9, a10
>> ret.n
>>
>> ;; after (prereq. TARGET_MINMAX)
>> test0:
>> movi.n  a9, 1
> >> minu    a2, a2, a9
>> xor a2, a2, a9
>> ret.n
> 
> ISTM that test0 could be done with movnez in the same three instructions:
> 
>   movi a9, 1
>   movnez a2, a9, a2
>   xor a2, a2, a9

Unfortunately, the MOV[EQ/NE]Z machine instruction can only be used to 
implement the functionality if the input and output physical registers are the 
same (a2 in the example).
In fact, when modified to use MOV[EQ/NE]Z, GCC register allocator often 
prepends a register move instruction to satisfy the above constraint (and thus 
often does not save instruction count).

I'm currently trying to see if I can somehow follow up after the physical 
register is determined (around split2 or peephole2).

> 
>> test1:
>> movi.n  a9, 1
> >> minu    a2, a2, a9
>> ret.n
> 
> ISTM that test1 could be done with movnez in the same two instructions:
> 
>   movi a9, 1
>   movnez a2, a9, a2
> 


[PATCH] xtensa: Optimize boolean evaluation when SImode EQ/NE to zero if TARGET_MINMAX

2023-09-05 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch optimizes the boolean evaluation for equality to 0 in SImode
using the MINU (Minimum Value Unsigned) machine instruction available
when TARGET_MINMAX is configured, for example, (x != 0) to MINU(x, 1)
and (x == 0) to (MINU(x, 1) ^ 1).

/* example */
int test0(int x) {
  return x == 0;
}
int test1(int x) {
  return x != 0;
}

;; before
test0:
mov.n   a10, a2
movi.n  a9, 1
movi.n  a2, 0
moveqz  a2, a9, a10
ret.n
test1:
mov.n   a10, a2
movi.n  a9, 1
movi.n  a2, 0
movnez  a2, a9, a10
ret.n

;; after (prereq. TARGET_MINMAX)
test0:
movi.n  a9, 1
minu    a2, a2, a9
xor a2, a2, a9
ret.n
test1:
movi.n  a9, 1
minu    a2, a2, a9
ret.n

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_expand_scc):
Add code for particular constants (only 0 and INT_MIN for now)
for EQ/NE boolean evaluation in SImode.
* config/xtensa/xtensa.md (*eqne_INT_MIN): Remove because its
implementation has been integrated into the above.
---
 gcc/config/xtensa/xtensa.cc | 43 +++--
 gcc/config/xtensa/xtensa.md | 34 -
 2 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index af71e2179d0..1afaa1cc94e 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -994,15 +994,46 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
   rtx cmp;
   rtx one_tmp, zero_tmp;
   rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
+  enum rtx_code code = GET_CODE (operands[1]);
 
-  if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
-   operands[2], operands[3])))
+  if (cmp_mode == SImode && CONST_INT_P (operands[3])
+  && (code == EQ || code == NE))
+switch (INTVAL (operands[3]))
+  {
+  case 0:
+   if (TARGET_MINMAX)
+ {
+   one_tmp = force_reg (SImode, const1_rtx);
+   emit_insn (gen_uminsi3 (dest, operands[2], one_tmp));
+   if (code == EQ)
+ emit_insn (gen_xorsi3 (dest, dest, one_tmp));
+   return 1;
+ }
+   break;
+  case -2147483648:
+   if (TARGET_ABS)
+ {
+   emit_insn (gen_abssi2 (dest, operands[2]));
+   if (code == EQ)
+ emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (31)));
+   else
+ {
+   emit_insn (gen_ashrsi3 (dest, dest, GEN_INT (31)));
+   emit_insn (gen_addsi3 (dest, dest, const1_rtx));
+ }
+   return 1;
+ }
+   break;
+  default:
+   break;
+  }
+
+  if (! (cmp = gen_conditional_move (code, cmp_mode,
+operands[2], operands[3])))
 return 0;
 
-  one_tmp = gen_reg_rtx (SImode);
-  zero_tmp = gen_reg_rtx (SImode);
-  emit_insn (gen_movsi (one_tmp, const_true_rtx));
-  emit_insn (gen_movsi (zero_tmp, const0_rtx));
+  one_tmp = force_reg (SImode, const1_rtx);
+  zero_tmp = force_reg (SImode, const0_rtx);
 
   gen_fn = (cmp_mode == SImode
? gen_movsicc_internal0
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 5386e45b51d..d6505e7eb70 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3188,40 +3188,6 @@
  (const_int 5)
  (const_int 6)))])
 
-
-(define_insn_and_split "*eqne_INT_MIN"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-   (match_operator:SI 2 "boolean_operator"
-   [(match_operand:SI 1 "register_operand" "r")
-(const_int -2147483648)]))]
-  "TARGET_ABS"
-  "#"
-  "&& 1"
-  [(set (match_dup 0)
-   (abs:SI (match_dup 1)))
-   (set (match_dup 0)
-   (match_op_dup:SI 2
-   [(match_dup 0)
-(const_int 31)]))
-   (match_dup 3)]
-{
-  enum rtx_code code = GET_CODE (operands[2]);
-  operands[2] = gen_rtx_fmt_ee ((code == EQ) ? LSHIFTRT : ASHIFTRT,
-   SImode, XEXP (operands[2], 0),
-   XEXP (operands[2], 1));
-  operands[3] = (code != EQ) ? gen_addsi3 (operands[0],
-  operands[0], const1_rtx)
-: const0_rtx;
-}
-  [(set_attr "type""move")
-   (set_attr "mode""SI")
-   (set (attr "length")
-   (if_then_else (match_test "GET_CODE (operands[2]) == EQ")
- (const_int 3)
- (if_then_else (match_test "TARGET_DENSITY")
-   (const_int 5)
-   (const_int 6))))])
-
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand")
(match_operand:SI 6 "reload_operand"))
-- 
2.30.2


[PATCH] xtensa: Use HARD_REG_SET instead of bare integer

2023-07-03 Thread Takayuki 'January June' Suwa via Gcc-patches
gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function, xtensa_expand_prologue):
Change to use HARD_REG_BIT and its macros.
* config/xtensa/xtensa.md
(peephole2: regmove elimination during DFmode input reload):
Likewise.
---
 gcc/config/xtensa/xtensa.cc |  9 +
 gcc/config/xtensa/xtensa.md | 13 ++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3298d53493c..992e80d824d 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -107,7 +107,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
-  HOST_WIDE_INT eliminated_callee_saved_bmp;
+  HARD_REG_SET eliminated_callee_saved;
 };
 
 static void xtensa_option_override (void);
@@ -3586,7 +3586,8 @@ xtensa_expand_prologue (void)
df_insn_rescan (insnS);
SET_SRC (PATTERN (insnR)) = copy_rtx (mem);
df_insn_rescan (insnR);
-   cfun->machine->eliminated_callee_saved_bmp |= 1 << regno;
+   SET_HARD_REG_BIT (cfun->machine->eliminated_callee_saved,
+ regno);
  }
else
  {
@@ -3690,8 +3691,8 @@ xtensa_expand_epilogue (bool sibcall_p)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
if (xtensa_call_save_reg(regno))
  {
-   if (! (cfun->machine->eliminated_callee_saved_bmp
-  & (1 << regno)))
+   if (! TEST_HARD_REG_BIT (cfun->machine->eliminated_callee_saved,
+regno))
  {
rtx x = gen_rtx_PLUS (Pmode,
  stack_pointer_rtx, GEN_INT (offset));
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 664424f1239..5386e45b51d 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3240,15 +3240,14 @@
(set (match_dup 3)
(match_dup 7))]
 {
-  uint32_t check = 0;
+  HARD_REG_SET regs;
   int i;
+  CLEAR_HARD_REG_SET (regs);
   for (i = 0; i <= 3; ++i)
-{
-  uint32_t mask = (uint32_t)1 << REGNO (operands[i]);
-  if (check & mask)
-   FAIL;
-  check |= mask;
-}
+if (TEST_HARD_REG_BIT (regs, REGNO (operands[i])))
+  FAIL;
+else
+  SET_HARD_REG_BIT (regs, REGNO (operands[i]));
   operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0));
   operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0));
 })
-- 
2.30.2


[PATCH 1/2] xtensa: Fix missing mode warning in "*eqne_INT_MIN"

2023-07-01 Thread Takayuki 'January June' Suwa via Gcc-patches
gcc/ChangeLog:

* config/xtensa/xtensa.md (*eqne_INT_MIN):
Add missing ":SI" to the match_operator.
---
 gcc/config/xtensa/xtensa.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 4b4ab3f5f37..b1af08eba8a 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3191,7 +3191,7 @@
 
 (define_insn_and_split "*eqne_INT_MIN"
   [(set (match_operand:SI 0 "register_operand" "=a")
-   (match_operator 2 "boolean_operator"
+   (match_operator:SI 2 "boolean_operator"
[(match_operand:SI 1 "register_operand" "r")
 (const_int -2147483648)]))]
   "TARGET_ABS"
-- 
2.30.2


[PATCH 2/2] xtensa: The use of CLAMPS instruction also requires TARGET_MINMAX, as well as TARGET_CLAMPS

2023-07-01 Thread Takayuki 'January June' Suwa via Gcc-patches
This is needed because the RTL representation of the CLAMPS pattern is built
from both smin and smax, each of which requires TARGET_MINMAX.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_match_CLAMPS_imms_p):
Simplify.
* config/xtensa/xtensa.md (*xtensa_clamps):
Add TARGET_MINMAX to the condition.
---
 gcc/config/xtensa/xtensa.cc | 7 ++-
 gcc/config/xtensa/xtensa.md | 4 ++--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index dd35e63c094..3298d53493c 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2649,11 +2649,8 @@ xtensa_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT 
imm, rtx scratch,
 bool
 xtensa_match_CLAMPS_imms_p (rtx cst_max, rtx cst_min)
 {
-  int max, min;
-
-  return IN_RANGE (max = exact_log2 (-INTVAL (cst_max)), 7, 22)
-&& IN_RANGE (min = exact_log2 (INTVAL (cst_min) + 1), 7, 22)
-&& max == min;
+  return IN_RANGE (exact_log2 (-INTVAL (cst_max)), 7, 22)
+&& (INTVAL (cst_max) + INTVAL (cst_min)) == -1;
 }
 
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index b1af08eba8a..664424f1239 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -522,7 +522,7 @@
(smax:SI (smin:SI (match_operand:SI 1 "register_operand" "r")
  (match_operand:SI 2 "const_int_operand" "i"))
 (match_operand:SI 3 "const_int_operand" "i")))]
-  "TARGET_CLAMPS
+  "TARGET_MINMAX && TARGET_CLAMPS
&& xtensa_match_CLAMPS_imms_p (operands[3], operands[2])"
   "#"
   "&& 1"
@@ -540,7 +540,7 @@
(smin:SI (smax:SI (match_operand:SI 1 "register_operand" "r")
  (match_operand:SI 2 "const_int_operand" "i"))
 (match_operand:SI 3 "const_int_operand" "i")))]
-  "TARGET_CLAMPS
+  "TARGET_MINMAX && TARGET_CLAMPS
&& xtensa_match_CLAMPS_imms_p (operands[2], operands[3])"
 {
   static char result[64];
-- 
2.30.2


[PATCH 1/2] xtensa: Remove TARGET_MEMORY_MOVE_COST hook

2023-06-18 Thread Takayuki 'January June' Suwa via Gcc-patches
It used to always return a constant 4, which is the same as the default
behavior, but doesn't take into account the effects of secondary
reloads.

Therefore, the implementation of this target hook is removed.

gcc/ChangeLog:

* config/xtensa/xtensa.cc
(TARGET_MEMORY_MOVE_COST, xtensa_memory_move_cost): Remove.
---
 gcc/config/xtensa/xtensa.cc | 13 -
 1 file changed, 13 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3b5d25b660a..721c99b56a3 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -131,7 +131,6 @@ static bool xtensa_rtx_costs (rtx, machine_mode, int, int, 
int *, bool);
 static int xtensa_insn_cost (rtx_insn *, bool);
 static int xtensa_register_move_cost (machine_mode, reg_class_t,
  reg_class_t);
-static int xtensa_memory_move_cost (machine_mode, reg_class_t, bool);
 static tree xtensa_build_builtin_va_list (void);
 static bool xtensa_return_in_memory (const_tree, const_tree);
 static tree xtensa_gimplify_va_arg_expr (tree, tree, gimple_seq *,
@@ -213,8 +212,6 @@ static rtx xtensa_delegitimize_address (rtx);
 
 #undef TARGET_REGISTER_MOVE_COST
 #define TARGET_REGISTER_MOVE_COST xtensa_register_move_cost
-#undef TARGET_MEMORY_MOVE_COST
-#define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost
 #undef TARGET_RTX_COSTS
 #define TARGET_RTX_COSTS xtensa_rtx_costs
 #undef TARGET_INSN_COST
@@ -4356,16 +4353,6 @@ xtensa_register_move_cost (machine_mode mode 
ATTRIBUTE_UNUSED,
 return 10;
 }
 
-/* Worker function for TARGET_MEMORY_MOVE_COST.  */
-
-static int
-xtensa_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
-reg_class_t rclass ATTRIBUTE_UNUSED,
-bool in ATTRIBUTE_UNUSED)
-{
-  return 4;
-}
-
 /* Compute a (partial) cost for rtx X.  Return true if the complete
cost has been computed, and false if subexpressions should be
scanned.  In either case, *TOTAL contains the cost result.  */
-- 
2.30.2


[PATCH 2/2] xtensa: constantsynth: Add new 2-insns synthesis pattern

2023-06-18 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch adds a new 2-instructions constant synthesis pattern:

-  A non-negative square value whose root fits into a signed 12-bit immediate:
=> "MOVI(.N) Ax, simm12" + "MULL Ax, Ax, Ax"

Due to the execution cost of the integer multiply instruction (MULL), this
synthesis is performed only when the 32-bit Integer Multiply Option is
configured and optimization for size is specified.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_constantsynth_2insn):
Add new pattern for the abovementioned case.
---
 gcc/config/xtensa/xtensa.cc | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 721c99b56a3..dd35e63c094 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "insn-attr.h"
 #include "tree-pass.h"
 #include "print-rtl.h"
+#include <math.h>
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -1067,7 +1068,7 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval,
 {
   HOST_WIDE_INT imm = INT_MAX;
   rtx x = NULL_RTX;
-  int shift;
+  int shift, sqr;
 
   gcc_assert (REG_P (dst));
 
@@ -1078,7 +1079,6 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval,
   x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift));
 }
 
-
   shift = ctz_hwi (srcval);
   if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95)))
   && xtensa_simm12b (srcval >> shift))
@@ -1105,6 +1105,14 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT 
srcval,
   x = gen_addsi3 (dst, dst, GEN_INT (imm1));
 }
 
+  sqr = (int) floorf (sqrtf (srcval));
+  if (TARGET_MUL32 && optimize_size
+  && !x && IN_RANGE (srcval, 0, (2047 * 2047)) && sqr * sqr == srcval)
+{
+  imm = sqr;
+  x = gen_mulsi3 (dst, dst, dst);
+}
+
   if (!x)
 return 0;
 
-- 
2.30.2


Re: [PATCH v2] xtensa: Optimize boolean evaluation or branching when EQ/NE to zero in S[IF]mode

2023-06-05 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/06/06 0:15, Max Filippov wrote:
> Hi Suwa-san,
Hi!  Thanks for your regtest every time.

> 
> On Mon, Jun 5, 2023 at 2:37 AM Takayuki 'January June' Suwa
>  wrote:
>>
>> This patch optimizes the boolean evaluation of EQ/NE against zero
>> by adding two insn_and_split patterns similar to SImode conditional
>> store:
>>
>> "eq_zero":
>> op0 = (op1 == 0) ? 1 : 0;
>> op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */
>>
>> "movsicc_ne0_reg_zero":
>> op0 = (op1 != 0) ? op2 : 0;
>> op0 = op2; if (op1 == 0) op0 = op1;  /* optimized */
>>
>> /* example #1 */
>> int bool_eqSI(int x) {
>>   return x == 0;
>> }
>> int bool_neSI(int x) {
>>   return x != 0;
>> }
>>
>> ;; after (TARGET_NSA)
>> bool_eqSI:
>>         nsau    a2, a2
>>         srli    a2, a2, 5
>>         ret.n
>> bool_neSI:
>>         mov.n   a9, a2
>>         movi.n  a2, 1
>>         moveqz  a2, a9, a9
>>         ret.n
>>
>> These also work in SFmode by ignoring their sign bits, and further-
>> more, the branch if EQ/NE against zero in SFmode is also done in the
>> same manner.
>>
>> The reasons for this optimization in SFmode are:
>>
>>   - Only zero values (negative or non-negative) have no bits set to 1
>> in both the exponent and the mantissa.
>>   - EQ/NE comparisons involving NaNs produce no signal even if they
>> are signaling.
>>   - Even if the use of IEEE 754 single-precision floating-point co-
>> processor is configured (TARGET_HARD_FLOAT is true):
>> 1. Load zero value to FP register
>> 2. Possibly, additional FP move if the comparison target is
>>an address register
>> 3. FP equality check instruction
>> 4. Read the boolean register containing the result, or condi-
>>tional branch
>> As noted above, a considerable number of instructions are still
>> generated.
>>
>> /* example #2 */
>> int bool_eqSF(float x) {
>>   return x == 0;
>> }
>> int bool_neSF(float x) {
>>   return x != 0;
>> }
>> int bool_ltSF(float x) {
>>   return x < 0;
>> }
>> extern void foo(void);
>> void cb_eqSF(float x) {
>>   if(x != 0)
>> foo();
>> }
>> void cb_neSF(float x) {
>>   if(x == 0)
>> foo();
>> }
>> void cb_geSF(float x) {
>>   if(x < 0)
>> foo();
>> }
>>
>> ;; after
>> ;; (TARGET_NSA, TARGET_BOOLEANS and TARGET_HARD_FLOAT)
>> bool_eqSF:
>>         add.n   a2, a2, a2
>>         nsau    a2, a2
>>         srli    a2, a2, 5
>>         ret.n
>> bool_neSF:
>>         add.n   a9, a2, a2
>>         movi.n  a2, 1
>>         moveqz  a2, a9, a9
>>         ret.n
>> bool_ltSF:
>>         movi.n  a9, 0
>>         wfr     f0, a2
>>         wfr     f1, a9
>>         olt.s   b0, f0, f1
>>         movi.n  a9, 0
>>         movi.n  a2, 1
>>         movf    a2, a9, b0
>>         ret.n
>> cb_eqSF:
>>         add.n   a2, a2, a2
>>         beqz.n  a2, .L6
>>         j.l     foo, a9
>> .L6:
>>         ret.n
>> cb_neSF:
>>         add.n   a2, a2, a2
>>         bnez.n  a2, .L8
>>         j.l     foo, a9
>> .L8:
>>         ret.n
>> cb_geSF:
>>         addi    sp, sp, -16
>>         movi.n  a3, 0
>>         s32i.n  a12, sp, 8
>>         s32i.n  a0, sp, 12
>>         mov.n   a12, a2
>>         call0   __unordsf2
>>         bnez.n  a2, .L10
>>         movi.n  a3, 0
>>         mov.n   a2, a12
>>         call0   __gesf2
>>         bnei    a2, -1, .L10
>>         l32i.n  a0, sp, 12
>>         l32i.n  a12, sp, 8
>>         addi    sp, sp, 16
>>         j.l     foo, a9
>> .L10:
>>         l32i.n  a0, sp, 12
>>         l32i.n  a12, sp, 8
>>         addi    sp, sp, 16
>>         ret.n
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/predicates.md (const_float_0_operand):
>> Rename from obsolete "const_float_1_operand" and change the
>> constant to compare.
>> (cstoresf_cbranchsf_operand, cstoresf_cbranchsf_operator):
>> New.
>> * config/xtensa/xtensa.cc (xtensa

[PATCH v2] xtensa: Optimize boolean evaluation or branching when EQ/NE to zero in S[IF]mode

2023-06-05 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch optimizes the boolean evaluation of EQ/NE against zero
by adding two insn_and_split patterns similar to SImode conditional
store:

"eq_zero":
op0 = (op1 == 0) ? 1 : 0;
op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */

"movsicc_ne0_reg_zero":
op0 = (op1 != 0) ? op2 : 0;
op0 = op2; if (op1 == 0) op0 = op1;  /* optimized */

/* example #1 */
int bool_eqSI(int x) {
  return x == 0;
}
int bool_neSI(int x) {
  return x != 0;
}

;; after (TARGET_NSA)
bool_eqSI:
        nsau    a2, a2
        srli    a2, a2, 5
        ret.n
bool_neSI:
        mov.n   a9, a2
        movi.n  a2, 1
        moveqz  a2, a9, a9
        ret.n

These also work in SFmode by ignoring their sign bits, and further-
more, the branch if EQ/NE against zero in SFmode is also done in the
same manner.

The reasons for this optimization in SFmode are:

  - Only zero values (negative or non-negative) have no bits set to 1
in both the exponent and the mantissa.
  - EQ/NE comparisons involving NaNs produce no signal even if they
are signaling.
  - Even if the use of IEEE 754 single-precision floating-point co-
processor is configured (TARGET_HARD_FLOAT is true):
1. Load zero value to FP register
2. Possibly, additional FP move if the comparison target is
   an address register
3. FP equality check instruction
4. Read the boolean register containing the result, or condi-
   tional branch
As noted above, a considerable number of instructions are still
generated.

/* example #2 */
int bool_eqSF(float x) {
  return x == 0;
}
int bool_neSF(float x) {
  return x != 0;
}
int bool_ltSF(float x) {
  return x < 0;
}
extern void foo(void);
void cb_eqSF(float x) {
  if(x != 0)
foo();
}
void cb_neSF(float x) {
  if(x == 0)
foo();
}
void cb_geSF(float x) {
  if(x < 0)
foo();
}

;; after
;; (TARGET_NSA, TARGET_BOOLEANS and TARGET_HARD_FLOAT)
bool_eqSF:
        add.n   a2, a2, a2
        nsau    a2, a2
        srli    a2, a2, 5
        ret.n
bool_neSF:
        add.n   a9, a2, a2
        movi.n  a2, 1
        moveqz  a2, a9, a9
        ret.n
bool_ltSF:
        movi.n  a9, 0
        wfr     f0, a2
        wfr     f1, a9
        olt.s   b0, f0, f1
        movi.n  a9, 0
        movi.n  a2, 1
        movf    a2, a9, b0
        ret.n
cb_eqSF:
        add.n   a2, a2, a2
        beqz.n  a2, .L6
        j.l     foo, a9
.L6:
        ret.n
cb_neSF:
        add.n   a2, a2, a2
        bnez.n  a2, .L8
        j.l     foo, a9
.L8:
        ret.n
cb_geSF:
        addi    sp, sp, -16
        movi.n  a3, 0
        s32i.n  a12, sp, 8
        s32i.n  a0, sp, 12
        mov.n   a12, a2
        call0   __unordsf2
        bnez.n  a2, .L10
        movi.n  a3, 0
        mov.n   a2, a12
        call0   __gesf2
        bnei    a2, -1, .L10
        l32i.n  a0, sp, 12
        l32i.n  a12, sp, 8
        addi    sp, sp, 16
        j.l     foo, a9
.L10:
        l32i.n  a0, sp, 12
        l32i.n  a12, sp, 8
        addi    sp, sp, 16
        ret.n

gcc/ChangeLog:

* config/xtensa/predicates.md (const_float_0_operand):
Rename from obsolete "const_float_1_operand" and change the
constant to compare.
(cstoresf_cbranchsf_operand, cstoresf_cbranchsf_operator):
New.
* config/xtensa/xtensa.cc (xtensa_expand_conditional_branch):
Add code for EQ/NE comparison with constant zero in SFmode.
(xtensa_expand_scc): Added code to derive boolean evaluation
of EQ/NE with constant zero for comparison in SFmode.
(xtensa_rtx_costs): Change cost of CONST_DOUBLE with value
zero inside "cbranchsf4" to 0.
* config/xtensa/xtensa.md (cbranchsf4, cstoresf4):
Change "match_operator" and the third "match_operand" to the
ones mentioned above.
(movsicc_ne0_reg_zero, eq_zero): New.
---
 gcc/config/xtensa/predicates.md | 17 +--
 gcc/config/xtensa/xtensa.cc | 45 
 gcc/config/xtensa/xtensa.md | 53 +
 3 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index a3575a68892..cfac3ad4936 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -155,11 +155,11 @@
&& CONSTANT_P (op)
&& GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")
 
-;; Accept the floating point constant 1 in the appropriate mode.
-(define_predicate "const_float_1_operand"
+;; Accept the floating point constant 0 in the appropriate mode.
+(define_predicate "const_float_0_operand"
   (match_code "const_double")
 {
-  return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
+  return real_equal (CONST_DOUBLE_REAL_VALUE 

[PATCH] xtensa: Optimize boolean evaluation or branching when EQ/NE to INT_MIN

2023-06-03 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch optimizes both the boolean evaluation of and the branching on
EQ/NE against INT_MIN (-2147483648), by taking advantage of the specifi-
cation that the ABS machine instruction on Xtensa returns INT_MIN iff its
operand is INT_MIN, and a non-negative value otherwise.

/* example */
int test0(int x) {
  return (x == -2147483648);
}
int test1(int x) {
  return (x != -2147483648);
}
extern void foo(void);
void test2(int x) {
  if(x == -2147483648)
foo();
}
void test3(int x) {
  if(x != -2147483648)
foo();
}

;; before
test0:
        movi.n  a9, -1
        slli    a9, a9, 31
        add.n   a2, a2, a9
        nsau    a2, a2
        srli    a2, a2, 5
        ret.n
test1:
        movi.n  a9, -1
        slli    a9, a9, 31
        add.n   a9, a2, a9
        movi.n  a2, 1
        moveqz  a2, a9, a9
        ret.n
test2:
        movi.n  a9, -1
        slli    a9, a9, 31
        bne     a2, a9, .L3
        j.l     foo, a9
.L3:
        ret.n
test3:
        movi.n  a9, -1
        slli    a9, a9, 31
        beq     a2, a9, .L5
        j.l     foo, a9
.L5:
        ret.n

;; after
test0:
        abs     a2, a2
        extui   a2, a2, 31, 1
        ret.n
test1:
        abs     a2, a2
        srai    a2, a2, 31
        addi.n  a2, a2, 1
        ret.n
test2:
        abs     a2, a2
        bbci    a2, 31, .L3
        j.l     foo, a9
.L3:
        ret.n
test3:
        abs     a2, a2
        bbsi    a2, 31, .L5
        j.l     foo, a9
.L5:
        ret.n

gcc/ChangeLog:

* config/xtensa/xtensa.md (*btrue_INT_MIN, *eqne_INT_MIN):
New insn_and_split patterns.
---
 gcc/config/xtensa/xtensa.md | 64 +
 1 file changed, 64 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 87620934bbe..c9790babf75 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1940,6 +1940,37 @@
   (const_int 2)
   (const_int 3)))])
 
+(define_insn_and_split "*btrue_INT_MIN"
+  [(set (pc)
+   (if_then_else (match_operator 2 "boolean_operator"
+   [(match_operand:SI 0 "register_operand" "r")
+(const_int -2147483648)])
+ (label_ref (match_operand 1 ""))
+ (pc)))]
+  "TARGET_ABS"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(set (match_dup 3)
+   (abs:SI (match_dup 0)))
+   (set (pc)
+   (if_then_else (match_op_dup 2
+   [(zero_extract:SI (match_dup 3)
+ (const_int 1)
+ (match_dup 4))
+(const_int 0)])
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+  operands[3] = gen_reg_rtx (SImode);
+  operands[4] = GEN_INT (BITS_BIG_ENDIAN ? 0 : 31);
+  operands[2] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[2])),
+   VOIDmode, XEXP (operands[2], 0),
+   const0_rtx);
+}
+  [(set_attr "type"    "jump")
+   (set_attr "mode"    "none")
+   (set_attr "length"  "6")])
+
 (define_insn "*ubtrue"
   [(set (pc)
(if_then_else (match_operator 3 "ubranch_operator"
@@ -3198,6 +3229,39 @@
(set_attr "mode"    "SI")
(set_attr "length"  "6")])
 
+(define_insn_and_split "*eqne_INT_MIN"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (match_operator 2 "boolean_operator"
+   [(match_operand:SI 1 "register_operand" "r")
+(const_int -2147483648)]))]
+  "TARGET_ABS"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (abs:SI (match_dup 1)))
+   (set (match_dup 0)
+   (match_op_dup:SI 2
+   [(match_dup 0)
+(const_int 31)]))
+   (match_dup 3)]
+{
+  enum rtx_code code = GET_CODE (operands[2]);
+  operands[2] = gen_rtx_fmt_ee ((code == EQ) ? LSHIFTRT : ASHIFTRT,
+   SImode, XEXP (operands[2], 0),
+   XEXP (operands[2], 1));
+  operands[3] = (code != EQ) ? gen_addsi3 (operands[0],
+  operands[0], const1_rtx)
+: const0_rtx;
+}
+  [(set_attr "type"    "move")
+   (set_attr "mode"    "SI")
+   (set (attr "length")
+   (if_then_else (match_test "GET_CODE (operands[2]) == EQ")
+ (const_int 3)
+ (if_then_else (match_test "TARGET_DENSITY")
+   (const_int 5)
+   (const_int 6))))])
+
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand")
(match_operand:SI 6 "reload_operand"))
-- 
2.30.2


[PATCH] xtensa: Optimize boolean evaluation or branching when EQ/NE to zero in S[IF]mode

2023-06-03 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch optimizes the boolean evaluation of EQ/NE against zero
by adding two insn_and_split patterns similar to SImode conditional
store:

"eq_zero":
op0 = (op1 == 0) ? 1 : 0;
op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */

"movsicc_ne0_reg_zero":
op0 = (op1 != 0) ? op2 : 0;
op0 = op2; if (op1 == 0) op0 = op1;  /* optimized */

These also work in SFmode by ignoring their sign bits, and further-
more, the branch if EQ/NE against zero in SFmode is also done in the
same manner.

The reasons for this optimization in SFmode are:

  - Only zero values (negative or non-negative) have no bits set to 1
in both the exponent and the mantissa.
  - EQ/NE comparisons involving NaNs produce no signal even if they
are signaling.
  - Even if the use of IEEE 754 single-precision floating-point co-
processor is configured (TARGET_HARD_FLOAT is true):
1. Load zero value to FP register
2. Possibly, additional FP move if the comparison target is
   an address register
3. FP equality check instruction
4. Read the boolean register containing the result, or condi-
   tional branch
As noted above, a considerable number of instructions are still
generated.

gcc/ChangeLog:

* config/xtensa/predicates.md (const_float_0_operand):
Rename from obsolete "const_float_1_operand" and change the
constant to compare.
(cstoresf_cbranchsf_operand, cstoresf_cbranchsf_operator):
New.
* config/xtensa/xtensa.cc (xtensa_expand_conditional_branch):
Add code for EQ/NE comparison with constant zero in SFmode.
(xtensa_expand_scc): Added code to derive boolean evaluation
of EQ/NE with constant zero for comparison in SFmode.
(xtensa_rtx_costs): Change cost of CONST_DOUBLE with value
zero inside "cbranchsf4" to 0.
* config/xtensa/xtensa.md (cbranchsf4, cstoresf4):
Change "match_operator" and the third "match_operand" to the
ones mentioned above.
(movsicc_ne0_reg_zero, eq_zero): New.
---
 gcc/config/xtensa/predicates.md | 19 ++--
 gcc/config/xtensa/xtensa.cc | 43 ++
 gcc/config/xtensa/xtensa.md | 53 +
 3 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index a3575a68892..d3b49e32fa4 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -155,11 +155,11 @@
&& CONSTANT_P (op)
&& GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")
 
-;; Accept the floating point constant 1 in the appropriate mode.
-(define_predicate "const_float_1_operand"
+;; Accept the floating point constant 0 in the appropriate mode.
+(define_predicate "const_float_0_operand"
   (match_code "const_double")
 {
-  return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
+  return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst0);
 })
 
 (define_predicate "fpmem_offset_operand"
@@ -179,6 +179,13 @@
   return false;
 })
 
+(define_predicate "cstoresf_cbranchsf_operand"
+  (ior (and (match_test "TARGET_HARD_FLOAT")
+   (match_operand 0 "register_operand"))
+   (and (match_code "const_double")
+   (match_test "real_equal (CONST_DOUBLE_REAL_VALUE (op),
+ &dconst0)"))))
+
 (define_predicate "branch_operator"
   (match_code "eq,ne,lt,ge"))
 
@@ -197,6 +204,12 @@
 (define_predicate "xtensa_cstoresi_operator"
   (match_code "eq,ne,gt,ge,lt,le"))
 
+(define_predicate "cstoresf_cbranchsf_operator"
+  (ior (and (match_test "TARGET_HARD_FLOAT")
+   (match_operand 0 "comparison_operator"))
+   (and (match_test "!TARGET_HARD_FLOAT")
+   (match_operand 0 "boolean_operator"))))
+
 (define_predicate "xtensa_shift_per_byte_operator"
   (match_code "ashift,ashiftrt,lshiftrt"))
 
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3b5d25b660a..fefca3b11cd 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -865,6 +865,16 @@ xtensa_expand_conditional_branch (rtx *operands, 
machine_mode mode)
   switch (mode)
 {
 case E_SFmode:
+  if ((test_code == EQ || test_code == NE)
+ && const_float_0_operand (cmp1, SFmode))
+   {
+ emit_move_insn (cmp1 = gen_reg_rtx (SImode),
+ gen_rtx_SUBREG (SImode, cmp0, 0));
+ emit_insn (gen_addsi3 (cmp1, cmp1, cmp1));
+ cmp = gen_int_relational (test_code, cmp1, const0_rtx);
+ break;
+   }
+
   if (TARGET_HARD_FLOAT)
{
  cmp = gen_float_relational (test_code, cmp0, cmp1);
@@ -996,6 +1006,34 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
   rtx one_tmp, zero_tmp;
   rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
 
+  if (cmp_mode == SFmode)
+{
+  if (const_float_0_operand (operands[3], SFmode))
+ 

Re: [PATCH 2/3 v3] xtensa: Add 'adddi3' and 'subdi3' insn patterns

2023-06-01 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/06/01 23:20, Max Filippov wrote:
> On Wed, May 31, 2023 at 11:01 PM Takayuki 'January June' Suwa
>  wrote:
>> More optimized than the default RTL generation.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.md (adddi3, subdi3):
>> New RTL generation patterns implemented according to the instruc-
>> tion idioms described in the Xtensa ISA reference manual (p. 600).
>> ---
>>  gcc/config/xtensa/xtensa.md | 52 +
>>  1 file changed, 52 insertions(+)
>>
>> diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
>> index eda1353894b..21afa747e89 100644
>> --- a/gcc/config/xtensa/xtensa.md
>> +++ b/gcc/config/xtensa/xtensa.md
>> @@ -190,6 +190,35 @@
>> (set_attr "mode"    "SI")
>> (set_attr "length"  "3")])
>>
>> +(define_expand "adddi3"
>> +  [(set (match_operand:DI 0 "register_operand")
>> +   (plus:DI (match_operand:DI 1 "register_operand")
>> +(match_operand:DI 2 "register_operand")))]
>> +  ""
>> +{
>> +  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
>> +  rtx_code_label *label;
>> +  if (rtx_equal_p (operands[0], operands[1])
>> +  || rtx_equal_p (operands[0], operands[2])
> 
>> +  || ! REG_P (operands[1]) || ! REG_P (operands[2]))
> 
> I wonder if these additional conditions are necessary, given that
> the operands have the "register_operand" predicates?
> 

See register_operand() in gcc/recog.cc.

In fact, I've encountered several operands that satisfy the
register_operand predicate but result in REG_P() being false.


[PATCH 2/3 v3] xtensa: Add 'adddi3' and 'subdi3' insn patterns

2023-06-01 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/05/31 15:02, Max Filippov wrote:
Hi!

> On Tue, May 30, 2023 at 2:50 AM Takayuki 'January June' Suwa
>  wrote:
>>
>> Resubmitting the correct one due to a mistake in merging order of fixes.
>> ---
>> More optimized than the default RTL generation.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.md (adddi3, subdi3):
>> New RTL generation patterns implemented according to the instruc-
>> tion idioms described in the Xtensa ISA reference manual (p. 600).
>> ---
>>  gcc/config/xtensa/xtensa.md | 52 +
>>  1 file changed, 52 insertions(+)
>>
>> diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
>> index eda1353894b..6882baaedfd 100644
>> --- a/gcc/config/xtensa/xtensa.md
>> +++ b/gcc/config/xtensa/xtensa.md
>> @@ -190,6 +190,32 @@
>> (set_attr "mode"    "SI")
>> (set_attr "length"  "3")])
>>
>> +(define_expand "adddi3"
>> +  [(set (match_operand:DI 0 "register_operand")
>> +   (plus:DI (match_operand:DI 1 "register_operand")
>> +(match_operand:DI 2 "register_operand")))]
>> +  ""
>> +{
>> +  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
>> +  rtx_code_label *label;
>> +  lo_dest = gen_lowpart (SImode, operands[0]);
>> +  hi_dest = gen_highpart (SImode, operands[0]);
>> +  lo_op0 = gen_lowpart (SImode, operands[1]);
>> +  hi_op0 = gen_highpart (SImode, operands[1]);
>> +  lo_op1 = gen_lowpart (SImode, operands[2]);
>> +  hi_op1 = gen_highpart (SImode, operands[2]);
>> +  if (rtx_equal_p (lo_dest, lo_op1))
>> +FAIL;
> 
> With this condition I see the following source
> 
> unsigned long long foo(unsigned long long a, unsigned long long b)
> {
>return a + b;
> }
> 
> turns to (expected)
> 
>.global foo
>.type   foo, @function
> foo:
>add.n   a2, a2, a4
>add.n   a3, a3, a5
>bgeu    a2, a4, .L2
>addi.n  a3, a3, 1
> .L2:
>ret.n
> 
> but
> 
> unsigned long long foo(unsigned long long a, unsigned long long b)
> {
>return b + a;
> }
> 
> has an extra instruction:
> 
>.global foo
>.type   foo, @function
> foo:
>mov.n   a9, a2
>add.n   a2, a4, a2
>add.n   a3, a5, a3
>bgeu    a2, a9, .L2
>addi.n  a3, a3, 1
> .L2:
>ret.n
> 
> I thought that maybe the following would help (plus using
> lo_cmp in the emit_cmp_and_jump_insns below):
> 
>   if (!rtx_equal_p (lo_dest, lo_op0))
>lo_cmp = lo_op0;
>  else if (!rtx_equal_p (lo_dest, lo_op1))
>lo_cmp = lo_op1;
>  else
>FAIL;
> 
> but to my surprise it doesn't.

As you may have noticed, at the time of RTL generation both of the above-
mentioned are almost the same (only a and b have been swapped).
Whether or not there are extra registers is determined at a later stage,
so there is very little that can be done about it at (define_expand).

I thought as above, but when I looked at the generated RTL again, I noticed
that I could somehow make a decision based on the order of the generated
pseudo-register numbers.

> 
>> +  emit_clobber (operands[0]);
> 
> Why is this clobber needed?

Apparently there is no need to clobber explicitly (because even if omitted,
it will appear in the generated result).

> 
>> +  emit_insn (gen_addsi3 (lo_dest, lo_op0, lo_op1));
>> +  emit_insn (gen_addsi3 (hi_dest, hi_op0, hi_op1));
>> +  emit_cmp_and_jump_insns (lo_dest, lo_op1, GEU, const0_rtx,
>> +  SImode, true, label = gen_label_rtx ());
>> +  emit_insn (gen_addsi3 (hi_dest, hi_dest, const1_rtx));
>> +  emit_label (label);
>> +  DONE;
>> +})
>> +
>>  (define_insn "addsf3"
>>[(set (match_operand:SF 0 "register_operand" "=f")
>> (plus:SF (match_operand:SF 1 "register_operand" "%f")
>> @@ -237,6 +263,32 @@
>>   (const_int 5)
>>   (const_int 6)))])
>>
>> +(define_expand "subdi3"
>> +  [(set (match_operand:DI 0 "register_operand")
>> +   (minus:DI (match_operand:DI 1 "register_operand")
>> + (match_operand:DI 2 "register_operand")))]
>> +  ""
>> +{
>> +  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
>> +  rtx_code_label *label;
>> +  lo_dest = gen_lowpart (SImode, operands[0]);
>> +  hi_dest = ge

[PATCH 3/3 v2] xtensa: Optimize 'cstoresi4' insn pattern

2023-05-30 Thread Takayuki 'January June' Suwa via Gcc-patches
Resubmitting the correct one due to a mistake in merging order of fixes.
---
This patch introduces more optimized implementations for the 6 cstoresi4
insn comparison methods (eq/ne/lt/le/gt/ge; eq, however, requires
TARGET_NSA).

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_expand_scc):
Add dedicated optimization code for cstoresi4 (eq/ne/gt/ge/lt/le).
* config/xtensa/xtensa.md (xtensa_ge_zero):
Rename from '*signed_ge_zero', because it had to be called from
'xtensa_expand_scc()'.
---
 gcc/config/xtensa/xtensa.cc | 106 
 gcc/config/xtensa/xtensa.md |   2 +-
 2 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3b5d25b660a..64efd3d7287 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -991,24 +991,108 @@ xtensa_expand_conditional_move (rtx *operands, int isflt)
 int
 xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
 {
-  rtx dest = operands[0];
-  rtx cmp;
-  rtx one_tmp, zero_tmp;
+  rtx dest = operands[0], op0 = operands[2], op1 = operands[3];
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx cmp, tmp0, tmp1;
   rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
 
-  if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
-   operands[2], operands[3])))
-return 0;
+  /* Dedicated optimizations for cstoresi4.
+ a. In a magnitude comparison operator, swapping both sides and
+   inverting magnitude does not change the result,
+   eg. '(x >= y) != (y <= x)' is a constant of zero
+   (GE is changed to LE, not LT).
+ b. Due to room for further optimization, we use subtraction rather
+   than XOR (the default for RTL expansion of EQ/NE) as the binary
+   operation which is zero if both sides are the same and non-zero
+   otherwise.  */
+  if (cmp_mode == SImode)
+switch (code)
+  {
+  /* EQ(op0, op1) := clz(op0 - op1) / 32 [requires TARGET_NSA] */
+  case EQ:
+   if (!TARGET_NSA)
+ break;
+   /* EQ to EQZ conversion by subtracting op1 from op0.  */
+   emit_move_insn (dest,
+   expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN));
+   /* NSAU instruction will return 32 iff the source is zero,
+  zero through 31 otherwise (See Xtensa ISA Reference Manual,
+  p. 462)  */
+   emit_insn (gen_clzsi2 (dest, dest));
+   emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (5)));
+   return 1;
+
+  /* NE(op0, op1) := (op0 - op1) == 0 ? 0 : 1 */
+  case NE:
+   /* NE to NEZ conversion by subtracting op1 from op0.  */
+   emit_move_insn (tmp0 = gen_reg_rtx (SImode),
+   expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN));
+   emit_move_insn (dest, const_true_rtx);
+   emit_move_insn (dest,
+   gen_rtx_fmt_eee (IF_THEN_ELSE, SImode,
+gen_rtx_fmt_ee (EQ, VOIDmode,
+tmp0, const0_rtx),
+tmp0, dest));
+   return 1;
+
+  case LE:
+   if (REG_P (op1))
+ {
+   /* LE to GE conversion by swapping both sides.  */
+   tmp0 = op0, op0 = op1, op1 = tmp0;
+   goto case_GE_reg;
+ }
+   /* LE to LT conversion by adding one to op1.  */
+   op1 = GEN_INT (INTVAL (op1) + 1);
+   /* fallthru */
+
+  /* LT(op0, op1) := (unsigned)(op0 - op1) >> 31 */
+  case LT:
+case_LT:
+   /* LT to LTZ conversion by subtracting op1 from op0.  */
+   emit_move_insn (dest,
+   expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN));
+   emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (31)));
+   return 1;
+
+  case GE:
+   if (REG_P (op1))
+ {
+case_GE_reg:
+   /* GE to GEZ conversion by subtracting op1 from op0.  */
+   emit_move_insn (dest,
+   expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN));
+   /* Emitting the dedicated insn pattern.  */
+   emit_insn (gen_xtensa_ge_zero (dest, dest));
+   return 1;
+ }
+   /* GE to GT conversion by subtracting one from op1.  */
+   op1 = GEN_INT (INTVAL (op1) - 1);
+   /* fallthru */
 
-  one_tmp = gen_reg_rtx (SImode);
-  zero_tmp = gen_reg_rtx (SImode);
-  emit_insn (gen_movsi (one_tmp, const_true_rtx));
-  emit_insn (gen_movsi (zero_tmp, const0_rtx));
+  case GT:
+   /* GT to LT conversion by swapping both sides.  */
+   tmp0 = op0, op0 = op1, op1 = tmp0;
+   goto case_LT;
 
+  default:
+   break;
+  }
+
+  if (! (cmp = 

[PATCH 2/3 v2] xtensa: Add 'adddi3' and 'subdi3' insn patterns

2023-05-30 Thread Takayuki 'January June' Suwa via Gcc-patches
Resubmitting the correct one due to a mistake in merging order of fixes.
---
More optimized than the default RTL generation.

gcc/ChangeLog:

* config/xtensa/xtensa.md (adddi3, subdi3):
New RTL generation patterns implemented according to the instruc-
tion idioms described in the Xtensa ISA reference manual (p. 600).
---
 gcc/config/xtensa/xtensa.md | 52 +
 1 file changed, 52 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index eda1353894b..6882baaedfd 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -190,6 +190,32 @@
(set_attr "mode"    "SI")
(set_attr "length"  "3")])
 
+(define_expand "adddi3"
+  [(set (match_operand:DI 0 "register_operand")
+   (plus:DI (match_operand:DI 1 "register_operand")
+(match_operand:DI 2 "register_operand")))]
+  ""
+{
+  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
+  rtx_code_label *label;
+  lo_dest = gen_lowpart (SImode, operands[0]);
+  hi_dest = gen_highpart (SImode, operands[0]);
+  lo_op0 = gen_lowpart (SImode, operands[1]);
+  hi_op0 = gen_highpart (SImode, operands[1]);
+  lo_op1 = gen_lowpart (SImode, operands[2]);
+  hi_op1 = gen_highpart (SImode, operands[2]);
+  if (rtx_equal_p (lo_dest, lo_op1))
+FAIL;
+  emit_clobber (operands[0]);
+  emit_insn (gen_addsi3 (lo_dest, lo_op0, lo_op1));
+  emit_insn (gen_addsi3 (hi_dest, hi_op0, hi_op1));
+  emit_cmp_and_jump_insns (lo_dest, lo_op1, GEU, const0_rtx,
+  SImode, true, label = gen_label_rtx ());
+  emit_insn (gen_addsi3 (hi_dest, hi_dest, const1_rtx));
+  emit_label (label);
+  DONE;
+})
+
 (define_insn "addsf3"
   [(set (match_operand:SF 0 "register_operand" "=f")
(plus:SF (match_operand:SF 1 "register_operand" "%f")
@@ -237,6 +263,32 @@
  (const_int 5)
  (const_int 6)))])
 
+(define_expand "subdi3"
+  [(set (match_operand:DI 0 "register_operand")
+   (minus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "register_operand")))]
+  ""
+{
+  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
+  rtx_code_label *label;
+  lo_dest = gen_lowpart (SImode, operands[0]);
+  hi_dest = gen_highpart (SImode, operands[0]);
+  lo_op0 = gen_lowpart (SImode, operands[1]);
+  hi_op0 = gen_highpart (SImode, operands[1]);
+  lo_op1 = gen_lowpart (SImode, operands[2]);
+  hi_op1 = gen_highpart (SImode, operands[2]);
+  if (rtx_equal_p (lo_op0, lo_op1))
+FAIL;
+  emit_clobber (operands[0]);
+  emit_insn (gen_subsi3 (lo_dest, lo_op0, lo_op1));
+  emit_insn (gen_subsi3 (hi_dest, hi_op0, hi_op1));
+  emit_cmp_and_jump_insns (lo_op0, lo_op1, GEU, const0_rtx,
+  SImode, true, label = gen_label_rtx ());
+  emit_insn (gen_addsi3 (hi_dest, hi_dest, constm1_rtx));
+  emit_label (label);
+  DONE;
+})
+
 (define_insn "subsf3"
   [(set (match_operand:SF 0 "register_operand" "=f")
(minus:SF (match_operand:SF 1 "register_operand" "f")
-- 
2.30.2


[PATCH 3/3] xtensa: Optimize 'cstoresi4' insn pattern

2023-05-30 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch introduces more optimized implementations for the 6 cstoresi4
insn comparison methods (eq/ne/lt/le/gt/ge; eq, however, requires
TARGET_NSA).

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_expand_scc):
Add dedicated optimization code for cstoresi4 (eq/ne/gt/ge/lt/le).
* config/xtensa/xtensa.md (xtensa_ge_zero):
Rename from '*signed_ge_zero', because it had to be called from
'xtensa_expand_scc()'.
---
 gcc/config/xtensa/xtensa.cc | 106 
 gcc/config/xtensa/xtensa.md |  14 ++---
 2 files changed, 102 insertions(+), 18 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3b5d25b660a..64efd3d7287 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -991,24 +991,108 @@ xtensa_expand_conditional_move (rtx *operands, int isflt)
 int
 xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
 {
-  rtx dest = operands[0];
-  rtx cmp;
-  rtx one_tmp, zero_tmp;
+  rtx dest = operands[0], op0 = operands[2], op1 = operands[3];
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx cmp, tmp0, tmp1;
   rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
 
-  if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
-   operands[2], operands[3])))
-return 0;
+  /* Dedicated optimizations for cstoresi4.
+ a. In a magnitude comparison operator, swapping both sides and
+   inverting magnitude does not change the result,
+   eg. '(x >= y) != (y <= x)' is a constant of zero
+   (GE is changed to LE, not LT).
+ b. Due to room for further optimization, we use subtraction rather
+   than XOR (the default for RTL expansion of EQ/NE) as the binary
+   operation which is zero if both sides are the same and non-zero
+   otherwise.  */
+  if (cmp_mode == SImode)
+switch (code)
+  {
+  /* EQ(op0, op1) := clz(op0 - op1) / 32 [requires TARGET_NSA] */
+  case EQ:
+   if (!TARGET_NSA)
+ break;
+   /* EQ to EQZ conversion by subtracting op1 from op0.  */
+   emit_move_insn (dest,
+   expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN));
+   /* NSAU instruction will return 32 iff the source is zero,
+  zero through 31 otherwise (See Xtensa ISA Reference Manual,
+  p. 462)  */
+   emit_insn (gen_clzsi2 (dest, dest));
+   emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (5)));
+   return 1;
+
+  /* NE(op0, op1) := (op0 - op1) == 0 ? 0 : 1 */
+  case NE:
+   /* NE to NEZ conversion by subtracting op1 from op0.  */
+   emit_move_insn (tmp0 = gen_reg_rtx (SImode),
+   expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN));
+   emit_move_insn (dest, const_true_rtx);
+   emit_move_insn (dest,
+   gen_rtx_fmt_eee (IF_THEN_ELSE, SImode,
+gen_rtx_fmt_ee (EQ, VOIDmode,
+tmp0, const0_rtx),
+tmp0, dest));
+   return 1;
+
+  case LE:
+   if (REG_P (op1))
+ {
+   /* LE to GE conversion by swapping both sides.  */
+   tmp0 = op0, op0 = op1, op1 = tmp0;
+   goto case_GE_reg;
+ }
+   /* LE to LT conversion by adding one to op1.  */
+   op1 = GEN_INT (INTVAL (op1) + 1);
+   /* fallthru */
+
+  /* LT(op0, op1) := (unsigned)(op0 - op1) >> 31 */
+  case LT:
+case_LT:
+   /* LT to LTZ conversion by subtracting op1 from op0.  */
+   emit_move_insn (dest,
+   expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN));
+   emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (31)));
+   return 1;
+
+  case GE:
+   if (REG_P (op1))
+ {
+case_GE_reg:
+   /* GE to GEZ conversion by subtracting op1 from op0.  */
+   emit_move_insn (dest,
+   expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN));
+   /* Emitting the dedicated insn pattern.  */
+   emit_insn (gen_xtensa_ge_zero (dest, dest));
+   return 1;
+ }
+   /* GE to GT conversion by subtracting one from op1.  */
+   op1 = GEN_INT (INTVAL (op1) - 1);
+   /* fallthru */
 
-  one_tmp = gen_reg_rtx (SImode);
-  zero_tmp = gen_reg_rtx (SImode);
-  emit_insn (gen_movsi (one_tmp, const_true_rtx));
-  emit_insn (gen_movsi (zero_tmp, const0_rtx));
+  case GT:
+   /* GT to LT conversion by swapping both sides.  */
+   tmp0 = op0, op0 = op1, op1 = tmp0;
+   goto case_LT;
 
+  default:
+   break;
+  }
+
+  if (! (cmp = gen_conditional_move (code, cmp_mode, op0, op1)))
+return 0;
+
+  tmp0 = force_reg 

[PATCH 2/3] xtensa: Add 'adddi3' and 'subdi3' insn patterns

2023-05-30 Thread Takayuki 'January June' Suwa via Gcc-patches
More optimized than the default RTL generation.

gcc/ChangeLog:

* config/xtensa/xtensa.md (adddi3, subdi3):
New RTL generation patterns implemented according to the instruc-
tion idioms described in the Xtensa ISA reference manual (p. 600).
---
 gcc/config/xtensa/xtensa.md | 52 +
 1 file changed, 52 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index eda1353894b..7870fb0bfce 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -190,6 +190,32 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+(define_expand "adddi3"
+  [(set (match_operand:DI 0 "register_operand")
+   (plus:DI (match_operand:DI 1 "register_operand")
+(match_operand:DI 2 "register_operand")))]
+  ""
+{
+  rtx_code_label *label = gen_label_rtx ();
+  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
+  lo_dest = gen_lowpart (SImode, operands[0]);
+  hi_dest = gen_highpart (SImode, operands[0]);
+  lo_op0 = gen_lowpart (SImode, operands[1]);
+  hi_op0 = gen_highpart (SImode, operands[1]);
+  lo_op1 = gen_lowpart (SImode, operands[2]);
+  hi_op1 = gen_highpart (SImode, operands[2]);
+  if (rtx_equal_p (lo_dest, lo_op1))
+FAIL;
+  emit_clobber (operands[0]);
+  emit_insn (gen_addsi3 (lo_dest, lo_op0, lo_op1));
+  emit_insn (gen_addsi3 (hi_dest, hi_op0, hi_op1));
+  emit_cmp_and_jump_insns (lo_dest, lo_op1, GEU,
+  const0_rtx, SImode, true, label);
+  emit_insn (gen_addsi3 (hi_dest, hi_dest, const1_rtx));
+  emit_label (label);
+  DONE;
+})
+
 (define_insn "addsf3"
   [(set (match_operand:SF 0 "register_operand" "=f")
(plus:SF (match_operand:SF 1 "register_operand" "%f")
@@ -237,6 +263,32 @@
  (const_int 5)
  (const_int 6)))])
 
+(define_expand "subdi3"
+  [(set (match_operand:DI 0 "register_operand")
+   (minus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "register_operand")))]
+  ""
+{
+  rtx_code_label *label = gen_label_rtx ();
+  rtx lo_dest, hi_dest, lo_op0, hi_op0, lo_op1, hi_op1;
+  lo_dest = gen_lowpart (SImode, operands[0]);
+  hi_dest = gen_highpart (SImode, operands[0]);
+  lo_op0 = gen_lowpart (SImode, operands[1]);
+  hi_op0 = gen_highpart (SImode, operands[1]);
+  lo_op1 = gen_lowpart (SImode, operands[2]);
+  hi_op1 = gen_highpart (SImode, operands[2]);
+  if (rtx_equal_p (lo_op0, lo_op1))
+FAIL;
+  emit_clobber (operands[0]);
+  emit_insn (gen_subsi3 (lo_dest, lo_op0, lo_op1));
+  emit_insn (gen_subsi3 (hi_dest, hi_op0, hi_op1));
+  emit_cmp_and_jump_insns (lo_op0, lo_op1, GEU,
+  const0_rtx, SImode, true, label);
+  emit_insn (gen_addsi3 (hi_dest, hi_dest, constm1_rtx));
+  emit_label (label);
+  DONE;
+})
+
 (define_insn "subsf3"
   [(set (match_operand:SF 0 "register_operand" "=f")
(minus:SF (match_operand:SF 1 "register_operand" "f")
-- 
2.30.2


[PATCH 1/3] xtensa: Improve "*shlrd_reg" insn pattern and its variant

2023-05-30 Thread Takayuki 'January June' Suwa via Gcc-patches
The insn "*shlrd_reg" shifts two registers with a funnel shifter by the
third register to get a single word result:

  reg0 = (reg1 SHIFT_OP0 reg3) BIT_JOIN_OP (reg2 SHIFT_OP1 (32 - reg3))

where the funnel left shift is SHIFT_OP0 := ASHIFT, SHIFT_OP1 := LSHIFTRT
and its right shift is SHIFT_OP0 := LSHIFTRT, SHIFT_OP1 := ASHIFT,
respectively.  And also, BIT_JOIN_OP can be either PLUS or IOR in either
shift direction.

  [(set (match_operand:SI 0 "register_operand" "=a")
(match_operator:SI 6 "xtensa_bit_join_operator"
[(match_operator:SI 4 "logical_shift_operator"
[(match_operand:SI 1 "register_operand" "r")
 (match_operand:SI 3 "register_operand" "r")])
 (match_operator:SI 5 "logical_shift_operator"
[(match_operand:SI 2 "register_operand" "r")
 (neg:SI (match_dup 3))])]))]

Although the RTL matching template can express it as above, there is no
way of specifying that the operator (operands[6]) that combines the two
individual shifts is commutative.
Thus, if multiple insn sequences matching the above pattern appear
adjacently, the combiner may accidentally mix them up and get partial
results.

This patch adds a new insn-and-split pattern with the two sides swapped
representation of the bit-combining operation that was lacking and
described above.

It also changes the other "*shlrd" variants so that the arbitrariness of
the bit-combining operation, previously described with a code iterator,
is now expressed by a combination of match_operator and the predicate
above.

gcc/ChangeLog:

* config/xtensa/predicates.md (xtensa_bit_join_operator):
New predicate.
* config/xtensa/xtensa.md (ior_op): Remove.
(*shlrd_reg): Rename from "*shlrd_reg_", and add the
insn_and_split pattern of the same name to express and capture
the bit-combining operation with both sides swapped.
In addition, replace use of code iterator with new operator
predicate.
(*shlrd_const, *shlrd_per_byte):
Likewise regarding the code iterator.
---
 gcc/config/xtensa/predicates.md |  3 ++
 gcc/config/xtensa/xtensa.md | 81 ++---
 2 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 5faf1be8c15..a3575a68892 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -200,6 +200,9 @@
 (define_predicate "xtensa_shift_per_byte_operator"
   (match_code "ashift,ashiftrt,lshiftrt"))
 
+(define_predicate "xtensa_bit_join_operator"
+  (match_code "plus,ior"))
+
 (define_predicate "tls_symbol_operand"
   (and (match_code "symbol_ref")
(match_test "SYMBOL_REF_TLS_MODEL (op) != 0")))
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 57e50911f52..eda1353894b 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -87,9 +87,6 @@
 ;; the same template.
 (define_mode_iterator HQI [HI QI])
 
-;; This code iterator is for *shlrd and its variants.
-(define_code_iterator ior_op [ior plus])
-
 
 ;; Attributes.
 
@@ -1682,21 +1679,22 @@
(set_attr "mode""SI")
(set_attr "length"  "9")])
 
-(define_insn "*shlrd_reg_"
+(define_insn "*shlrd_reg"
   [(set (match_operand:SI 0 "register_operand" "=a")
-   (ior_op:SI (match_operator:SI 4 "logical_shift_operator"
+   (match_operator:SI 6 "xtensa_bit_join_operator"
+   [(match_operator:SI 4 "logical_shift_operator"
[(match_operand:SI 1 "register_operand" "r")
-(match_operand:SI 2 "register_operand" "r")])
-  (match_operator:SI 5 "logical_shift_operator"
-   [(match_operand:SI 3 "register_operand" "r")
-(neg:SI (match_dup 2))])))]
+(match_operand:SI 3 "register_operand" "r")])
+(match_operator:SI 5 "logical_shift_operator"
+   [(match_operand:SI 2 "register_operand" "r")
+(neg:SI (match_dup 3))])]))]
   "!optimize_debug && optimize
&& xtensa_shlrd_which_direction (operands[4], operands[5]) != UNKNOWN"
 {
   switch (xtensa_shlrd_which_direction (operands[4], operands[5]))
 {
-case ASHIFT:   return "ssl\t%2\;src\t%0, %1, %3";
-case LSHIFTRT: return "ssr\t%2\;src\t%0, %3, %1";
+case ASHIFT:   return "ssl\t%3\;src\t%0, %1, %2";
+case LSHIFTRT: return "ssr\t%3\;src\t%0, %2, %1";
 default:   gcc_unreachable ();
 }
 }
@@ -1704,14 +1702,42 @@
(set_attr "mode""SI")
(set_attr "length"  "6")])
 
-(define_insn "*shlrd_const_"
+(define_insn_and_split "*shlrd_reg"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (match_operator:SI 6 "xtensa_bit_join_operator"
+   [(match_operator:SI 4 "logical_shift_operator"
+   

[PATCH 3/3] xtensa: Rework 'setmemsi' insn pattern

2023-05-25 Thread Takayuki 'January June' Suwa via Gcc-patches
In order to reject voodoo estimation logic with lots of magic numbers,
this patch revises the code to measure the costs of the three memset
methods based on the actual emission size of the insn sequence
corresponding to each method and choose the smallest one.

gcc/ChangeLog:

* config/xtensa/xtensa-protos.h
(xtensa_expand_block_set_unrolled_loop,
xtensa_expand_block_set_small_loop): Remove.
(xtensa_expand_block_set): New prototype.
* config/xtensa/xtensa.cc
(xtensa_expand_block_set_libcall): New subfunction.
(xtensa_expand_block_set_unrolled_loop,
xtensa_expand_block_set_small_loop): Rewrite as subfunctions.
(xtensa_expand_block_set): New function that calls the above
subfunctions.
* config/xtensa/xtensa.md (memsetsi): Change to invoke only
xtensa_expand_block_set().
---
 gcc/config/xtensa/xtensa-protos.h |   3 +-
 gcc/config/xtensa/xtensa.cc   | 319 --
 gcc/config/xtensa/xtensa.md   |   4 +-
 3 files changed, 172 insertions(+), 154 deletions(-)

diff --git a/gcc/config/xtensa/xtensa-protos.h 
b/gcc/config/xtensa/xtensa-protos.h
index ec715b44e4d..b0b15a42799 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -42,8 +42,7 @@ extern void xtensa_expand_conditional_branch (rtx *, 
machine_mode);
 extern int xtensa_expand_conditional_move (rtx *, int);
 extern int xtensa_expand_scc (rtx *, machine_mode);
 extern int xtensa_expand_block_move (rtx *);
-extern int xtensa_expand_block_set_unrolled_loop (rtx *);
-extern int xtensa_expand_block_set_small_loop (rtx *);
+extern int xtensa_expand_block_set (rtx *);
 extern void xtensa_split_operand_pair (rtx *, machine_mode);
 extern int xtensa_constantsynth (rtx, HOST_WIDE_INT);
 extern int xtensa_emit_move_sequence (rtx *, machine_mode);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 46ab9f36b56..3b5d25b660a 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-iter.h"
 #include "insn-attr.h"
 #include "tree-pass.h"
+#include "print-rtl.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -1530,77 +1531,61 @@ xtensa_expand_block_move (rtx *operands)
 }
 
 
-/* Try to expand a block set operation to a sequence of RTL move
-   instructions.  If not optimizing, or if the block size is not a
-   constant, or if the block is too large, or if the value to
-   initialize the block with is not a constant, the expansion
-   fails and GCC falls back to calling memset().
+/* Worker function for xtensa_expand_block_set().
 
-   operands[0] is the destination
-   operands[1] is the length
-   operands[2] is the initialization value
-   operands[3] is the alignment */
+   Expand into an insn sequence that calls the "memset" function.  */
 
-static int
-xtensa_sizeof_MOVI (HOST_WIDE_INT imm)
+static rtx_insn *
+xtensa_expand_block_set_libcall (rtx dst_mem,
+HOST_WIDE_INT value,
+HOST_WIDE_INT bytes)
 {
-  return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3;
+  rtx reg;
+  rtx_insn *seq;
+
+  start_sequence ();
+
+  reg = XEXP (dst_mem, 0);
+  if (! REG_P (reg))
+reg = XEXP (replace_equiv_address (dst_mem,
+  force_reg (Pmode, reg)), 0);
+  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "memset"),
+LCT_NORMAL, VOIDmode,
+reg, SImode,
+GEN_INT (value), SImode,
+GEN_INT (bytes), SImode);
+
+  seq = get_insns ();
+  end_sequence ();
+
+  return seq;
 }
 
-int
-xtensa_expand_block_set_unrolled_loop (rtx *operands)
+/* Worker function for xtensa_expand_block_set().
+
+   Expand into an insn sequence of one constant load followed by multiple
+   memory stores.  Returns NULL if the conditions for expansion are not
+   met.  */
+
+static rtx_insn *
+xtensa_expand_block_set_unrolled_loop (rtx dst_mem,
+  HOST_WIDE_INT value,
+  HOST_WIDE_INT bytes,
+  HOST_WIDE_INT align)
 {
-  rtx dst_mem = operands[0];
-  HOST_WIDE_INT bytes, value, align;
-  int expand_len, funccall_len;
-  rtx x, reg;
+  rtx reg;
   int offset;
+  rtx_insn *seq;
 
-  if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2]))
-return 0;
+  if (bytes > 64)
+return NULL;
 
-  bytes = INTVAL (operands[1]);
-  if (bytes <= 0)
-return 0;
-  value = (int8_t)INTVAL (operands[2]);
-  align = INTVAL (operands[3]);
-  if (align > MOVE_MAX)
-align = MOVE_MAX;
-
-  /* Insn expansion: holding the init value.
- Either MOV(.N) or L32R w/litpool.  */
-  if (align == 1)
-expand_len = xtensa_sizeof_MOVI (value);
-  else if (value == 0 || value == -1)
-expand_len = TARGET_DENSITY ? 2 : 3;
-  

[PATCH 1/3] xtensa: Addendum of the commit e33d2dcb463161a110ac345a451132ce8b2b23d9

2023-05-25 Thread Takayuki 'January June' Suwa via Gcc-patches
gcc/ChangeLog:

* config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3):
Retract excessive line folding, and correct the value of
the "length" insn attribute related to TARGET_DENSITY.
(*extzvsi-1bit_addsubx): Ditto.
---
 gcc/config/xtensa/xtensa.md | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 6c1d8ee8f81..11258125165 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1009,8 +1009,7 @@
(ashift:SI (match_dup 0)
   (match_dup 3)))]
 {
-  int pos = INTVAL (operands[2]),
-  shift = floor_log2 (INTVAL (operands[3]));
+  int pos = INTVAL (operands[2]), shift = floor_log2 (INTVAL (operands[3]));
   switch (GET_CODE (operands[4]))
 {
 case ASHIFT:
@@ -1029,7 +1028,10 @@
 }
   [(set_attr "type""arith")
(set_attr "mode""SI")
-   (set_attr "length"  "6")])
+   (set (attr "length")
+(if_then_else (match_test "TARGET_DENSITY && INTVAL (operands[3]) == 
2")
+ (const_int 5)
+ (const_int 6)))])
 
 (define_insn_and_split "*extzvsi-1bit_addsubx"
   [(set (match_operand:SI 0 "register_operand" "=a")
@@ -1053,8 +1055,7 @@
(match_dup 4))
 (match_dup 2)]))]
 {
-  int pos = INTVAL (operands[3]),
-  shift = floor_log2 (INTVAL (operands[4]));
+  int pos = INTVAL (operands[3]), shift = floor_log2 (INTVAL (operands[4]));
   switch (GET_CODE (operands[6]))
 {
 case ASHIFT:
-- 
2.30.2


[PATCH 2/3] xtensa: Add 'subtraction from constant' insn pattern

2023-05-25 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch tries to eliminate the use of a temporary pseudo for
'(minus:SI (const_int) (reg:SI))' if the addition of the negated constant
value can be emitted in a single machine instruction.

/* example */
int test0(int x) {
  return 1 - x;
}
int test1(int x) {
  return 100 - x;
}
int test2(int x) {
  return 25600 - x;
}

;; before
test0:
movi.n  a9, 1
sub a2, a9, a2
ret.n
test1:
movia9, 0x64
sub a2, a9, a2
ret.n
test2:
movi.n  a9, 0x19
sllia9, a9, 10
sub a2, a9, a2
ret.n

;; after
test0:
addi.n  a2, a2, -1
neg a2, a2
ret.n
test1:
addia2, a2, -100
neg a2, a2
ret.n
test2:
addmi   a2, a2, -0x6400
neg a2, a2
ret.n

gcc/ChangeLog:

* config/xtensa/xtensa-protos.h (xtensa_m1_or_1_thru_15):
New prototype.
* config/xtensa/xtensa.cc (xtensa_m1_or_1_thru_15):
New function.
* config/xtensa/constraints.md (O):
Change to use the above function.
* config/xtensa/xtensa.md (*subsi3_from_const):
New insn_and_split pattern.
---
 gcc/config/xtensa/constraints.md  |  2 +-
 gcc/config/xtensa/xtensa-protos.h |  1 +
 gcc/config/xtensa/xtensa.cc   |  7 +++
 gcc/config/xtensa/xtensa.md   | 24 
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 53e4d0d8dd1..5cade1db8ff 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -108,7 +108,7 @@
 (define_constraint "O"
  "An integer constant that can be used in ADDI.N instructions."
  (and (match_code "const_int")
-  (match_test "ival == -1 || IN_RANGE (ival, 1, 15)")))
+  (match_test "xtensa_m1_or_1_thru_15 (ival)")))
 
 (define_constraint "P"
  "An integer constant that can be used as a mask value in an EXTUI
diff --git a/gcc/config/xtensa/xtensa-protos.h 
b/gcc/config/xtensa/xtensa-protos.h
index 64cbf27c248..ec715b44e4d 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -27,6 +27,7 @@ extern bool xtensa_simm8x256 (HOST_WIDE_INT);
 extern bool xtensa_simm12b (HOST_WIDE_INT);
 extern bool xtensa_b4const_or_zero (HOST_WIDE_INT);
 extern bool xtensa_b4constu (HOST_WIDE_INT);
+extern bool xtensa_m1_or_1_thru_15 (HOST_WIDE_INT);
 extern bool xtensa_mask_immediate (HOST_WIDE_INT);
 extern bool xtensa_mem_offset (unsigned, machine_mode);
 
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index e3af78cd228..46ab9f36b56 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -471,6 +471,13 @@ xtensa_b4constu (HOST_WIDE_INT v)
 }
 
 
+bool
+xtensa_m1_or_1_thru_15 (HOST_WIDE_INT v)
+{
+  return v == -1 || IN_RANGE (v, 1, 15);
+}
+
+
 bool
 xtensa_mask_immediate (HOST_WIDE_INT v)
 {
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 11258125165..113b313026e 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -216,6 +216,30 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+(define_insn_and_split "*subsi3_from_const"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (minus:SI (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "register_operand" "r")))]
+  "xtensa_simm8 (-INTVAL (operands[1]))
+   || xtensa_simm8x256 (-INTVAL (operands[1]))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (plus:SI (match_dup 2)
+(match_dup 1)))
+   (set (match_dup 0)
+   (neg:SI (match_dup 0)))]
+{
+  operands[1] = GEN_INT (-INTVAL (operands[1]));
+}
+  [(set_attr "type""arith")
+   (set_attr "mode""SI")
+   (set (attr "length")
+   (if_then_else (match_test "TARGET_DENSITY
+  && xtensa_m1_or_1_thru_15 (-INTVAL 
(operands[1]))")
+ (const_int 5)
+ (const_int 6)))])
+
 (define_insn "subsf3"
   [(set (match_operand:SF 0 "register_operand" "=f")
(minus:SF (match_operand:SF 1 "register_operand" "f")
-- 
2.30.2


[PATCH v2] xtensa: Optimize '(x & CST1_POW2) != 0 ? CST2_POW2 : 0'

2023-05-22 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/05/23 11:27, Max Filippov wrote:
> Hi Suwa-san,

Hi!

> This change introduces a bunch of test failures on big endian configuration.
> I believe that's because the starting bit position for zero_extract is counted
> from different ends depending on the endianness.

Oops, what a stupid mistake... X(

===
This patch removes one machine instruction from the "single bit
extraction with shifting" operation, and tries to eliminate the
conditional branch if CST2_POW2 doesn't fit into signed 12 bits with the
help of the ifcvt optimization.

/* example #1 */
int test0(int x) {
  return (x & 1048576) != 0 ? 1024 : 0;
}
extern int foo(void);
int test1(void) {
  return (foo() & 1048576) != 0 ? 16777216 : 0;
}

;; before
test0:
movia9, 0x400
sraia2, a2, 10
and a2, a2, a9
ret.n
test1:
addisp, sp, -16
s32i.n  a0, sp, 12
call0   foo
extui   a2, a2, 20, 1
sllia2, a2, 20
beqz.n  a2, .L2
movi.n  a2, 1
sllia2, a2, 24
.L2:
l32i.n  a0, sp, 12
addisp, sp, 16
ret.n

;; after
test0:
extui   a2, a2, 20, 1
sllia2, a2, 10
ret.n
test1:
addisp, sp, -16
s32i.n  a0, sp, 12
call0   foo
l32i.n  a0, sp, 12
extui   a2, a2, 20, 1
sllia2, a2, 24
addisp, sp, 16
ret.n

In addition, if the left shift amount ('exact_log2(CST2_POW2)') is
between 1 and 3 and either an addition or a subtraction with another
register follows, emit an ADDX[248] or SUBX[248] machine instruction
instead of separate left shift and add/subtract ones.

/* example #2 */
int test2(int x, int y) {
  return ((x & 1048576) != 0 ? 4 : 0) + y;
}
int test3(int x, int y) {
  return ((x & 2) != 0 ? 8 : 0) - y;
}

;; before
test2:
movi.n  a9, 4
sraia2, a2, 18
and a2, a2, a9
add.n   a2, a2, a3
ret.n
test3:
movi.n  a9, 8
sllia2, a2, 2
and a2, a2, a9
sub a2, a2, a3
ret.n

;; after
test2:
extui   a2, a2, 20, 1
addx4   a2, a2, a3
ret.n
test3:
extui   a2, a2, 1, 1
subx8   a2, a2, a3
ret.n

gcc/ChangeLog:

* config/xtensa/predicates.md (addsub_operator): New.
* config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3,
*extzvsi-1bit_addsubx): New insn_and_split patterns.
* config/xtensa/xtensa.cc (xtensa_rtx_costs):
Add a special case about ifcvt 'noce_try_cmove()' to handle
constant loads that do not fit into signed 12 bits in the
patterns added above.
---
 gcc/config/xtensa/predicates.md |  3 ++
 gcc/config/xtensa/xtensa.cc |  3 +-
 gcc/config/xtensa/xtensa.md | 83 +
 3 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 2dac193373a..5faf1be8c15 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -191,6 +191,9 @@
 (define_predicate "logical_shift_operator"
   (match_code "ashift,lshiftrt"))
 
+(define_predicate "addsub_operator"
+  (match_code "plus,minus"))
+
 (define_predicate "xtensa_cstoresi_operator"
   (match_code "eq,ne,gt,ge,lt,le"))
 
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index bb1444c44b6..e3af78cd228 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4355,7 +4355,8 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
   switch (outer_code)
{
case SET:
- if (xtensa_simm12b (INTVAL (x)))
+ if (xtensa_simm12b (INTVAL (x))
+ || (current_pass && current_pass->tv_id == TV_IFCVT))
{
  *total = speed ? COSTS_N_INSNS (1) : 0;
  return true;
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 3521fa33b47..c75fde1023a 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -997,6 +997,89 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+(define_insn_and_split "*extzvsi-1bit_ashlsi3"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (and:SI (match_operator:SI 4 "logical_shift_operator"
+   [(match_operand:SI 1 "register_operand" "r")
+(match_operand:SI 2 "const_int_operand" "i")])
+   (match_operand:SI 3 "const_int_operand" "i")))]
+  "exact_log2 (INTVAL (operands[3])) > 0"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (zero_extract:SI (match_dup 1)
+(const_int 1)
+(match_dup 2)))
+   (set (match_dup 0)
+   (ashift:SI (match_dup 0)
+  (match_dup 3)))]
+{
+  int pos = INTVAL (operands[2]),
+  shift = floor_log2 (INTVAL (operands[3]));
+ 

[PATCH 1/2] xtensa: Optimize '(x & CST1_POW2) != 0 ? CST2_POW2 : 0'

2023-05-22 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch removes one machine instruction from the "single bit
extraction with shifting" operation, and tries to eliminate the
conditional branch if CST2_POW2 doesn't fit into signed 12 bits with the
help of the ifcvt optimization.

/* example #1 */
int test0(int x) {
  return (x & 1048576) != 0 ? 1024 : 0;
}
extern int foo(void);
int test1(void) {
  return (foo() & 1048576) != 0 ? 16777216 : 0;
}

;; before
test0:
movia9, 0x400
sraia2, a2, 10
and a2, a2, a9
ret.n
test1:
addisp, sp, -16
s32i.n  a0, sp, 12
call0   foo
extui   a2, a2, 20, 1
sllia2, a2, 20
beqz.n  a2, .L2
movi.n  a2, 1
sllia2, a2, 24
.L2:
l32i.n  a0, sp, 12
addisp, sp, 16
ret.n

;; after
test0:
extui   a2, a2, 20, 1
sllia2, a2, 10
ret.n
test1:
addisp, sp, -16
s32i.n  a0, sp, 12
call0   foo
l32i.n  a0, sp, 12
extui   a2, a2, 20, 1
sllia2, a2, 24
addisp, sp, 16
ret.n

In addition, if the left shift amount ('exact_log2(CST2_POW2)') is
between 1 and 3 and either an addition or a subtraction with another
register follows, emit an ADDX[248] or SUBX[248] machine instruction
instead of separate left shift and add/subtract ones.

/* example #2 */
int test2(int x, int y) {
  return ((x & 1048576) != 0 ? 4 : 0) + y;
}
int test3(int x, int y) {
  return ((x & 2) != 0 ? 8 : 0) - y;
}

;; before
test2:
movi.n  a9, 4
sraia2, a2, 18
and a2, a2, a9
add.n   a2, a2, a3
ret.n
test3:
movi.n  a9, 8
sllia2, a2, 2
and a2, a2, a9
sub a2, a2, a3
ret.n

;; after
test2:
extui   a2, a2, 20, 1
addx4   a2, a2, a3
ret.n
test3:
extui   a2, a2, 1, 1
subx8   a2, a2, a3
ret.n

gcc/ChangeLog:

* config/xtensa/predicates.md (addsub_operator): New.
* config/xtensa/xtensa.md (*extzvsi-1bit_ashlsi3,
*extzvsi-1bit_addsubx): New insn_and_split patterns.
* config/xtensa/xtensa.cc (xtensa_rtx_costs):
Add a special case about ifcvt 'noce_try_cmove()' to handle
constant loads that do not fit into signed 12 bits in the
patterns added above.
---
 gcc/config/xtensa/predicates.md |  3 ++
 gcc/config/xtensa/xtensa.cc |  3 +-
 gcc/config/xtensa/xtensa.md | 75 +
 3 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index 2dac193373a..5faf1be8c15 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -191,6 +191,9 @@
 (define_predicate "logical_shift_operator"
   (match_code "ashift,lshiftrt"))
 
+(define_predicate "addsub_operator"
+  (match_code "plus,minus"))
+
 (define_predicate "xtensa_cstoresi_operator"
   (match_code "eq,ne,gt,ge,lt,le"))
 
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index bb1444c44b6..e3af78cd228 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4355,7 +4355,8 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
   switch (outer_code)
{
case SET:
- if (xtensa_simm12b (INTVAL (x)))
+ if (xtensa_simm12b (INTVAL (x))
+ || (current_pass && current_pass->tv_id == TV_IFCVT))
{
  *total = speed ? COSTS_N_INSNS (1) : 0;
  return true;
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 3521fa33b47..bd4614e4be0 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -997,6 +997,81 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+(define_insn_and_split "*extzvsi-1bit_ashlsi3"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (and:SI (match_operator:SI 4 "logical_shift_operator"
+   [(match_operand:SI 1 "register_operand" "r")
+(match_operand:SI 2 "const_int_operand" "i")])
+   (match_operand:SI 3 "const_int_operand" "i")))]
+  "exact_log2 (INTVAL (operands[3])) > 0"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (zero_extract:SI (match_dup 1)
+(const_int 1)
+(match_dup 2)))
+   (set (match_dup 0)
+   (ashift:SI (match_dup 0)
+  (match_dup 3)))]
+{
+  int shift = floor_log2 (INTVAL (operands[3]));
+  switch (GET_CODE (operands[4]))
+{
+case ASHIFT:
+  operands[2] = GEN_INT (shift - INTVAL (operands[2]));
+  break;
+case LSHIFTRT:
+  operands[2] = GEN_INT (shift + INTVAL (operands[2]));
+  break;
+default:
+  gcc_unreachable ();
+}
+  operands[3] = GEN_INT (shift);
+}
+  [(set_attr "type""arith")
+ 

[PATCH 2/2] xtensa: Merge '*addx' and '*subx' insn patterns into one

2023-05-22 Thread Takayuki 'January June' Suwa via Gcc-patches
By making use of the 'addsub_operator' added in the last patch.

gcc/ChangeLog:

* config/xtensa/xtensa.md (*addsubx): Rename from '*addx',
and change to also accept '*subx' pattern.
(*subx): Remove.
---
 gcc/config/xtensa/xtensa.md | 31 +--
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index bd4614e4be0..f3313266645 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -170,15 +170,24 @@
(set_attr "mode""SI")
(set_attr "length"  "2,2,3,3,3")])
 
-(define_insn "*addx"
+(define_insn "*addsubx"
   [(set (match_operand:SI 0 "register_operand" "=a")
-   (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+   (match_operator:SI 4 "addsub_operator"
+   [(ashift:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 3 "addsubx_operand" "i"))
-(match_operand:SI 2 "register_operand" "r")))]
+(match_operand:SI 2 "register_operand" "r")]))]
   "TARGET_ADDX"
 {
   operands[3] = GEN_INT (1 << INTVAL (operands[3]));
-  return "addx%3\t%0, %1, %2";
+  switch (GET_CODE (operands[4]))
+{
+case PLUS:
+  return "addx%3\t%0, %1, %2";
+case MINUS:
+  return "subx%3\t%0, %1, %2";
+default:
+  gcc_unreachable ();
+}
 }
   [(set_attr "type""arith")
(set_attr "mode""SI")
@@ -207,20 +216,6 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
-(define_insn "*subx"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-   (minus:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
-(match_operand:SI 3 "addsubx_operand" "i"))
- (match_operand:SI 2 "register_operand" "r")))]
-  "TARGET_ADDX"
-{
-  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
-  return "subx%3\t%0, %1, %2";
-}
-  [(set_attr "type""arith")
-   (set_attr "mode""SI")
-   (set_attr "length"  "3")])
-
 (define_insn "subsf3"
   [(set (match_operand:SF 0 "register_operand" "=f")
(minus:SF (match_operand:SF 1 "register_operand" "f")
-- 
2.30.2


[PATCH v2] xtensa: Make full transition to LRA

2023-05-08 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/05/08 22:43, Richard Biener wrote:
[snip]
>> -mlra
> 
> If they were in any released compiler options should be kept
> (doing nothing) for backward compatibility.  Use for example
> 
> mlra
> Target WarnRemoved
> Removed in GCC 14.  This switch has no effect.
> 
> or
> 
> mlra
> Target Ignore
> Does nothing.  Preserved for backward compatibility.
> 
> which doesn't inform the user (I think that's the better choice here).
> 
>> -Target Mask(LRA)
Thank you for your helpful advice.

=
gcc/ChangeLog:

* config/xtensa/constraints.md (R, T, U):
Change define_constraint to define_memory_constraint.
* config/xtensa/xtensa.cc
(xtensa_lra_p, TARGET_LRA_P): Remove.
(xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
clause as it can no longer be true.
(xtensa_output_integer_literal_parts): Consider 16-bit wide
constants.
(xtensa_legitimate_constant_p): Add short-circuit path for
integer load instructions.
* config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
rather than reload_in_progress and reload_completed.
* config/xtensa/xtensa.opt (mlra): Change to no effect.
---
 gcc/config/xtensa/constraints.md | 26 --
 gcc/config/xtensa/xtensa.cc  | 26 +-
 gcc/config/xtensa/xtensa.md  |  2 +-
 gcc/config/xtensa/xtensa.opt |  4 ++--
 4 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 53e4d0d8dd1..9b31e162941 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -123,29 +123,19 @@
   (and (match_code "const_int")
   (match_test "! xtensa_split1_finished_p ()"
 
-;; Memory constraints.  Do not use define_memory_constraint here.  Doing so
-;; causes reload to force some constants into the constant pool, but since
-;; the Xtensa constant pool can only be accessed with L32R instructions, it
-;; is always better to just copy a constant into a register.  Instead, use
-;; regular constraints but add a check to allow pseudos during reload.
+;; Memory constraints.
 
-(define_constraint "R"
+(define_memory_constraint "R"
  "Memory that can be accessed with a 4-bit unsigned offset from a register."
- (ior (and (match_code "mem")
-  (match_test "smalloffset_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "smalloffset_mem_p (op)")))
 
-(define_constraint "T"
+(define_memory_constraint "T"
  "Memory in a literal pool (addressable with an L32R instruction)."
  (and (match_code "mem")
   (match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
 
-(define_constraint "U"
+(define_memory_constraint "U"
  "Memory that is not in a literal pool."
- (ior (and (match_code "mem")
-  (match_test "! constantpool_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "! constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 9e5d314e143..f4434ec6e2c 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -190,7 +190,6 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk 
ATTRIBUTE_UNUSED,
HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset,
tree function);
-static bool xtensa_lra_p (void);
 
 static rtx xtensa_delegitimize_address (rtx);
 
@@ -286,9 +285,6 @@ static rtx xtensa_delegitimize_address (rtx);
 #undef TARGET_CANNOT_FORCE_CONST_MEM
 #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P xtensa_lra_p
-
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p
 
@@ -1266,14 +1262,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode 
mode)
 
   operands[1] = xtensa_copy_incoming_a7 (operands[1]);
 
-  /* During reload we don't want to emit (subreg:X (mem:Y)) since that
- instruction won't be recognized after reload, so we remove the
- subreg and adjust mem accordingly.  */
-  if (reload_in_progress)
-{
-  operands[0] = fixup_subreg_mem (operands[0]);
-  operands[1] = fixup_subreg_mem (operands[1]);
-}
   return 0;
 }
 
@@ -3196,7 +3184,7 @@ xtensa_output_integer_literal_parts (FILE *file, rtx x, 
int size)
   fputs (", ", file);
   xtensa_output_integer_literal_parts (file, second, size / 2);
 }
-  else if (size == 4)
+  else if (size == 4 || size == 2)
 {
   output_addr_const (file, x);
 }
@@ -4876,6 +4864,10 @@ xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain)
 

[PATCH] xtensa: Make full transition to LRA

2023-05-08 Thread Takayuki 'January June' Suwa via Gcc-patches
gcc/ChangeLog:

* config/xtensa/constraints.md (R, T, U):
Change define_constraint to define_memory_constraint.
* config/xtensa/xtensa.cc
(xtensa_lra_p, TARGET_LRA_P): Remove.
(xtensa_emit_move_sequence): Remove "if (reload_in_progress)"
clause as it can no longer be true.
(xtensa_output_integer_literal_parts): Consider 16-bit wide
constants.
(xtensa_legitimate_constant_p): Add short-circuit path for
integer load instructions.
* config/xtensa/xtensa.md (movsf): Use can_create_pseudo_p()
rather than reload_in_progress and reload_completed.
* config/xtensa/xtensa.opt (mlra): Remove.
---
 gcc/config/xtensa/constraints.md | 26 --
 gcc/config/xtensa/xtensa.cc  | 26 +-
 gcc/config/xtensa/xtensa.md  |  2 +-
 gcc/config/xtensa/xtensa.opt |  4 
 4 files changed, 14 insertions(+), 44 deletions(-)

diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 53e4d0d8dd1..9b31e162941 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -123,29 +123,19 @@
   (and (match_code "const_int")
   (match_test "! xtensa_split1_finished_p ()"
 
-;; Memory constraints.  Do not use define_memory_constraint here.  Doing so
-;; causes reload to force some constants into the constant pool, but since
-;; the Xtensa constant pool can only be accessed with L32R instructions, it
-;; is always better to just copy a constant into a register.  Instead, use
-;; regular constraints but add a check to allow pseudos during reload.
+;; Memory constraints.
 
-(define_constraint "R"
+(define_memory_constraint "R"
  "Memory that can be accessed with a 4-bit unsigned offset from a register."
- (ior (and (match_code "mem")
-  (match_test "smalloffset_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "smalloffset_mem_p (op)")))
 
-(define_constraint "T"
+(define_memory_constraint "T"
  "Memory in a literal pool (addressable with an L32R instruction)."
  (and (match_code "mem")
   (match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
 
-(define_constraint "U"
+(define_memory_constraint "U"
  "Memory that is not in a literal pool."
- (ior (and (match_code "mem")
-  (match_test "! constantpool_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "! constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 9e5d314e143..f4434ec6e2c 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -190,7 +190,6 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk 
ATTRIBUTE_UNUSED,
HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset,
tree function);
-static bool xtensa_lra_p (void);
 
 static rtx xtensa_delegitimize_address (rtx);
 
@@ -286,9 +285,6 @@ static rtx xtensa_delegitimize_address (rtx);
 #undef TARGET_CANNOT_FORCE_CONST_MEM
 #define TARGET_CANNOT_FORCE_CONST_MEM xtensa_cannot_force_const_mem
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P xtensa_lra_p
-
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p
 
@@ -1266,14 +1262,6 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode 
mode)
 
   operands[1] = xtensa_copy_incoming_a7 (operands[1]);
 
-  /* During reload we don't want to emit (subreg:X (mem:Y)) since that
- instruction won't be recognized after reload, so we remove the
- subreg and adjust mem accordingly.  */
-  if (reload_in_progress)
-{
-  operands[0] = fixup_subreg_mem (operands[0]);
-  operands[1] = fixup_subreg_mem (operands[1]);
-}
   return 0;
 }
 
@@ -3196,7 +3184,7 @@ xtensa_output_integer_literal_parts (FILE *file, rtx x, 
int size)
   fputs (", ", file);
   xtensa_output_integer_literal_parts (file, second, size / 2);
 }
-  else if (size == 4)
+  else if (size == 4 || size == 2)
 {
   output_addr_const (file, x);
 }
@@ -4876,6 +4864,10 @@ xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain)
 static bool
 xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 {
+  if (CONST_INT_P (x))
+return TARGET_AUTO_LITPOOLS || TARGET_CONST16
+  || xtensa_simm12b (INTVAL (x));
+
   return !xtensa_tls_referenced_p (x);
 }
 
@@ -5317,12 +5309,4 @@ xtensa_delegitimize_address (rtx op)
   return op;
 }
 
-/* Implement TARGET_LRA_P.  */
-
-static bool
-xtensa_lra_p (void)
-{
-  return TARGET_LRA;
-}
-
 #include "gt-xtensa.h"
diff --git a/gcc/config/xtensa/xtensa.md 

[PATCH] xtensa: Remove REG_OK_STRICT and its derivatives

2023-03-12 Thread Takayuki 'January June' Suwa via Gcc-patches
Because GO_IF_LEGITIMATE_ADDRESS was deprecated a long time ago
(see commit c6c3dba931548987c78719180e30ebc863404b89).

gcc/ChangeLog:

* config/xtensa/xtensa.h (REG_OK_STRICT, REG_OK_FOR_INDEX_P,
REG_OK_FOR_BASE_P): Remove.
---
 gcc/config/xtensa/xtensa.h | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 058602e44ee..8ebf37cab33 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -590,19 +590,10 @@ typedef struct xtensa_args
 /* C expressions that are nonzero if X (assumed to be a `reg' RTX) is
valid for use as a base or index register.  */
 
-#ifdef REG_OK_STRICT
-#define REG_OK_STRICT_FLAG 1
-#else
-#define REG_OK_STRICT_FLAG 0
-#endif
-
 #define BASE_REG_P(X, STRICT)  \
-  ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER)   \
+  ((!(STRICT) && ! HARD_REGISTER_P (X))
\
|| REGNO_OK_FOR_BASE_P (REGNO (X)))
 
-#define REG_OK_FOR_INDEX_P(X) 0
-#define REG_OK_FOR_BASE_P(X) BASE_REG_P (X, REG_OK_STRICT_FLAG)
-
 /* Maximum number of registers that can appear in a valid memory address.  */
 #define MAX_REGS_PER_ADDRESS 1
 
-- 
2.30.2


[PATCH] xtensa: Fix for enabling LRA

2023-03-07 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch makes LRA work well, with some exceptions
(e.g. MI thunk generation due to pretending reload_completed).

gcc/ChangeLog:

* config/xtensa/constraints.md (R, T, U):
Change define_constraint to define_memory_constraint.
* config/xtensa/xtensa.cc (xtensa_legitimate_constant_p):
Add short-circuit path for integer load instructions when
lra_in_progress.
* config/xtensa/xtensa.md (movsf):
Use can_create_pseudo_p() rather than reload_in_progress and
reload_completed.
---
 gcc/config/xtensa/constraints.md | 26 --
 gcc/config/xtensa/xtensa.cc  |  4 
 gcc/config/xtensa/xtensa.md  |  2 +-
 3 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 53e4d0d8dd1..9b31e162941 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -123,29 +123,19 @@
   (and (match_code "const_int")
   (match_test "! xtensa_split1_finished_p ()"
 
-;; Memory constraints.  Do not use define_memory_constraint here.  Doing so
-;; causes reload to force some constants into the constant pool, but since
-;; the Xtensa constant pool can only be accessed with L32R instructions, it
-;; is always better to just copy a constant into a register.  Instead, use
-;; regular constraints but add a check to allow pseudos during reload.
+;; Memory constraints.
 
-(define_constraint "R"
+(define_memory_constraint "R"
  "Memory that can be accessed with a 4-bit unsigned offset from a register."
- (ior (and (match_code "mem")
-  (match_test "smalloffset_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "smalloffset_mem_p (op)")))
 
-(define_constraint "T"
+(define_memory_constraint "T"
  "Memory in a literal pool (addressable with an L32R instruction)."
  (and (match_code "mem")
   (match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
 
-(define_constraint "U"
+(define_memory_constraint "U"
  "Memory that is not in a literal pool."
- (ior (and (match_code "mem")
-  (match_test "! constantpool_mem_p (op)"))
-  (and (match_code "reg")
-  (match_test "reload_in_progress
-   && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+ (and (match_code "mem")
+  (match_test "! constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 7287aa7a258..a500dc2a06e 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4872,6 +4872,10 @@ xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain)
 static bool
 xtensa_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 {
+  if (lra_in_progress && CONST_INT_P (x))
+return TARGET_AUTO_LITPOOLS || TARGET_CONST16
+  || xtensa_simm12b (INTVAL (x));
+
   return !xtensa_tls_referenced_p (x);
 }
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 3521fa33b47..195515d9427 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1268,7 +1268,7 @@
   if ((!register_operand (operands[0], SFmode)
&& !register_operand (operands[1], SFmode))
   || (FP_REG_P (xt_true_regnum (operands[0]))
- && !(reload_in_progress | reload_completed)
+ && can_create_pseudo_p ()
  && (constantpool_mem_p (operands[1])
  || CONSTANT_P (operands[1]
 operands[1] = force_reg (SFmode, operands[1]);
-- 
2.30.2


[PATCH] xtensa: Make use of CLAMPS instruction if configured

2023-02-26 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch introduces the use of the CLAMPS instruction when that instruction
is configured.

/* example */
int test(int a) {
  if (a < -512)
return -512;
  if (a > 511)
return 511;
  return a;
}

;; prereq: TARGET_CLAMPS
test:
clamps  a2, a2, 9
ret.n

gcc/ChangeLog:

* config/xtensa/xtensa-protos.h (xtensa_match_CLAMPS_imms_p):
New prototype.
* config/xtensa/xtensa.cc (xtensa_match_CLAMPS_imms_p):
New function.
* config/xtensa/xtensa.h (TARGET_CLAMPS): New macro definition.
* config/xtensa/xtensa.md (*xtensa_clamps): New insn pattern.
---
 gcc/config/xtensa/xtensa-protos.h |  1 +
 gcc/config/xtensa/xtensa.cc   | 13 +++
 gcc/config/xtensa/xtensa.h|  1 +
 gcc/config/xtensa/xtensa.md   | 37 +++
 4 files changed, 52 insertions(+)

diff --git a/gcc/config/xtensa/xtensa-protos.h 
b/gcc/config/xtensa/xtensa-protos.h
index c81cf94323a..64cbf27c248 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -60,6 +60,7 @@ extern bool xtensa_tls_referenced_p (rtx);
 extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
 extern bool xtensa_split1_finished_p (void);
 extern void xtensa_split_DI_reg_imm (rtx *);
+extern bool xtensa_match_CLAMPS_imms_p (rtx, rtx);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 5044bc25c2f..7287aa7a258 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2611,6 +2611,19 @@ xtensa_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT 
imm, rtx scratch,
 }
 
 
+/* Return true if the constants used in the application of smin() following
+   smax() meet the specifications of the CLAMPS machine instruction.  */
+bool
+xtensa_match_CLAMPS_imms_p (rtx cst_max, rtx cst_min)
+{
+  int max, min;
+
+  return IN_RANGE (max = exact_log2 (-INTVAL (cst_max)), 7, 22)
+&& IN_RANGE (min = exact_log2 (INTVAL (cst_min) + 1), 7, 22)
+&& max == min;
+}
+
+
 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
 
 static bool
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index d4cd5def7b5..058602e44ee 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #define TARGET_NSA XCHAL_HAVE_NSA
 #define TARGET_MINMAX  XCHAL_HAVE_MINMAX
 #define TARGET_SEXT  XCHAL_HAVE_SEXT
+#define TARGET_CLAMPS  XCHAL_HAVE_CLAMPS
 #define TARGET_BOOLEANS  XCHAL_HAVE_BOOLEANS
 #define TARGET_HARD_FLOAT  XCHAL_HAVE_FP
 #define TARGET_HARD_FLOAT_DIV  XCHAL_HAVE_FP_DIV
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index b60dec2447f..3521fa33b47 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -446,6 +446,43 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+
+;; Signed clamp.
+
+(define_insn_and_split "*xtensa_clamps"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (smax:SI (smin:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+(match_operand:SI 3 "const_int_operand" "i")))]
+  "TARGET_CLAMPS
+   && xtensa_match_CLAMPS_imms_p (operands[3], operands[2])"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (smin:SI (smax:SI (match_dup 1)
+ (match_dup 3))
+(match_dup 2)))]
+  ""
+  [(set_attr "type""arith")
+   (set_attr "mode""SI")
+   (set_attr "length"  "3")])
+
+(define_insn "*xtensa_clamps"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (smin:SI (smax:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+(match_operand:SI 3 "const_int_operand" "i")))]
+  "TARGET_CLAMPS
+   && xtensa_match_CLAMPS_imms_p (operands[2], operands[3])"
+{
+  static char result[64];
+  sprintf (result, "clamps\t%%0, %%1, %d", floor_log2 (-INTVAL (operands[2])));
+  return result;
+}
+  [(set_attr "type""arith")
+   (set_attr "mode""SI")
+   (set_attr "length"  "3")])
+
 
 ;; Count redundant leading sign bits and leading/trailing zeros,
 ;; and find first bit.
-- 
2.30.2

(Totally off-topic, but do you know anything about the SALT/SALTU instructions?
I see them in the "Core Architecture Instructions" in a recent Cadence document
but not in a slightly older Tensilica one...)


Re: [PATCH] gcc: xtensa: fix PR target/108919

2023-02-25 Thread Takayuki 'January June' Suwa via Gcc-patches
Hello, Max:

On 2023/02/25 19:01, Max Filippov wrote:

> gcc/
>   PR target/108919
> 
>   * config/xtensa/xtensa-protos.h
>   (xtensa_prepare_expand_call): Rename to xtensa_expand_call.
>   * config/xtensa/xtensa.cc (xtensa_prepare_expand_call): Rename
>   to xtensa_expand_call.
>   (xtensa_expand_call): Emit the call and add a clobber expression
>   for the static chain to it in case of windowed ABI.
>   * config/xtensa/xtensa.md (call, call_value, sibcall)
>   (sibcall_value): Call xtensa_expand_call and complete expansion
>   right after that call.
> 
> gcc/testsuite/
>   * gcc.target/xtensa/pr108919.c: New test.
> ---
>  gcc/config/xtensa/xtensa-protos.h  |  2 +-
>  gcc/config/xtensa/xtensa.cc| 25 +++-
>  gcc/config/xtensa/xtensa.md| 12 --
>  gcc/testsuite/gcc.target/xtensa/pr108919.c | 46 ++
>  4 files changed, 79 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/pr108919.c
> 
> diff --git a/gcc/config/xtensa/xtensa-protos.h 
> b/gcc/config/xtensa/xtensa-protos.h
> index ecd0f0c8d108..c81cf94323ac 100644
> --- a/gcc/config/xtensa/xtensa-protos.h
> +++ b/gcc/config/xtensa/xtensa-protos.h
> @@ -53,7 +53,7 @@ extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, 
> rtx, bool);
>  extern void xtensa_emit_loop_end (rtx_insn *, rtx *);
>  extern char *xtensa_emit_branch (bool, rtx *);
>  extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
> -extern void xtensa_prepare_expand_call (int, rtx *);
> +extern void xtensa_expand_call (int, rtx *);
>  extern char *xtensa_emit_call (int, rtx *);
>  extern char *xtensa_emit_sibcall (int, rtx *);
>  extern bool xtensa_tls_referenced_p (rtx);
> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
> index e52fba082550..babe7f0ebd68 100644
> --- a/gcc/config/xtensa/xtensa.cc
> +++ b/gcc/config/xtensa/xtensa.cc
> @@ -2183,8 +2183,10 @@ xtensa_emit_movcc (bool inverted, bool isfp, bool 
> isbool, rtx *operands)
>  
>  
>  void
> -xtensa_prepare_expand_call (int callop, rtx *operands)
> +xtensa_expand_call (int callop, rtx *operands)
>  {
> +  rtx call;
> +  rtx call_insn;

;; This should be rtx_insn* rather than rtx,
-  rtx call_insn;
+  rtx_insn *call_insn;

>rtx addr = XEXP (operands[callop], 0);
>  
>if (flag_pic && SYMBOL_REF_P (addr)
> @@ -2202,6 +2204,27 @@ xtensa_prepare_expand_call (int callop, rtx *operands)
>Pmode);
>XEXP (operands[callop], 0) = reg;
>  }
> +
> +  call = gen_rtx_CALL (VOIDmode, operands[callop], operands[callop + 1]);
> +
> +  if (callop)
> +call_insn = emit_call_insn (gen_rtx_SET (operands[0], call));
> +  else
> +call_insn = emit_call_insn (call);

;; Simpler,
   call = gen_rtx_CALL (VOIDmode, operands[callop], operands[callop + 1]);
-
   if (callop)
-call_insn = emit_call_insn (gen_rtx_SET (operands[0], call));
-  else
-call_insn = emit_call_insn (call);
+call = gen_rtx_SET (operands[0], call);
+  call_insn = emit_call_insn (call);

> +
> +  if (TARGET_WINDOWED_ABI)
> +{
> +  /*
> +   * Windowed xtensa ABI specifies that static chain pointer is passed
> +   * in memory below the caller stack pointer, which means that the 
> callee
> +   * will likely clobber it if it's a non-leaf function.
> +   * Add the clobber expression for the static chain to the function call
> +   * expression list so that it is not assumed to be live across the 
> call.
> +   */
> +  rtx clob = gen_rtx_CLOBBER (Pmode, xtensa_static_chain (NULL, false));
> +  CALL_INSN_FUNCTION_USAGE (call_insn) =
> + gen_rtx_EXPR_LIST (Pmode, clob, CALL_INSN_FUNCTION_USAGE (call_insn));
> +}
>  }
>  
>  
> diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
> index cf25beb83d54..b60dec2447f3 100644
> --- a/gcc/config/xtensa/xtensa.md
> +++ b/gcc/config/xtensa/xtensa.md
> @@ -2333,7 +2333,8 @@
>(match_operand 1 "" ""))]
>""
>  {
> -  xtensa_prepare_expand_call (0, operands);
> +  xtensa_expand_call (0, operands);
> +  DONE;
>  })
>  
>  (define_insn "call_internal"
> @@ -2353,7 +2354,8 @@
> (match_operand 2 "" "")))]
>""
>  {
> -  xtensa_prepare_expand_call (1, operands);
> +  xtensa_expand_call (1, operands);
> +  DONE;
>  })
>  
>  (define_insn "call_value_internal"
> @@ -2373,7 +2375,8 @@
>(match_operand 1 "" ""))]
>"!TARGET_WINDOWED_ABI"
>  {
> -  xtensa_prepare_expand_call (0, operands);
> +  xtensa_expand_call (0, operands);
> +  DONE;
>  })
>  
>  (define_insn "sibcall_internal"
> @@ -2393,7 +2396,8 @@
> (match_operand 2 "" "")))]
>"!TARGET_WINDOWED_ABI"
>  {
> -  xtensa_prepare_expand_call (1, operands);
> +  xtensa_expand_call (1, operands);
> +  DONE;
>  })
>  
>  (define_insn "sibcall_value_internal"
> diff --git a/gcc/testsuite/gcc.target/xtensa/pr108919.c 
> 

[PATCH 2/2] xtensa: Fix missing mode warnings in machine description

2023-02-22 Thread Takayuki 'January June' Suwa via Gcc-patches
gcc/ChangeLog:

* config/xtensa/xtensa.md
(zero_cost_loop_start, zero_cost_loop_end, loop_end):
Add missing "SI:" to PLUS RTXes.
---
 gcc/config/xtensa/xtensa.md | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index d3996b26cb5..d6116d63ddf 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2028,8 +2028,8 @@
  (label_ref (match_operand 1 "" ""))
  (pc)))
(set (match_operand:SI 0 "register_operand" "=a")
-   (plus (match_dup 0)
- (const_int -1)))
+   (plus:SI (match_dup 0)
+(const_int -1)))
(unspec [(const_int 0)] UNSPEC_LSETUP_START)]
   "TARGET_LOOPS && optimize"
   "loop\t%0, %l1_LEND"
@@ -2044,8 +2044,8 @@
  (label_ref (match_operand 1 "" ""))
  (pc)))
(set (match_operand:SI 0 "nonimmediate_operand" "=a,m")
-   (plus (match_dup 0)
- (const_int -1)))
+   (plus:SI (match_dup 0)
+(const_int -1)))
(unspec [(const_int 0)] UNSPEC_LSETUP_END)
(clobber (match_scratch:SI 3 "=X,&r"))]
   "TARGET_LOOPS && optimize"
@@ -2061,8 +2061,8 @@
  (label_ref (match_operand 1 "" ""))
  (pc)))
(set (match_operand:SI 0 "register_operand" "=a")
-   (plus (match_dup 0)
- (const_int -1)))
+   (plus:SI (match_dup 0)
+(const_int -1)))
(unspec [(const_int 0)] UNSPEC_LSETUP_END)]
   "TARGET_LOOPS && optimize"
 {
-- 
2.30.2


[PATCH 1/2] xtensa: Fix non-fatal regression introduced by b2ef02e8cbbaf95fee98be255f697f47193960ec

2023-02-22 Thread Takayuki 'January June' Suwa via Gcc-patches
In commit b2ef02e8cbbaf95fee98be255f697f47193960ec, the sibling call
insn included (use (reg:SI A0_REG)) to fix the problem, which added
a USE chain unconditionally to the data flow of register A0 during
the sibling call.

As a result, df_regs_ever_live_p (A0_REG) returns true, so even if
register A0 is not used outside of the sibling call insn, saves and
restores to stack slots are emitted in pro/epilogue, and finally
code size increases.
(This is why I never included (use A0) in sibling calls)

/* example */
extern int foo(int);
int test(int a) {
  return foo(a * 3 + 1);
}

;; before
test:
addi    sp, sp, -16 ;; unneeded stack frame allocation (induced)
s32i.n  a0, sp, 12  ;; unneeded saving of register A0
l32i.n  a0, sp, 12  ;; unneeded restoration of register A0
addx2   a2, a2, a2
addi.n  a2, a2, 1
addi    sp, sp, 16  ;; unneeded stack frame freeing (induced)
j.l foo, a9 ;; sibling call (truly needs register A0)

The essential cause is that we emit (use A0) *before* the insns that
do the stack pointer adjustment during epilogue expansion, so the
liveness of register A0 ends early and register A0 is reused afterwards.

This patch fixes the problem and avoids such a regression by emitting
(use A0) at the end of the sibling call epilogue expansion.

;; after
test:
addx2   a2, a2, a2
addi.n  a2, a2, 1
j.l foo, a9

From RTL-pass "315r.rnreg" by
"gfortran -O3 -funroll-loops -mabi=call0 -S -da 
gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":

;; Function selector_init (__selectors_MOD_selector_init, funcdef_no=2, 
decl_uid=987, cgraph_uid=3, symbol_order=4)
...
(insn 3807 3806 3808 121 (set (reg:SI 15 a15)
(mem/c:SI (plus:SI (reg/f:SI 1 sp)
(const_int 268 [0x10c])) [31  S4 A32])) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 
{movsi_internal}
 (nil))
(insn 3808 3807 3809 121 (set (reg:SI 7 a7)
(const_int 288 [0x120])) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 53 
{movsi_internal}
 (nil))
(insn 3809 3808 3810 121 (set (reg/f:SI 1 sp)
(plus:SI (reg/f:SI 1 sp)
(reg:SI 7 a7))) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 1 {addsi3}
 (expr_list:REG_DEAD (reg:SI 9 a9)
(nil)))
(insn 3810 3809 721 121 (use (reg:SI 0 a0)) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 -1
 (expr_list:REG_DEAD (reg:SI 0 a0)
(nil)))
(call_insn/j 721 3810 722 121 (call (mem:SI (symbol_ref:SI ("free") [flags 
0x41]  ) [0 __builtin_free S4 A32])
(const_int 0 [0])) 
"gcc-gnu/gcc/testsuite/gfortran.dg/allocate_with_source_5.f90":35:30 discrim 1 
106 {sibcall_internal}
 (expr_list:REG_DEAD (reg:SI 2 a2)
(expr_list:REG_CALL_DECL (symbol_ref:SI ("free") [flags 0x41]  
)
(expr_list:REG_EH_REGION (const_int 0 [0])
(nil
(expr_list:SI (use (reg:SI 2 a2))
(nil)))

(IMHO the "rnreg" pass doesn't take REG_ALLOC_ORDER into account;
it just seems to allocate registers in fixed_regs index order,
which may have hurt register A0 that became allocatable in the recent
patch)

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_expand_epilogue):
Emit (use (reg:SI A0_REG)) at the end in the sibling call
(i.e. the same place as (return) in the normal call).
* config/xtensa/xtensa.md
(sibcall, sibcall_internal, sibcall_value, sibcall_value_internal):
Revert changes by the previous patch.
---
 gcc/config/xtensa/xtensa.cc |  4 +++-
 gcc/config/xtensa/xtensa.md | 20 +++-
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 5c1c713e122..b80eef5c19e 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3573,7 +3573,9 @@ xtensa_expand_epilogue (bool sibcall_p)
  EH_RETURN_STACKADJ_RTX));
 }
   cfun->machine->epilogue_done = true;
-  if (!sibcall_p)
+  if (sibcall_p)
+emit_use (gen_rtx_REG (SImode, A0_REG));
+  else
 emit_jump_insn (gen_return ());
 }
 
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index b8a8aaf9764..d3996b26cb5 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2369,10 +2369,8 @@
(set_attr "length"  "3")])
 
 (define_expand "sibcall"
-  [(parallel [
-(call (match_operand 0 "memory_operand" "")
- (match_operand 1 "" ""))
-(use (reg:SI A0_REG))])]
+  [(call (match_operand 0 "memory_operand" "")
+(match_operand 1 "" ""))]
   "!TARGET_WINDOWED_ABI"
 {
   xtensa_prepare_expand_call (0, operands);
@@ -2380,8 +2378,7 @@
 
 (define_insn "sibcall_internal"
   [(call (mem:SI 

[PATCH] xtensa: Enforce return address saving when -Og is specified

2023-02-17 Thread Takayuki 'January June' Suwa via Gcc-patches
A leaf function often omits saving its return address to the stack slot,
and this often makes debugging very confusing, especially for
stack dump analysis.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_call_save_reg): Change to return
true for register A0 (return address register) when -Og is specified.
---
 gcc/config/xtensa/xtensa.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index d987f1dfede..1d9e4d1561a 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3224,8 +3224,11 @@ xtensa_call_save_reg (int regno)
 return false;
 
   if (regno == A0_REG)
-return crtl->profile || !crtl->is_leaf || crtl->calls_eh_return ||
-  df_regs_ever_live_p (regno);
+/* Ensure the return address to be saved to the stack slot in order
+   to assist stack dump analysis when -Og is specified.  */
+return optimize_debug
+  || crtl->profile || !crtl->is_leaf || crtl->calls_eh_return
+  || df_regs_ever_live_p (regno);
 
   if (crtl->calls_eh_return && IN_RANGE (regno, 2, 3))
 return true;
-- 
2.30.2


[PATCH v5] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-02-17 Thread Takayuki 'January June' Suwa via Gcc-patches
Register-register move instructions that can be easily seen as
unnecessary by the human eye may remain in the compiled result.
For example:

/* example */
double test(double a, double b) {
  return __builtin_copysign(a, b);
}

test:
add.n   a3, a3, a3
extui   a5, a5, 31, 1
ssai    1
;; Be in the same BB
src a7, a5, a3  ;; Replacing the destination doesn't
;;   violate any constraints of the
;;   operands
;; No CALL insns in this span
;; Both A3 and A7 are irrelevant to
;;   insns in this span
mov.n   a3, a7  ;; An unnecessary reg-reg move
;; A7 is not used after this
ret.n

The last two instructions above, excluding the return instruction,
could be done like this:

src a3, a5, a3

This symptom often occurs when handling DI/DFmode values with SImode
instructions.  This patch solves the above problem using a peephole2
pattern.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the occurrence of general-purpose register used only once and for
transferring intermediate value.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/elim_GP_regmove_[01].c: New.
---
 gcc/config/xtensa/xtensa.md   | 46 +++
 .../gcc.target/xtensa/elim_GP_regmove_0.c | 23 ++
 .../gcc.target/xtensa/elim_GP_regmove_1.c | 10 
 3 files changed, 79 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index d3996b26cb5..4c1305c05e7 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3050,3 +3050,49 @@ FALLTHRU:;
   operands[1] = GEN_INT (imm0);
   operands[2] = GEN_INT (imm1);
 })
+
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+   (match_operand 1 "register_operand"))]
+  "REG_NREGS (operands[0]) == 1 && GP_REG_P (REGNO (operands[0]))
+   && REG_NREGS (operands[1]) == 1 && GP_REG_P (REGNO (operands[1]))
+   && peep2_reg_dead_p (1, operands[1])"
+  [(const_int 0)]
+{
+  basic_block bb = BLOCK_FOR_INSN (curr_insn);
+  rtx_insn *head = BB_HEAD (bb), *insn;
+  rtx dest = operands[0], src = operands[1], pattern, t_dest, dest_orig;
+  for (insn = PREV_INSN (curr_insn);
+   insn && insn != head;
+   insn = PREV_INSN (insn))
+if (CALL_P (insn))
+  break;
+else if (INSN_P (insn))
+  {
+   if (GET_CODE (pattern = PATTERN (insn)) == SET
+   && REG_P (t_dest = SET_DEST (pattern))
+   && REG_NREGS (t_dest) == 1
+   && REGNO (t_dest) == REGNO (src))
+   {
+ dest_orig = SET_DEST (pattern);
+ SET_DEST (pattern) = gen_rtx_REG (GET_MODE (t_dest),
+   REGNO (dest));
+ extract_insn (insn);
+ if (!constrain_operands (true, get_enabled_alternatives (insn)))
+   {
+ SET_DEST (pattern) = dest_orig;
+ goto ABORT;
+   }
+ df_insn_rescan (insn);
+ goto FALLTHRU;
+   }
+   if (reg_overlap_mentioned_p (dest, pattern)
+   || reg_overlap_mentioned_p (src, pattern)
+   || set_of (dest, insn)
+   || set_of (src, insn))
+ break;
+  }
+ABORT:
+  FAIL;
+FALLTHRU:;
+})
diff --git a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c 
b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c
new file mode 100644
index 000..5c195c357dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_0.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fpeephole2" } */
+
+/* can be processed */
+double test0(double a, double b) {
+  return __builtin_copysign(a, b);
+}
+
+/* cannot be processed: due to violate '0' constraint of the 2nd source 
operand.  */
+int test1(int a, int b) {
+  int c;
+  asm volatile ("" : "=a"(c) : "r"(a), "0"(b));
+  return c;
+}
+
+/* cannot be processed: due to violate '&' constraint of the destination 
operand.  */
+int test2(int a) {
+  int b;
+  asm volatile ("" : "=&a"(b) : "r"(a));
+  return b;
+}
+
+/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c 
b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c
new file mode 100644
index 000..a13ef818827
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove_1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fpeephole2 -mabi=windowed" } */
+
+/* cannot be processed: due to violate 'a' constraint of the destination 
operand of the stack adjustment instruction.  */
+void test(void) {
+  int buffer[8192];
+  asm volatile ("" : 

[PATCH v7] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-16 Thread Takayuki 'January June' Suwa via Gcc-patches
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once and each is a simple
register-register move (with two exceptions: i. the register saved
to/restored from is the stack pointer, ii. the function needs an
additional stack pointer adjustment to grow the stack).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...   ;; no frame pointer needed
;; no additional stack growth
; } prologue
  ...
mov.n   a14, a6 ;; A6 is not SP
  ...
call0   foo
  ...
mov.n   a8, a14 ;; A8 is not SP
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

This can be transformed as follows:

;; after
; prologue {
  ...
(no save needed)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16  ;; replaced with A14's slot
  ...
call0   foo
  ...
l32i.n  a8, sp, 16  ;; through SP
  ...
; epilogue {
  ...
(no restoration needed)
  ...
; } epilogue

This patch adds the abovementioned logic to the function prologue/epilogue
RTL expander code.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): Add new member
'eliminated_callee_saved_regs'.
(xtensa_can_eliminate_callee_saved_reg_p): New function to
determine whether the register can be eliminated or not.
(xtensa_expand_prologue): Invoke the above function and eliminate
the use of the callee-saved register by using its stack slot
through the stack pointer (or the frame pointer if needed)
directly.
(xtensa_expand_epilogue): Do not emit the register restoration
insn from its stack slot if the register is already eliminated.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/elim_callee_saved.c: New.
---
 gcc/config/xtensa/xtensa.cc   | 134 ++
 .../gcc.target/xtensa/elim_callee_saved.c |  37 +
 2 files changed, 146 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3e2e22d4cbe..d987f1dfede 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -105,6 +105,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
+  bitmap eliminated_callee_saved_regs;
 };
 
 static void xtensa_option_override (void);
@@ -3343,6 +3344,65 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int 
flags)
 cfun->machine->last_logues_a9_content = GEN_INT (offset);
 }
 
+static bool
+xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
+rtx_insn **p_insnS,
+rtx_insn **p_insnR)
+{
+  df_ref ref;
+  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
+  rtx pattern;
+
+  if (!optimize || !df || call_used_or_fixed_reg_p (regno)
+  || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM))
+return false;
+
+  for (ref = DF_REG_DEF_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) == regno
+&& REG_NREGS (SET_DEST (pattern)) == 1
+&& REG_P (SET_SRC (pattern)))
+  {
+   if (insnS)
+ return false;
+   insnS = insn;
+   continue;
+  }
+else
+  return false;
+
+  for (ref = DF_REG_USE_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) == regno
+&& REG_NREGS (SET_SRC (pattern)) == 1
+&& REG_P (SET_DEST (pattern)))
+  {
+   if (insnR)
+ return false;
+   insnR = insn;
+   continue;
+  }
+else
+  return false;
+
+  if (!insnS || !insnR)
+return false;
+
+  *p_insnS = insnS, *p_insnR = insnR;
+
+  return true;
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3382,6 +3442,7 @@ xtensa_expand_prologue (void)
   df_ref ref;
   bool stack_pointer_needed = frame_pointer_needed

Re: [PATCH v6] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-16 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/02/16 7:18, Max Filippov wrote:
> Hi Suwa-san,

Hi!

> 
> On Thu, Jan 26, 2023 at 7:17 PM Takayuki 'January June' Suwa
>  wrote:
>>
>> In the case of the CALL0 ABI, values that must be retained before and
>> after function calls are placed in the callee-saved registers (A12
>> through A15) and referenced later.  However, it is often the case that
>> the save and the reference are each only once and a simple register-
>> register move (with two exceptions; i. the register saved to/restored
>> from is the stack pointer, ii. the function needs an additional stack
>> pointer adjustment to grow the stack).
>>
>> e.g. in the following example, if there are no other occurrences of
>> register A14:
>>
>> ;; before
>> ; prologue {
>>   ...
>> s32i.n  a14, sp, 16
>>   ...   ;; no frame pointer needed
>> ;; no additional stack growth
>> ; } prologue
>>   ...
>> mov.n   a14, a6 ;; A6 is not SP
>>   ...
>> call0   foo
>>   ...
>> mov.n   a8, a14 ;; A8 is not SP
>>   ...
>> ; epilogue {
>>   ...
>> l32i.n  a14, sp, 16
>>   ...
>> ; } epilogue
>>
>> It can be possible like this:
>>
>> ;; after
>> ; prologue {
>>   ...
>> (no save needed)
>>   ...
>> ; } prologue
>>   ...
>> s32i.n  a6, sp, 16  ;; replaced with A14's slot
>>   ...
>> call0   foo
>>   ...
>> l32i.n  a8, sp, 16  ;; through SP
>>   ...
>> ; epilogue {
>>   ...
>> (no restoration needed)
>>   ...
>> ; } epilogue
>>
>> This patch adds the abovementioned logic to the function prologue/epilogue
>> RTL expander code.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.cc (machine_function): Add new member
>> 'eliminated_callee_saved_bmp'.
>> (xtensa_can_eliminate_callee_saved_reg_p): New function to
>> determine whether the register can be eliminated or not.
>> (xtensa_expand_prologue): Add invoking the above function and
>> elimination the use of callee-saved register by using its stack
>> slot through the stack pointer (or the frame pointer if needed)
>> directly.
>> (xtensa_expand_prologue): Modify to not emit register restoration
>> insn from its stack slot if the register is already eliminated.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/xtensa/elim_callee_saved.c: New.
>> ---
>>  gcc/config/xtensa/xtensa.cc   | 132 ++
>>  .../gcc.target/xtensa/elim_callee_saved.c |  38 +
>>  2 files changed, 145 insertions(+), 25 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
> 
> This version passes regression tests, but I still have a couple questions.
> 
>> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
>> index 3e2e22d4cbe..ff59c933d4d 100644
>> --- a/gcc/config/xtensa/xtensa.cc
>> +++ b/gcc/config/xtensa/xtensa.cc
>> @@ -105,6 +105,7 @@ struct GTY(()) machine_function
>>bool epilogue_done;
>>bool inhibit_logues_a1_adjusts;
>>rtx last_logues_a9_content;
>> +  HOST_WIDE_INT eliminated_callee_saved_bmp;
>>  };
>>
>>  static void xtensa_option_override (void);
>> @@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, 
>> int flags)
>>  cfun->machine->last_logues_a9_content = GEN_INT (offset);
>>  }
>>
>> +static bool
>> +xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
>> +rtx_insn **p_insnS,
>> +rtx_insn **p_insnR)
>> +{
>> +  df_ref ref;
>> +  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
>> +  rtx pattern;
>> +
>> +  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
>> +return false;
>> +
>> +  for (ref = DF_REG_DEF_CHAIN (regno);
>> +   ref; ref = DF_REF_NEXT_REG (ref))
>> +if (DF_REF_CLASS (ref) != DF_REF_REGULAR
>> +   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
>> +  continue;
>> +else if (GET_CODE (pattern = PATTERN (insn)) == SET
>> +&& REG_P (SET_DEST (pattern))
>> +&& REGNO (SET_DEST (pattern)) == regno
>> +&& REG_N

[PATCH v6] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-26 Thread Takayuki 'January June' Suwa via Gcc-patches
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once and each is a simple
register-register move (with two exceptions: i. the register saved
to/restored from is the stack pointer, ii. the function needs an
additional stack pointer adjustment to grow the stack).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...   ;; no frame pointer needed
;; no additional stack growth
; } prologue
  ...
mov.n   a14, a6 ;; A6 is not SP
  ...
call0   foo
  ...
mov.n   a8, a14 ;; A8 is not SP
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

This can be transformed as follows:

;; after
; prologue {
  ...
(no save needed)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16  ;; replaced with A14's slot
  ...
call0   foo
  ...
l32i.n  a8, sp, 16  ;; through SP
  ...
; epilogue {
  ...
(no restoration needed)
  ...
; } epilogue

This patch adds the abovementioned logic to the function prologue/epilogue
RTL expander code.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): Add new member
'eliminated_callee_saved_bmp'.
(xtensa_can_eliminate_callee_saved_reg_p): New function to
determine whether the register can be eliminated or not.
(xtensa_expand_prologue): Invoke the above function and eliminate
the use of the callee-saved register by using its stack slot
through the stack pointer (or the frame pointer if needed)
directly.
(xtensa_expand_epilogue): Do not emit the register restoration
insn from its stack slot if the register is already eliminated.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/elim_callee_saved.c: New.
---
 gcc/config/xtensa/xtensa.cc   | 132 ++
 .../gcc.target/xtensa/elim_callee_saved.c |  38 +
 2 files changed, 145 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3e2e22d4cbe..ff59c933d4d 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -105,6 +105,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
+  HOST_WIDE_INT eliminated_callee_saved_bmp;
 };
 
 static void xtensa_option_override (void);
@@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int 
flags)
 cfun->machine->last_logues_a9_content = GEN_INT (offset);
 }
 
+static bool
+xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
+rtx_insn **p_insnS,
+rtx_insn **p_insnR)
+{
+  df_ref ref;
+  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
+  rtx pattern;
+
+  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
+return false;
+
+  for (ref = DF_REG_DEF_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) == regno
+&& REG_NREGS (SET_DEST (pattern)) == 1
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) != A1_REG)
+  {
+   if (insnS)
+ return false;
+   insnS = insn;
+   continue;
+  }
+else
+  return false;
+
+  for (ref = DF_REG_USE_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) == regno
+&& REG_NREGS (SET_SRC (pattern)) == 1
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) != A1_REG)
+  {
+   if (insnR)
+ return false;
+   insnR = insn;
+   continue;
+  }
+else
+  return false;
+
+  if (!insnS || !insnR)
+return false;
+
+  *p_insnS = insnS, *p_insnR = insnR;
+
+  return true;
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3382,6 +3443,7 @@ xtensa_expand_prologue (void)
   df_ref ref;
   bool stack_pointer_needed = 

[PATCH v4] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-01-23 Thread Takayuki 'January June' Suwa via Gcc-patches
Register-register move instructions that can be easily seen as
unnecessary by the human eye may remain in the compiled result.
For example:

/* example */
double test(double a, double b) {
  return __builtin_copysign(a, b);
}

test:
add.n   a3, a3, a3
extui   a5, a5, 31, 1
ssai    1
;; be in the same BB
src a7, a5, a3  ;; No '0' in the source constraints
;; The destination replaced is
;;   irrelevant to the sources if the
;;   destination constraint has '&'
;; No CALL insns in this span
;; Both A3 and A7 are irrelevant to
;;   insns in this span
mov.n   a3, a7  ;; An unnecessary reg-reg move
;; A7 is not used after this
ret.n

The last two instructions above, excluding the return instruction,
could be done like this:

src a3, a5, a3

This symptom often occurs when handling DI/DFmode values with SImode
instructions.  This patch solves the above problem using peephole2
pattern.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the occurrence of general-purpose register used only once and for
transferring intermediate value.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/elim_GP_regmove.c: New.
---
 gcc/config/xtensa/xtensa.md   | 49 +++
 .../gcc.target/xtensa/elim_GP_regmove.c   | 23 +
 2 files changed, 72 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_GP_regmove.c

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index dd3fc37353b..1a6154c8ded 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3048,3 +3048,52 @@ FALLTHRU:;
   operands[1] = GEN_INT (imm0);
   operands[2] = GEN_INT (imm1);
 })
+
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+   (match_operand 1 "register_operand"))]
+  "GET_MODE_SIZE (GET_MODE (operands[0])) == 4
+   && GET_MODE_SIZE (GET_MODE (operands[1])) == 4
+   && GP_REG_P (REGNO (operands[0])) && GP_REG_P (REGNO (operands[1]))
+   && peep2_reg_dead_p (1, operands[1])"
+  [(const_int 0)]
+{
+  basic_block bb = BLOCK_FOR_INSN (curr_insn);
+  rtx_insn *head = BB_HEAD (bb), *insn;
+  rtx dest = operands[0], src = operands[1], pattern, t_dest;
+  int i;
+  for (insn = PREV_INSN (curr_insn);
+   insn && insn != head;
+   insn = PREV_INSN (insn))
+if (CALL_P (insn))
+  break;
+else if (INSN_P (insn))
+  {
+   if (GET_CODE (pattern = PATTERN (insn)) == SET
+   && REG_P (t_dest = SET_DEST (pattern))
+   && GET_MODE_SIZE (GET_MODE (t_dest)) == 4
+   && REGNO (t_dest) == REGNO (src))
+   {
+ extract_constrain_insn (insn);
+ for (i = 1; i < recog_data.n_operands; ++i)
+   if (strchr (recog_data.constraints[i], '0'))
+ goto ABORT;
+ if (strchr (recog_data.constraints[0], '&'))
+   for (i = 1; i < recog_data.n_operands; ++i)
+ if (reg_overlap_mentioned_p (dest, recog_data.operand[i]))
+   goto ABORT;
+ SET_DEST (pattern) = gen_rtx_REG (GET_MODE (t_dest),
+   REGNO (dest));
+ df_insn_rescan (insn);
+ goto FALLTHRU;
+   }
+   if (reg_overlap_mentioned_p (dest, pattern)
+   || reg_overlap_mentioned_p (src, pattern)
+   || set_of (dest, insn)
+   || set_of (src, insn))
+ break;
+  }
+ABORT:
+  FAIL;
+FALLTHRU:;
+})
diff --git a/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove.c 
b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove.c
new file mode 100644
index 000..2f0d71d12bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/elim_GP_regmove.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fpeephole2" } */
+
+/* processed */
+double test0(double a, double b) {
+  return __builtin_copysign(a, b);
+}
+
+/* excluded: the source operands have '0' constraint.  */
+int test1(int a, int b) {
+  int c;
+  asm volatile ("" : "=a"(c) : "r"(a), "0"(b));
+  return c;
+}
+
+/* excluded: the destination operand has '&' constraint.  */
+int test2(int a) {
+  int b;
+  asm volatile ("" : "=&a"(b) : "r"(a));
+  return b;
+}
+
+/* { dg-final { scan-assembler-times "mov\t|mov.n\t" 2 } } */
-- 
2.30.2


[PATCH v5] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-23 Thread Takayuki 'January June' Suwa via Gcc-patches
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once and each is a simple
register-register move (with two exceptions: i. the register saved
to/restored from is the stack pointer, ii. the function needs an
additional stack pointer adjustment to grow the stack).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...   ;; no frame pointer needed
;; no additional stack growth
; } prologue
  ...
mov.n   a14, a6 ;; A6 is not SP
  ...
call0   foo
  ...
mov.n   a8, a14 ;; A8 is not SP
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

This can be transformed as follows:

;; after
; prologue {
  ...
(no save needed)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16  ;; replaced with A14's slot
  ...
call0   foo
  ...
l32i.n  a8, sp, 16  ;; through SP
  ...
; epilogue {
  ...
(no restoration needed)
  ...
; } epilogue

This patch adds the abovementioned logic to the function prologue/epilogue
RTL expander code.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): Add new member
'eliminated_callee_saved_bmp'.
(xtensa_can_eliminate_callee_saved_reg_p): New function to
determine whether the register can be eliminated or not.
(xtensa_expand_prologue): Invoke the above function and eliminate
the use of the callee-saved register by using its stack slot
through the stack pointer (or the frame pointer if needed)
directly.
(xtensa_expand_epilogue): Do not emit the register restoration
insn from its stack slot if the register is already eliminated.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/elim_callee_saved.c: New.
---
 gcc/config/xtensa/xtensa.cc   | 130 ++
 .../gcc.target/xtensa/elim_callee_saved.c |  32 +
 2 files changed, 137 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3e2e22d4cbe..a639f019d9c 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -105,6 +105,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
+  HOST_WIDE_INT eliminated_callee_saved_bmp;
 };
 
 static void xtensa_option_override (void);
@@ -3343,6 +3344,64 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int 
flags)
 cfun->machine->last_logues_a9_content = GEN_INT (offset);
 }
 
+static bool
+xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
+rtx_insn **p_insnS,
+rtx_insn **p_insnR)
+{
+  df_ref ref;
+  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
+  rtx pattern;
+
+  if (!df || call_used_or_fixed_reg_p (regno))
+return false;
+
+  for (ref = DF_REG_DEF_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) == regno
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) != A1_REG)
+  {
+   if (insnS)
+ return false;
+   insnS = insn;
+   continue;
+  }
+else
+  return false;
+
+  for (ref = DF_REG_USE_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) == regno
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) != A1_REG)
+  {
+   if (insnR)
+ return false;
+   insnR = insn;
+   continue;
+  }
+else
+  return false;
+
+  if (!insnS || !insnR)
+return false;
+
+  *p_insnS = insnS, *p_insnR = insnR;
+
+  return true;
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3382,6 +3441,7 @@ xtensa_expand_prologue (void)
   df_ref ref;
   bool stack_pointer_needed = frame_pointer_needed
  || crtl->calls_eh_return;
+  bool large_stack_needed;
 
   /* 

Re: [PATCH v4] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-22 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/01/23 0:45, Max Filippov wrote:
> On Fri, Jan 20, 2023 at 8:39 PM Takayuki 'January June' Suwa
>  wrote:
>> On 2023/01/21 0:14, Max Filippov wrote:
>>> After having this many attempts and getting to the issues that are
>>> really hard to detect I wonder if the target backend is the right place
>>> for this optimization?
>>>
>> I guess they are not hard to detect
> 
> I mean, on the testing side. check-gcc testsuite passed without new
> regressions with this change, linux kernel smoke test passed, I was
> almost convinced that it's ok to commit.
> 
>> but just issues I didn't anticipate (and I just need a little more work).
> 
> Looking at other peephole2 patterns I see that their code transformations
> are much more compact and they don't need to track additional properties
> of unrelated instructions.
> 
>> And where else should it be done?  What about implementing a
>> target-specific pass just for one-point optimization?
> 
> I don't even understand what's target-specific in this optimization?
> It looks very generic to me.
> 

Ah, I seem to have misunderstood what you meant, sorry.

Now, what this patch is trying to do depends on whether register moves can be 
converted to stack pointer indirect loads/stores with offsets, and whether 
there is any benefit in doing so, but they are not target dependent. Is it?

If we want the target-independent part to do something like this, we will need 
a mechanism (macro, hook, etc.) to write appropriate information in the machine 
description and pass it on.

For example, offset ranges for register indirect loads and stores, or whether 
the ABI requires that callee-saved registers always be associated with stack 
slots, or even the need for stack frame construction...

I totally agree that the peephole2 pattern is not the best implementation 
location.


Re: [PATCH v4] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-20 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/01/21 0:14, Max Filippov wrote:
> Hi Suwa-san,
Hi!

> 
> On Wed, Jan 18, 2023 at 7:50 PM Takayuki 'January June' Suwa
>  wrote:
>>
>> In the previous patch, if insn is JUMP_INSN or CALL_INSN, it bypasses the 
>> reg check (possibly FAIL).
>>
>> =
>> In the case of the CALL0 ABI, values that must be retained before and
>> after function calls are placed in the callee-saved registers (A12
>> through A15) and referenced later.  However, it is often the case that
>> the save and the reference are each only once and a simple register-
>> register move (the frame pointer is needed to recover the stack pointer
>> and must be excluded).
>>
>> e.g. in the following example, if there are no other occurrences of
>> register A14:
>>
>> ;; before
>> ; prologue {
>>   ...
>> s32i.n  a14, sp, 16
>>   ...
>> ; } prologue
>>   ...
>> mov.n   a14, a6
>>   ...
>> call0   foo
>>   ...
>> mov.n   a8, a14
>>   ...
>> ; epilogue {
>>   ...
>> l32i.n  a14, sp, 16
>>   ...
>> ; } epilogue
>>
>> It can be possible like this:
>>
>> ;; after
>> ; prologue {
>>   ...
>> (deleted)
>>   ...
>> ; } prologue
>>   ...
>> s32i.n  a6, sp, 16
>>   ...
>> call0   foo
>>   ...
>> l32i.n  a8, sp, 16
>>   ...
>> ; epilogue {
>>   ...
>> (deleted)
>>   ...
>> ; } epilogue
>>
>> This patch introduces a new peephole2 pattern that implements the above.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.md: New peephole2 pattern that eliminates
>> the use of callee-saved register that saves and restores only once
>> for other register, by using its stack slot directly.
>> ---
>>  gcc/config/xtensa/xtensa.md | 62 +
>>  1 file changed, 62 insertions(+)
> 
> There are still issues with this change in the libgomp:
> 
> FAIL: libgomp.c/examples-4/target-1.c execution test
> FAIL: libgomp.c/examples-4/target-2.c execution test
> 
> They come from the following function:
> 
> code produced before the change:
>.literal_position
>.literal .LC8, init@PLT
>.literal .LC9, 40
>.literal .LC10, 10
>.literal .LC11, -80
>.literal .LC12, 80
>.align  4
>.global vec_mult_ref
>.type   vec_mult_ref, @function
> vec_mult_ref:
>l32r    a9, .LC11
>addi    sp, sp, -16
>l32r    a10, .LC9
>s32i.n  a12, sp, 8
>s32i.n  a13, sp, 4
>s32i.n  a0, sp, 12
>add.n   sp, sp, a9
>add.n   a12, sp, a10
>l32r    a9, .LC8
>mov.n   a13, a2
>mov.n   a3, sp
>mov.n   a2, a12
>callx0  a9
>l32r    a7, .LC10
>mov.n   a10, a12
>mov.n   a11, sp
>mov.n   a2, a13
>loop    a7, .L17_LEND
> .L17:
>l32i.n  a9, a10, 0
>l32i.n  a6, a11, 0
>addi.n  a10, a10, 4
>mull    a9, a9, a6
>addi.n  a11, a11, 4
>s32i.n  a9, a2, 0
>addi.n  a2, a2, 4
>.L17_LEND:
>l32r    a9, .LC12
>add.n   sp, sp, a9
>l32i.n  a0, sp, 12
>l32i.n  a12, sp, 8
>l32i.n  a13, sp, 4
>addi    sp, sp, 16
>ret.n
> 
> 
> 
> with the change:
>.literal_position
>.literal .LC8, init@PLT
>.literal .LC9, 40
>.literal .LC10, 10
>.literal .LC11, -80
>.literal .LC12, 80
>.align  4
>.global vec_mult_ref
>.type   vec_mult_ref, @function
> vec_mult_ref:
>l32r    a9, .LC11
>l32r    a10, .LC9
>addi    sp, sp, -16
>s32i.n  a12, sp, 8
>s32i.n  a0, sp, 12
>add.n   sp, sp, a9
>add.n   a12, sp, a10
>l32r    a9, .LC8
>s32i.n  a2, sp, 4
>mov.n   a3, sp
>mov.n   a2, a12
>callx0  a9
>l32r    a7, .LC10
>l32i.n  a2, sp, 4
>mov.n   a10, a12
>mov.n   a11, sp
>loop    a7, .L17_LEND
> .L17:
>l32i.n  a9, a10, 0
>l32i.n  a6, a11, 0
>addi.n  a10, a10, 4
>mull    a9, a9, a6
>addi.n  a11, a11, 4
>s32i.n  a9, a2, 0
>addi.n  a2, a2, 4
>.L17_LEND:
>  

[PATCH] xtensa: Revise 89afb2e86fcb29c559b2957fdcbea0d01740c49b

2023-01-19 Thread Takayuki 'January June' Suwa via Gcc-patches
In the previously posted patch
"xtensa: Make complex hard register clobber elimination more robust and accurate",
the code that checks for insns referring to the [DS]Cmode hard register
after it is clobbered but before it is overwritten is incomplete.
Fortunately, such insns are seldom emitted, so it did not matter in practice.

This patch fixes that for the sake of completeness.

gcc/ChangeLog:

* config/xtensa/xtensa.md:
Fix exit from loops detecting references before overwriting in the
split pattern.
---
 gcc/config/xtensa/xtensa.md | 72 +++--
 1 file changed, 37 insertions(+), 35 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 8432d7bcb..e26772413 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -2976,45 +2976,47 @@
 {
   auto_sbitmap bmp (FIRST_PSEUDO_REGISTER);
   rtx_insn *insn;
-  rtx reg = gen_rtx_REG (SImode, 0);
+  rtx reg = gen_rtx_REG (SImode, 0), dest;
+  unsigned int regno;
+  sbitmap_iterator iter;
   bitmap_set_range (bmp, REGNO (operands[0]), REG_NREGS (operands[0]));
   for (insn = next_nonnote_nondebug_insn_bb (curr_insn);
insn; insn = next_nonnote_nondebug_insn_bb (insn))
-{
-  sbitmap_iterator iter;
-  unsigned int regno;
-  if (NONJUMP_INSN_P (insn))
-   {
- EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter)
-   {
- set_regno_raw (reg, regno, REG_NREGS (reg));
- if (reg_overlap_mentioned_p (reg, PATTERN (insn)))
-   break;
-   }
- if (GET_CODE (PATTERN (insn)) == SET)
-   {
- rtx x = SET_DEST (PATTERN (insn));
- if (REG_P (x) && HARD_REGISTER_P (x))
-   bitmap_clear_range (bmp, REGNO (x), REG_NREGS (x));
- else if (SUBREG_P (x) && HARD_REGISTER_P (SUBREG_REG (x)))
-   {
- struct subreg_info info;
- subreg_get_info (regno = REGNO (SUBREG_REG (x)),
-  GET_MODE (SUBREG_REG (x)),
-  SUBREG_BYTE (x), GET_MODE (x), &info);
- if (!info.representable_p)
-   break;
- bitmap_clear_range (bmp, regno + info.offset, info.nregs);
-   }
-   }
- if (bitmap_empty_p (bmp))
-   goto FALLTHRU;
-   }
-  else if (CALL_P (insn))
+if (NONJUMP_INSN_P (insn))
+  {
EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter)
-if (call_used_or_fixed_reg_p (regno))
-  break;
-}
+ {
+   set_regno_raw (reg, regno, REG_NREGS (reg));
+   if (reg_referenced_p (reg, PATTERN (insn)))
+ goto ABORT;
+ }
+   if (GET_CODE (PATTERN (insn)) == SET
+   || GET_CODE (PATTERN (insn)) == CLOBBER)
+ {
+   dest = SET_DEST (PATTERN (insn));
+   if (REG_P (dest) && HARD_REGISTER_P (dest))
+ bitmap_clear_range (bmp, REGNO (dest), REG_NREGS (dest));
+   else if (SUBREG_P (dest)
+&& HARD_REGISTER_P (SUBREG_REG (dest)))
+ {
+   struct subreg_info info;
+   subreg_get_info (regno = REGNO (SUBREG_REG (dest)),
+GET_MODE (SUBREG_REG (dest)),
+SUBREG_BYTE (dest), GET_MODE (dest),
+&info);
+   if (!info.representable_p)
+ break;
+   bitmap_clear_range (bmp, regno + info.offset, info.nregs);
+ }
+ }
+   if (bitmap_empty_p (bmp))
+ goto FALLTHRU;
+  }
+else if (CALL_P (insn))
+  EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter)
+   if (call_used_or_fixed_reg_p (regno))
+ goto ABORT;
+ABORT:
   FAIL;
 FALLTHRU:;
 })
-- 
2.30.2


[PATCH v3] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-01-18 Thread Takayuki 'January June' Suwa via Gcc-patches
Register-register move instructions that can be easily seen as
unnecessary by the human eye may remain in the compiled result.
For example:

/* example */
double test(double a, double b) {
  return __builtin_copysign(a, b);
}

test:
add.n   a3, a3, a3
extui   a5, a5, 31, 1
ssai    1
;; be in the same BB
src a7, a5, a3  ;; No '0' in the source constraints
;; No CALL insns in this span
;; Both A3 and A7 are irrelevant to
;;   insns in this span
mov.n   a3, a7  ;; An unnecessary reg-reg move
;; A7 is not used after this
ret.n

The last two instructions above, excluding the return instruction,
could be done like this:

src a3, a5, a3

This symptom often occurs when handling DI/DFmode values with SImode
instructions.  This patch solves the above problem using peephole2
pattern.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the occurrence of general-purpose register used only once and for
transferring intermediate value.
---
 gcc/config/xtensa/xtensa.md | 45 +
 1 file changed, 45 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index abcab231d8e..517dcecf2c1 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3110,3 +3110,48 @@ FALLTHRU:;
   df_insn_rescan (insnR);
   set_insn_deleted (insnP);
 })
+
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+   (match_operand 1 "register_operand"))]
+  "GET_MODE_SIZE (GET_MODE (operands[0])) == 4
+   && GET_MODE_SIZE (GET_MODE (operands[1])) == 4
+   && GP_REG_P (REGNO (operands[0])) && GP_REG_P (REGNO (operands[1]))
+   && peep2_reg_dead_p (1, operands[1])"
+  [(const_int 0)]
+{
+  basic_block bb = BLOCK_FOR_INSN (curr_insn);
+  rtx_insn *head = BB_HEAD (bb), *insn;
+  rtx dest = operands[0], src = operands[1], pattern, t_dest;
+  int i;
+  for (insn = PREV_INSN (curr_insn);
+   insn && insn != head;
+   insn = PREV_INSN (insn))
+if (CALL_P (insn))
+  break;
+else if (INSN_P (insn))
+  {
+   if (GET_CODE (pattern = PATTERN (insn)) == SET
+   && REG_P (t_dest = SET_DEST (pattern))
+   && GET_MODE_SIZE (GET_MODE (t_dest)) == 4
+   && REGNO (t_dest) == REGNO (src))
+   {
+ extract_constrain_insn (insn);
+ for (i = 1; i < recog_data.n_operands; ++i)
+   if (strchr (recog_data.constraints[i], '0'))
+ goto ABORT;
+ SET_DEST (pattern) = gen_rtx_REG (GET_MODE (t_dest),
+   REGNO (dest));
+ df_insn_rescan (insn);
+ goto FALLTHRU;
+   }
+   if (reg_overlap_mentioned_p (dest, pattern)
+   || reg_overlap_mentioned_p (src, pattern)
+   || set_of (dest, insn)
+   || set_of (src, insn))
+ break;
+  }
+ABORT:
+  FAIL;
+FALLTHRU:;
+})
-- 
2.30.2


[PATCH v4] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-18 Thread Takayuki 'January June' Suwa via Gcc-patches
In the previous patch, if insn is JUMP_INSN or CALL_INSN, it bypasses the reg 
check (possibly FAIL).

=
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once, and each is a simple
register-register move (the frame pointer must be excluded, because it
is needed to recover the stack pointer).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...
; } prologue
  ...
mov.n   a14, a6
  ...
call0   foo
  ...
mov.n   a8, a14
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

This can be rewritten as follows:

;; after
; prologue {
  ...
(deleted)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16
  ...
call0   foo
  ...
l32i.n  a8, sp, 16
  ...
; epilogue {
  ...
(deleted)
  ...
; } epilogue

This patch introduces a new peephole2 pattern that implements the above.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the use of callee-saved register that saves and restores only once
for other register, by using its stack slot directly.
---
 gcc/config/xtensa/xtensa.md | 62 +
 1 file changed, 62 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 4f1e8fd13..ac04ef6f0 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3029,3 +3029,65 @@ FALLTHRU:;
   operands[1] = GEN_INT (imm0);
   operands[2] = GEN_INT (imm1);
 })
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand")
+   (match_operand:SI 1 "reload_operand"))]
+  "!TARGET_WINDOWED_ABI && df
+   && epilogue_contains (insn)
+   && ! call_used_or_fixed_reg_p (REGNO (operands[0]))
+   && (!frame_pointer_needed
+   || REGNO (operands[0]) != HARD_FRAME_POINTER_REGNUM)"
+  [(const_int 0)]
+{
+  rtx reg = operands[0], pattern;
+  rtx_insn *insnP = NULL, *insnS = NULL, *insnR = NULL;
+  df_ref ref;
+  rtx_insn *insn;
+  for (ref = DF_REG_DEF_CHAIN (REGNO (reg));
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (insn == curr_insn)
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& rtx_equal_p (SET_DEST (pattern), reg)
+&& REG_P (SET_SRC (pattern)))
+  {
+   if (insnS)
+ FAIL;
+   insnS = insn;
+   continue;
+  }
+else
+  FAIL;
+  for (ref = DF_REG_USE_CHAIN (REGNO (reg));
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (prologue_contains (insn))
+  {
+   insnP = insn;
+   continue;
+  }
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& rtx_equal_p (SET_SRC (pattern), reg)
+&& REG_P (SET_DEST (pattern)))
+  {
+   if (insnR)
+ FAIL;
+   insnR = insn;
+   continue;
+  }
+else
+  FAIL;
+  if (!insnP || !insnS || !insnR)
+FAIL;
+  SET_DEST (PATTERN (insnS)) = copy_rtx (operands[1]);
+  df_insn_rescan (insnS);
+  SET_SRC (PATTERN (insnR)) = copy_rtx (operands[1]);
+  df_insn_rescan (insnR);
+  set_insn_deleted (insnP);
+})
-- 
2.30.2


[PATCH] xtensa: Optimize inversion of the MSB

2023-01-17 Thread Takayuki 'January June' Suwa via Gcc-patches
Such an operation can be done with either a bitwise XOR or an addition of
-2147483648, but the latter is one byte shorter if TARGET_DENSITY.

gcc/ChangeLog:

* config/xtensa/xtensa.md (xorsi3_internal):
Rename from the original of "xorsi3".
(xorsi3): New expansion pattern that emits addition rather than
bitwise-XOR when the second source is a constant of -2147483648
if TARGET_DENSITY.
---
 gcc/config/xtensa/xtensa.md | 26 +-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 0a477e711..4b5899a4c 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -736,7 +736,31 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
-(define_insn "xorsi3"
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "register_operand")
+   (xor:SI (match_operand:SI 1 "register_operand")
+   (match_operand:SI 2 "nonmemory_operand")))]
+  ""
+{
+  if (register_operand (operands[2], SImode))
+emit_insn (gen_xorsi3_internal (operands[0], operands[1],
+   operands[2]));
+  else
+{
+  rtx (*gen_op)(rtx, rtx, rtx);
+  if (TARGET_DENSITY
+ && CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == -2147483648L)
+   gen_op = gen_addsi3;
+  else
+   gen_op = gen_xorsi3_internal;
+  emit_insn (gen_op (operands[0], operands[1],
+force_reg (SImode, operands[2])));
+}
+  DONE;
+})
+
+(define_insn "xorsi3_internal"
   [(set (match_operand:SI 0 "register_operand" "=a")
(xor:SI (match_operand:SI 1 "register_operand" "%r")
(match_operand:SI 2 "register_operand" "r")))]
-- 
2.30.2


[PATCH v2] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-01-17 Thread Takayuki 'January June' Suwa via Gcc-patches
Register-register move instructions that can be easily seen as
unnecessary by the human eye may remain in the compiled result.
For example:

/* example */
double test(double a, double b) {
  return __builtin_copysign(a, b);
}

test:
add.n   a3, a3, a3
extui   a5, a5, 31, 1
ssai    1
;; be in the same BB
src a7, a5, a3  ;; No '0' in the source constraints
;; No CALL insns in this span
;; Both A3 and A7 are irrelevant to
;;   insns in this span
mov.n   a3, a7  ;; An unnecessary reg-reg move
;; A7 is not used after this
ret.n

The last two instructions above, excluding the return instruction,
could be done like this:

src a3, a5, a3

This symptom often occurs when handling DI/DFmode values with SImode
instructions.  This patch solves the above problem using peephole2
pattern.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the occurrence of general-purpose register used only once and for
transferring intermediate value.
---
 gcc/config/xtensa/xtensa.md | 43 +
 1 file changed, 43 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 3694d95ad..0a477e711 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3091,3 +3091,46 @@ FALLTHRU:;
   df_insn_rescan (insnR);
   set_insn_deleted (insnP);
 })
+
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+   (match_operand 1 "register_operand"))]
+  "GET_MODE_SIZE (GET_MODE (operands[0])) == 4
+   && GET_MODE_SIZE (GET_MODE (operands[1])) == 4
+   && GP_REG_P (REGNO (operands[0])) && GP_REG_P (REGNO (operands[1]))
+   && peep2_reg_dead_p (1, operands[1])"
+  [(const_int 0)]
+{
+  basic_block bb = BLOCK_FOR_INSN (curr_insn);
+  rtx_insn *head = BB_HEAD (bb), *insn;
+  rtx dest = operands[0], src = operands[1], pattern, t_dest;
+  int i;
+  for (insn = PREV_INSN (curr_insn);
+   insn && insn != head;
+   insn = PREV_INSN (insn))
+if (CALL_P (insn))
+  break;
+else if (INSN_P (insn))
+  {
+   if (GET_CODE (pattern = PATTERN (insn)) == SET
+   && REG_P (t_dest = SET_DEST (pattern))
+   && GET_MODE_SIZE (GET_MODE (t_dest)) == 4
+   && REGNO (t_dest) == REGNO (src))
+   {
+ extract_constrain_insn (insn);
+ for (i = 1; i < recog_data.n_operands; ++i)
+   if (strchr (recog_data.constraints[i], '0'))
+ goto ABORT;
+ SET_REGNO (t_dest, REGNO (dest));
+ goto FALLTHRU;
+   }
+   if (reg_overlap_mentioned_p (dest, pattern)
+   || reg_overlap_mentioned_p (src, pattern)
+   || set_of (dest, insn)
+   || set_of (src, insn))
+ break;
+  }
+ABORT:
+  FAIL;
+FALLTHRU:;
+})
-- 
2.30.2


[PATCH v3] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-17 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/01/17 20:23, Max Filippov wrote:
> Hi Suwa-san,
Hi!

> There's still a few regressions in tests with -fcompare-debug because
> code generated with -g and without it is different:
> E.g. check the following test with -g0 and -g:
Again debug_insn is the problem...

=
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once, and each is a simple
register-register move (the frame pointer must be excluded, because it
is needed to recover the stack pointer).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...
; } prologue
  ...
mov.n   a14, a6
  ...
call0   foo
  ...
mov.n   a8, a14
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

This can be rewritten as follows:

;; after
; prologue {
  ...
(deleted)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16
  ...
call0   foo
  ...
l32i.n  a8, sp, 16
  ...
; epilogue {
  ...
(deleted)
  ...
; } epilogue

This patch introduces a new peephole2 pattern that implements the above.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the use of callee-saved register that saves and restores only once
for other register, by using its stack slot directly.
---
 gcc/config/xtensa/xtensa.md | 62 +
 1 file changed, 62 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 98f3c468f8b..2f3b2256d8b 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3024,3 +3024,65 @@ FALLTHRU:;
   operands[1] = GEN_INT (imm0);
   operands[2] = GEN_INT (imm1);
 })
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand")
+   (match_operand:SI 1 "reload_operand"))]
+  "!TARGET_WINDOWED_ABI && df
+   && epilogue_contains (insn)
+   && ! call_used_or_fixed_reg_p (REGNO (operands[0]))
+   && (!frame_pointer_needed
+   || REGNO (operands[0]) != HARD_FRAME_POINTER_REGNUM)"
+  [(const_int 0)]
+{
+  rtx reg = operands[0], pattern;
+  rtx_insn *insnP = NULL, *insnS = NULL, *insnR = NULL;
+  df_ref ref;
+  rtx_insn *insn;
+  for (ref = DF_REG_DEF_CHAIN (REGNO (reg));
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || ! NONJUMP_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (insn == curr_insn)
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& rtx_equal_p (SET_DEST (pattern), reg)
+&& REG_P (SET_SRC (pattern)))
+  {
+   if (insnS)
+ FAIL;
+   insnS = insn;
+   continue;
+  }
+else
+  FAIL;
+  for (ref = DF_REG_USE_CHAIN (REGNO (reg));
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || ! NONJUMP_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (prologue_contains (insn))
+  {
+   insnP = insn;
+   continue;
+  }
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& rtx_equal_p (SET_SRC (pattern), reg)
+&& REG_P (SET_DEST (pattern)))
+  {
+   if (insnR)
+ FAIL;
+   insnR = insn;
+   continue;
+  }
+else
+  FAIL;
+  if (!insnP || !insnS || !insnR)
+FAIL;
+  SET_DEST (PATTERN (insnS)) = copy_rtx (operands[1]);
+  df_insn_rescan (insnS);
+  SET_SRC (PATTERN (insnR)) = copy_rtx (operands[1]);
+  df_insn_rescan (insnR);
+  set_insn_deleted (insnP);
+})
-- 
2.30.2


[PATCH] xtensa: Eliminate unnecessary general-purpose reg-reg moves

2023-01-16 Thread Takayuki 'January June' Suwa via Gcc-patches
Register-register move instructions that can be easily seen as
unnecessary by the human eye may remain in the compiled result.
For example:

/* example */
double test(double a, double b) {
  return __builtin_copysign(a, b);
}

test:
add.n   a3, a3, a3
extui   a5, a5, 31, 1
ssai    1
;; be in the same BB
src a7, a5, a3  ;; No '0' in the source constraints
;; No CALL insns in this span
;; Both A3 and A7 are irrelevant to
;;   insns in this span
mov.n   a3, a7  ;; An unnecessary reg-reg move
;; A7 is not used after this
ret.n

The last two instructions above, excluding the return instruction,
could be done like this:

src a3, a5, a3

This symptom often occurs when handling DI/DFmode values with SImode
instructions.  This patch solves the above problem using peephole2
pattern.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the occurrence of general-purpose register used only once and for
transferring intermediate value.
---
 gcc/config/xtensa/xtensa.md | 44 +
 1 file changed, 44 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 61bbad8e4..1b53c8c9e 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3089,3 +3089,47 @@ FALLTHRU:;
   df_insn_rescan (insnR);
   set_insn_deleted (insnP);
 })
+
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+   (match_operand 1 "register_operand"))]
+  "GET_MODE_SIZE (GET_MODE (operands[0])) == 4
+   && GET_MODE_SIZE (GET_MODE (operands[1])) == 4
+   && GP_REG_P (REGNO (operands[0])) && GP_REG_P (REGNO (operands[1]))
+   && peep2_reg_dead_p (1, operands[1])"
+  [(const_int 0)]
+{
+  basic_block bb = BLOCK_FOR_INSN (curr_insn);
+  rtx dest = operands[0], src = operands[1], pattern, t_dest;
+  rtx_insn *insn;
+  int i;
+  for (insn = PREV_INSN (curr_insn);
+   insn && BLOCK_FOR_INSN (insn) == bb;
+   insn = PREV_INSN (insn))
+if (CALL_P (insn))
+  break;
+else if (INSN_P (insn))
+  {
+   if (GET_CODE (pattern = PATTERN (insn)) == SET
+   && REG_P (t_dest = SET_DEST (pattern))
+   && GET_MODE_SIZE (GET_MODE (t_dest)) == 4
+   && REGNO (t_dest) == REGNO (src)
+   && ! REG_P (SET_SRC (pattern)))
+   {
+ extract_constrain_insn (insn);
+ for (i = 1; i < recog_data.n_operands; ++i)
+   if (strchr (recog_data.constraints[i], '0'))
+ goto ABORT;
+ SET_REGNO (t_dest, REGNO (dest));
+ goto FALLTHRU;
+   }
+   if (reg_overlap_mentioned_p (dest, pattern)
+   || reg_overlap_mentioned_p (src, pattern)
+   || set_of (dest, insn)
+   || set_of (src, insn))
+ break;
+  }
+ABORT:
+  FAIL;
+FALLTHRU:;
+})
-- 
2.30.2


[PATCH v2] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-16 Thread Takayuki 'January June' Suwa via Gcc-patches
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once, and each is a simple
register-register move (the frame pointer must be excluded, because it
is needed to recover the stack pointer).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...
; } prologue
  ...
mov.n   a14, a6
  ...
call0   foo
  ...
mov.n   a8, a14
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

This can be rewritten as follows:

;; after
; prologue {
  ...
(deleted)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16
  ...
call0   foo
  ...
l32i.n  a8, sp, 16
  ...
; epilogue {
  ...
(deleted)
  ...
; } epilogue

This patch introduces a new peephole2 pattern that implements the above.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the use of callee-saved register that saves and restores only once
for other register, by using its stack slot directly.
---
 gcc/config/xtensa/xtensa.md | 60 +
 1 file changed, 60 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 98f3c468f8b..fc512346741 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3024,3 +3024,63 @@ FALLTHRU:;
   operands[1] = GEN_INT (imm0);
   operands[2] = GEN_INT (imm1);
 })
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand")
+   (match_operand:SI 1 "reload_operand"))]
+  "!TARGET_WINDOWED_ABI && df
+   && epilogue_contains (insn)
+   && ! call_used_or_fixed_reg_p (REGNO (operands[0]))
+   && (!frame_pointer_needed
+   || REGNO (operands[0]) != HARD_FRAME_POINTER_REGNUM)"
+  [(const_int 0)]
+{
+  rtx reg = operands[0], pattern;
+  rtx_insn *insnP = NULL, *insnS = NULL, *insnR = NULL;
+  df_ref ref;
+  rtx_insn *insn;
+  for (ref = DF_REG_DEF_CHAIN (REGNO (reg));
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR)
+  continue;
+else if ((insn = DF_REF_INSN (ref)) == curr_insn)
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& rtx_equal_p (SET_DEST (pattern), reg)
+&& REG_P (SET_SRC (pattern)))
+  {
+   if (insnS)
+ FAIL;
+   insnS = insn;
+   continue;
+  }
+else
+  FAIL;
+  for (ref = DF_REG_USE_CHAIN (REGNO (reg));
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR)
+  continue;
+else if (prologue_contains (insn = DF_REF_INSN (ref)))
+  {
+   insnP = insn;
+   continue;
+  }
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& rtx_equal_p (SET_SRC (pattern), reg)
+&& REG_P (SET_DEST (pattern)))
+  {
+   if (insnR)
+ FAIL;
+   insnR = insn;
+   continue;
+  }
+else
+  FAIL;
+  if (!insnP || !insnS || !insnR)
+FAIL;
+  SET_DEST (PATTERN (insnS)) = copy_rtx (operands[1]);
+  df_insn_rescan (insnS);
+  SET_SRC (PATTERN (insnR)) = copy_rtx (operands[1]);
+  df_insn_rescan (insnR);
+  set_insn_deleted (insnP);
+})
-- 
2.30.2


[PATCH] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-15 Thread Takayuki 'January June' Suwa via Gcc-patches
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference each occur only once, and each is a simple
register-register move.

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...
; } prologue
  ...
mov.n   a14, a6
  ...
call0   foo
  ...
mov.n   a8, a14
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

This can be rewritten as follows:

;; after
; prologue {
  ...
(deleted)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16
  ...
call0   foo
  ...
l32i.n  a8, sp, 16
  ...
; epilogue {
  ...
(deleted)
  ...
; } epilogue

This patch introduces a new peephole2 pattern that implements the above.

gcc/ChangeLog:

* config/xtensa/xtensa.md: New peephole2 pattern that eliminates
the use of callee-saved register that saves and restores only once
for other register, by using its stack slot directly.
---
 gcc/config/xtensa/xtensa.md | 58 +
 1 file changed, 58 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 764da63f91c..249147688ac 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3024,3 +3024,61 @@ FALLTHRU:;
   operands[1] = GEN_INT (imm0);
   operands[2] = GEN_INT (imm1);
 })
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand")
+   (match_operand:SI 1 "reload_operand"))]
+  "!TARGET_WINDOWED_ABI && df
+   && epilogue_contains (insn)
+   && ! call_used_or_fixed_reg_p (REGNO (operands[0]))"
+  [(const_int 0)]
+{
+  rtx reg = operands[0], pattern;
+  rtx_insn *insnP = NULL, *insnS = NULL, *insnR = NULL;
+  df_ref ref;
+  rtx_insn *insn;
+  for (ref = DF_REG_DEF_CHAIN (REGNO (reg));
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR)
+  continue;
+else if ((insn = DF_REF_INSN (ref)) == curr_insn)
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& rtx_equal_p (SET_DEST (pattern), reg)
+&& REG_P (SET_SRC (pattern)))
+  {
+   if (insnS)
+ FAIL;
+   insnS = insn;
+   continue;
+  }
+else
+  FAIL;
+  for (ref = DF_REG_USE_CHAIN (REGNO (reg));
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR)
+  continue;
+else if (prologue_contains (insn = DF_REF_INSN (ref)))
+  {
+   insnP = insn;
+   continue;
+  }
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& rtx_equal_p (SET_SRC (pattern), reg)
+&& REG_P (SET_DEST (pattern)))
+  {
+   if (insnR)
+ FAIL;
+   insnR = insn;
+   continue;
+  }
+else
+  FAIL;
+  if (!insnP || !insnS || !insnR)
+FAIL;
+  SET_DEST (PATTERN (insnS)) = copy_rtx (operands[1]);
+  df_insn_rescan (insnS);
+  SET_SRC (PATTERN (insnR)) = copy_rtx (operands[1]);
+  df_insn_rescan (insnR);
+  set_insn_deleted (insnP);
+})
-- 
2.30.2


[PATCH] xtensa: Remove old broken tweak for leaf function

2023-01-13 Thread Takayuki 'January June' Suwa via Gcc-patches
In the before-IRA era, ORDER_REGS_FOR_LOCAL_ALLOC was called for each
function in Xtensa, and there was register allocation table reordering
for leaf functions to compensate for the poor performance of local-alloc.

Today the adjustment hook is still called via its alternative
ADJUST_REG_ALLOC_ORDER, but it is only called once at the start of the IRA,
and leaf_function_p() erroneously returns true and also gives no argument
count.

That straightforwardly misleads register allocation into assuming that all
functions are always leaves with no arguments, which leads to inefficient
allocation results.

Fortunately, IRA is smarter than local-alloc and does not need such assistance.

This patch does away with the antiquated tweak by removing the wreckage that
no longer works.

gcc/ChangeLog:

* config/xtensa/xtensa-protos.h (order_regs_for_local_alloc):
  Rename to xtensa_adjust_reg_alloc_order.
* config/xtensa/xtensa.cc (xtensa_adjust_reg_alloc_order):
  Ditto.  And also remove code to reorder register numbers for
  leaf functions, rename the tables, and adjust the allocation
  order for the call0 ABI to use register A0 more.
  (xtensa_leaf_regs): Remove.
* config/xtensa/xtensa.h (REG_ALLOC_ORDER): Cosmetics.
  (order_regs_for_local_alloc): Rename as the above.
  (LEAF_REGISTERS, LEAF_REG_REMAP, leaf_function): Remove.
---
 gcc/config/xtensa/xtensa-protos.h |  2 +-
 gcc/config/xtensa/xtensa.cc   | 77 +++
 gcc/config/xtensa/xtensa.h| 51 ++--
 3 files changed, 31 insertions(+), 99 deletions(-)

diff --git a/gcc/config/xtensa/xtensa-protos.h 
b/gcc/config/xtensa/xtensa-protos.h
index 91a215e535d..7b5790c5fc4 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -78,7 +78,7 @@ extern long compute_frame_size (poly_int64);
 extern bool xtensa_use_return_instruction_p (void);
 extern void xtensa_expand_prologue (void);
 extern void xtensa_expand_epilogue (bool);
-extern void order_regs_for_local_alloc (void);
+extern void xtensa_adjust_reg_alloc_order (void);
 extern enum reg_class xtensa_regno_to_class (int regno);
 extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to);
 extern const char **xtensa_get_config_strings (void);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 6cf6b35399a..df9b53aeced 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -107,18 +107,6 @@ struct GTY(()) machine_function
   rtx last_logues_a9_content;
 };
 
-/* Vector, indexed by hard register number, which contains 1 for a
-   register that is allowable in a candidate for leaf function
-   treatment.  */
-
-const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] =
-{
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1
-};
-
 static void xtensa_option_override (void);
 static enum internal_test map_test_to_internal_test (enum rtx_code);
 static rtx gen_int_relational (enum rtx_code, rtx, rtx);
@@ -4140,58 +4128,25 @@ xtensa_secondary_reload (bool in_p, rtx x, reg_class_t 
rclass,
   return NO_REGS;
 }
 
+/* Called once at the start of IRA, by ADJUST_REG_ALLOC_ORDER.  */
 
 void
-order_regs_for_local_alloc (void)
+xtensa_adjust_reg_alloc_order (void)
 {
-  if (!leaf_function_p ())
-{
-  static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] =
-   REG_ALLOC_ORDER;
-  static const int reg_nonleaf_alloc_order_call0[FIRST_PSEUDO_REGISTER] =
-   {
- 11, 10,  9,  8,  7,  6,  5,  4,  3,  2, 12, 13, 14, 15,
- 18,
- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
- 0,  1, 16, 17,
- 35,
-   };
-
-  memcpy (reg_alloc_order, TARGET_WINDOWED_ABI ?
- reg_nonleaf_alloc_order : reg_nonleaf_alloc_order_call0,
- FIRST_PSEUDO_REGISTER * sizeof (int));
-}
-  else
-{
-  int i, num_arg_regs;
-  int nxt = 0;
-
-  /* Use the AR registers in increasing order (skipping a0 and a1)
-but save the incoming argument registers for a last resort.  */
-  num_arg_regs = crtl->args.info.arg_words;
-  if (num_arg_regs > MAX_ARGS_IN_REGISTERS)
-   num_arg_regs = MAX_ARGS_IN_REGISTERS;
-  for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++)
-   reg_alloc_order[nxt++] = i + num_arg_regs;
-  for (i = 0; i < num_arg_regs; i++)
-   reg_alloc_order[nxt++] = GP_ARG_FIRST + i;
-
-  /* List the coprocessor registers in order.  */
-  for (i = 0; i < BR_REG_NUM; i++)
-   reg_alloc_order[nxt++] = BR_REG_FIRST + i;
-
-  /* List the FP registers in order for now.  */
-  for (i = 0; i < 16; i++)
-   reg_alloc_order[nxt++] = FP_REG_FIRST + i;
-
-  /* GCC requires that we list *all* the registers  */
-  reg_alloc_order[nxt++] = 0;  /* a0 = return address */
-  reg_alloc_order[nxt++] = 1;  /* 

[PATCH 2/2] xtensa: Optimize ctzsi2 and ffssi2 a bit

2023-01-11 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch saves one byte when the Code Density Option is enabled.

gcc/ChangeLog:

* config/xtensa/xtensa.md (ctzsi2, ffssi2):
Rearrange the emitting codes.
---
 gcc/config/xtensa/xtensa.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index b4989832169..764da63f91c 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -477,8 +477,8 @@
   emit_insn (gen_negsi2 (temp, operands[1]));
   emit_insn (gen_andsi3 (temp, temp, operands[1]));
   emit_insn (gen_clzsi2 (temp, temp));
-  emit_insn (gen_negsi2 (temp, temp));
-  emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31)));
+  emit_move_insn (operands[0], GEN_INT (31));
+  emit_insn (gen_subsi3 (operands[0], operands[0], temp));
   DONE;
 })
 
@@ -491,8 +491,8 @@
   emit_insn (gen_negsi2 (temp, operands[1]));
   emit_insn (gen_andsi3 (temp, temp, operands[1]));
   emit_insn (gen_clzsi2 (temp, temp));
-  emit_insn (gen_negsi2 (temp, temp));
-  emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32)));
+  emit_move_insn (operands[0], GEN_INT (32));
+  emit_insn (gen_subsi3 (operands[0], operands[0], temp));
   DONE;
 })
 
-- 
2.30.2


[PATCH 1/2] xtensa: Tune "*btrue" insn pattern

2023-01-11 Thread Takayuki 'January June' Suwa via Gcc-patches
This branch instruction has a short encoding for EQ/NE comparisons against
immediate zero when the Code Density Option is enabled, but its "length"
attribute only accounted for the normal encoding.  This patch fixes it.

This patch also prevents the postreload pass from undesirably replacing the
immediate zero operand of the comparison (short encoding, as mentioned above)
with a register that holds the value zero (normal encoding).

gcc/ChangeLog:

* config/xtensa/xtensa.md (*btrue):
Correct value of the attribute "length" that depends on
TARGET_DENSITY and operands, and add '?' character to the register
constraint of the compared operand.
---
 gcc/config/xtensa/xtensa.md | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index db1d68ee658..b4989832169 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1679,7 +1679,7 @@
   [(set (pc)
(if_then_else (match_operator 3 "branch_operator"
[(match_operand:SI 0 "register_operand" "r,r")
-(match_operand:SI 1 "branch_operand" "K,r")])
+(match_operand:SI 1 "branch_operand" "K,?r")])
  (label_ref (match_operand 2 "" ""))
  (pc)))]
   ""
@@ -1688,7 +1688,14 @@
 }
   [(set_attr "type""jump,jump")
(set_attr "mode""none")
-   (set_attr "length"  "3,3")])
+   (set (attr "length")
+(if_then_else (match_test "TARGET_DENSITY
+  && CONST_INT_P (operands[1])
+  && INTVAL (operands[1]) == 0
+  && (GET_CODE (operands[3]) == EQ
+  || GET_CODE (operands[3]) == NE)")
+  (const_int 2)
+  (const_int 3)))])
 
 (define_insn "*ubtrue"
   [(set (pc)
-- 
2.30.2


Re: [PATCH] ifcvt.cc: Prevent excessive if-conversion for conditional moves

2023-01-11 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/01/11 17:02, Robin Dapp wrote:
> Hi,
Hi!

>  
>> On optimizing for speed, default_noce_conversion_profitable_p() allows
>> plenty of headroom, so this patch has little impact.
>>
>> Also, if the target-specific cost estimate is accurate or allows for
>> margins, the impact should be similarly small.
> I believe this part of ifcvt does/did not use the costing on purpose.
> It will generally convert more sequences than other paths that compare
> before and after costs since we just count the number of converted
> insns comparing them against the "branch costs".  Similar to rtx costs
> they are kind of relative to a single insn but AFAIK it's not used
> consistently everywhere.  All the major platforms have low branch costs
> nowadays (0 or 1?) thus we won't emit too many conditional moves here.
> 
> In general I agree that we should compare costs everywhere and not just
> count (the costing should include the branch costs as well) but this would
> be a major overhaul.  For your case (assuming xtensa), could you not
> tune xtensa_branch_cost?  It is currently 3 allowing up to 4 conditional
> moves to be generated.  optimize_function_for_speed_p is already being
> passed to the hook so you could make use of that and decrease branch
> costs when optimizing for size only.
> 
> Regards
>  Robin

Thank you for your detailed explanation.

In my case (for Xtensa), the cost of branching isn't really an issue.
The actual problem (I think) is the cost of the sequence itself before
and after conversion.
It is due to the fact that ifcvt's internal estimation is based on 
PATTERN(insn), so the instruction lengths ("length" attribute) associated with 
insns are not well reflected.
This is especially noticeable when optimizing for size (overestimating the 
original cost).

Currently, in addition to the patch, I have implemented the following code, and 
I'm confirming that it works roughly well (fine adjustments are still required).

/* Decide whether SEQ, the instruction sequence produced by
   if-conversion, should replace the if-convertible sequence described
   by IF_INFO.  Returns true when the replacement is considered
   profitable.

   When TEST_BB is optimized for speed, the replacement cost is only
   compared against IF_INFO->max_seq_cost.  Otherwise (optimizing for
   size) it is compared against the summed cost of the conditional jump
   and of every active insn in the THEN block and, if present, the ELSE
   block.  */

static bool
xtensa_noce_conversion_profitable_p (rtx_insn *seq,
                                     struct noce_if_info *if_info)
{
  /* Speed-vs-size setting of TEST_BB.  */
  const bool test_bb_speed_p = if_info->speed_p;
  unsigned int replacement_cost = 0;
  unsigned int baseline_cost;
  bool arm_speed_p;
  rtx_insn *cur;

  /* Sum up the cost of the active insns in the replacing sequence.  */
  for (cur = seq; cur != NULL; cur = NEXT_INSN (cur))
    {
      if (!active_insn_p (cur))
        continue;
      replacement_cost += xtensa_insn_cost (cur, test_bb_speed_p);
    }

  /* For speed, skip estimating the original sequence and just apply the
     limit already recorded in IF_INFO.  */
  if (test_bb_speed_p)
    return replacement_cost <= if_info->max_seq_cost;

  /* For size, the baseline starts with the conditional jump itself...  */
  baseline_cost = xtensa_insn_cost (if_info->jump, test_bb_speed_p);

  /* ...plus the active insns of the THEN block, each costed with that
     block's own speed/size setting...  */
  arm_speed_p = optimize_bb_for_speed_p (if_info->then_bb);
  FOR_BB_INSNS (if_info->then_bb, cur)
    {
      if (active_insn_p (cur))
        baseline_cost += xtensa_insn_cost (cur, arm_speed_p);
    }

  /* ...and without an ELSE block the baseline is already complete.  */
  if (!if_info->else_bb)
    return replacement_cost <= baseline_cost;

  /* Otherwise add the ELSE block's active insns the same way.  */
  arm_speed_p = optimize_bb_for_speed_p (if_info->else_bb);
  FOR_BB_INSNS (if_info->else_bb, cur)
    {
      if (active_insn_p (cur))
        baseline_cost += xtensa_insn_cost (cur, arm_speed_p);
    }

  return replacement_cost <= baseline_cost;
}


[PATCH] ifcvt.cc: Prevent excessive if-conversion for conditional moves

2023-01-10 Thread Takayuki 'January June' Suwa via Gcc-patches
Currently, cond_move_process_if_block() does the conversion without
balancing the cost of the converted sequence with the original one, but
this should be checked by calling targetm.noce_conversion_profitable_p().

Doing so allows us to provide a way based on the target-specific cost
estimate, to prevent unwanted size growth due to excessive conditional
moves on optimizing for size.

On optimizing for speed, default_noce_conversion_profitable_p() allows
plenty of headroom, so this patch has little impact.

Also, if the target-specific cost estimate is accurate or allows for
margins, the impact should be similarly small.

gcc/ChangeLog:

* ifcvt.cc (cond_move_process_if_block):
Consider the result of targetm.noce_conversion_profitable_p()
when replacing the original sequence with the converted one.
---
 gcc/ifcvt.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 008796838f7..a896e14bb3c 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -4350,7 +4350,7 @@ cond_move_process_if_block (struct noce_if_info *if_info)
   goto done;
 }
   seq = end_ifcvt_sequence (if_info);
-  if (!seq)
+  if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
 goto done;
 
   loc_insn = first_active_insn (then_bb);
-- 
2.30.2


[PATCH] xtensa: Make instruction cost estimation for size more accurate

2023-01-09 Thread Takayuki 'January June' Suwa via Gcc-patches
Until now, we applied COSTS_N_INSNS() (multiplying by 4) after dividing
the instruction length by 3, so we couldn't express the difference less
than modulo 3 in insn cost for size (e.g. 11 Bytes and 12 bytes cost the
same).

This patch fixes that.

;; 2 bytes
addi.n  a2, a2, -1  ; cost 3

;; 3 bytes
addmi   a2, a2, 1024; cost 4

;; 4 bytes
movi.n  a3, 80  ; cost 5
bnez.n  a2, a3, .L4

;; 5 bytes
srlia2, a3, 1   ; cost 7
add.n   a2, a2, a2

;; 6 bytes
ssai8   ; cost 8
src a4, a2, a3

;; 3 + 4 bytes
l32ra2, .L5 ; cost 9

;; 11 bytes ; cost 15
;; 12 bytes ; cost 16

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_insn_cost):
Let insn cost for size be obtained by applying COSTS_N_INSNS()
to instruction length and then dividing by 3.
---
 gcc/config/xtensa/xtensa.cc | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index a1f184950ae..6cf6b35399a 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4519,13 +4519,15 @@ xtensa_insn_cost (rtx_insn *insn, bool speed)
 {
   if (!(recog_memoized (insn) < 0))
 {
-  int len = get_attr_length (insn), n = (len + 2) / 3;
+  int len = get_attr_length (insn);
 
   if (len == 0)
return COSTS_N_INSNS (0);
 
   if (speed)  /* For speed cost.  */
{
+ int n = (len + 2) / 3;
+
  /* "L32R" may be particular slow (implementation-dependent).  */
  if (xtensa_is_insn_L32R_p (insn))
return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs);
@@ -4572,10 +4574,11 @@ xtensa_insn_cost (rtx_insn *insn, bool speed)
{
  /* "L32R" itself plus constant in litpool.  */
  if (xtensa_is_insn_L32R_p (insn))
-   return COSTS_N_INSNS (2) + 1;
+   len = 3 + 4;
 
- /* Consider ".n" short instructions.  */
- return COSTS_N_INSNS (n) - (n * 3 - len);
+ /* Consider fractional instruction length (for example, ".n"
+short instructions or "L32R" litpool constants.  */
+ return (COSTS_N_INSNS (len) + 1) / 3;
}
}
 }
-- 
2.30.2


[PATCH v2] xtensa: Optimize bitwise splicing operation

2023-01-07 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch optimizes the operation of cutting and splicing two register
values at a specified bit position, in other words, combining (bitwise
ORing) bits 0 through (C-1) of the register with bits C through 31
of the other, where C is the specified immediate integer 17 through 31.

This typically applies to signed copy of floating point number and
__builtin_return_address() if the windowed register ABI, and saves one
instruction compared to four shifts and a bitwise OR by the default RTL
combination pass.

gcc/ChangeLog:

* config/xtensa/xtensa.md (*splice_bits):
New insn_and_split pattern.
---
 gcc/config/xtensa/xtensa.md | 47 +
 1 file changed, 47 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 0a26d3dccf4..db1d68ee658 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -746,6 +746,53 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+(define_insn_and_split "*splice_bits"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 3 "const_int_operand" "i"))
+   (and:SI (match_operand:SI 2 "register_operand" "r")
+   (match_operand:SI 4 "const_int_operand" "i"]
+
+  "!optimize_debug && optimize
+   && INTVAL (operands[3]) + INTVAL (operands[4]) == -1
+   && (exact_log2 (INTVAL (operands[3]) + 1) > 16
+   || exact_log2 (INTVAL (operands[4]) + 1) > 16)"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(set (match_dup 5)
+   (ashift:SI (match_dup 1)
+  (match_dup 4)))
+   (set (match_dup 6)
+   (lshiftrt:SI (match_dup 2)
+(match_dup 3)))
+   (set (match_dup 0)
+   (ior:SI (lshiftrt:SI (match_dup 5)
+(match_dup 4))
+   (ashift:SI (match_dup 6)
+  (match_dup 3]
+{
+  int shift;
+  if (INTVAL (operands[3]) < 0)
+{
+  rtx x;
+  x = operands[1], operands[1] = operands[2], operands[2] = x;
+  x = operands[3], operands[3] = operands[4], operands[4] = x;
+}
+  shift = floor_log2 (INTVAL (operands[3]) + 1);
+  operands[3] = GEN_INT (shift);
+  operands[4] = GEN_INT (32 - shift);
+  operands[5] = gen_reg_rtx (SImode);
+  operands[6] = gen_reg_rtx (SImode);
+}
+  [(set_attr "type""arith")
+   (set_attr "mode""SI")
+   (set (attr "length")
+   (if_then_else (match_test "TARGET_DENSITY
+  && (INTVAL (operands[3]) == 0x7FFF
+  || INTVAL (operands[4]) == 0x7FFF)")
+ (const_int 11)
+ (const_int 12)))])
+
 
 ;; Zero-extend instructions.
 
-- 
2.30.2


Re: [PATCH] xtensa: Optimize bitwise splicing operation

2023-01-07 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/01/08 6:53, Max Filippov wrote:
> On Fri, Jan 6, 2023 at 6:55 PM Takayuki 'January June' Suwa
>  wrote:
>>
>> This patch optimizes the operation of cutting and splicing two register
>> values at a specified bit position, in other words, combining (bitwise
>> ORing) bits 0 through (C-1) of the register with bits C through 31
>> of the other, where C is the specified immediate integer 1 through 31.
>>
>> This typically applies to signedness copy of floating point number or
>> __builtin_return_address() if the windowed register ABI, and saves one
>> instruction compared to four shifts and a bitwise OR by the RTL
>> generation pass.
> 
> While I indeed see this kind of change, e.g.:
> -   extui   a3, a3, 27, 5
> -   sllia2, a2, 5
> -   srlia2, a2, 5
> -   sllia3, a3, 27
> -   or  a2, a2, a3
> +   sllia2, a2, 5
> +   extui   a3, a3, 27, 5
> +   ssai5
> +   src a2, a3, a2
> 
> I also see the following:
> -   movi.n  a6, -4
> -   and a5, a5, a6
> -   extui   a3, a3, 0, 2
> -   or  a3, a3, a5
> +   srlia5, a5, 2
> +   sllia3, a3, 30
> +   ssai30
> +   src a3, a5, a3
> 
> i.e. after the split there's the same number of instructions,
> but the new sequence is one byte longer than the original one
> because of the movi.n.
> 
> Looking at a bunch of linux builds I observe a slight code size
> growth in call0 kernels and a slight code size reduction in
> windowed kernels.
> 
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.md (*splice_bits):
>> New insn_and_split pattern.
>> ---
>>  gcc/config/xtensa/xtensa.md | 47 +
>>  1 file changed, 47 insertions(+)
>>
>> diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
>> index 0a26d3dccf4..36ec1b1918e 100644
>> --- a/gcc/config/xtensa/xtensa.md
>> +++ b/gcc/config/xtensa/xtensa.md
>> @@ -746,6 +746,53 @@
>> (set_attr "mode""SI")
>> (set_attr "length"  "3")])
>>
>> +(define_insn_and_split "*splice_bits"
>> +  [(set (match_operand:SI 0 "register_operand" "=a")
>> +   (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
>> +   (match_operand:SI 3 "const_int_operand" "i"))
>> +   (and:SI (match_operand:SI 2 "register_operand" "r")
>> +   (match_operand:SI 4 "const_int_operand" "i"]
>> +
>> +  "!optimize_debug && optimize
>> +   && INTVAL (operands[3]) + INTVAL (operands[4]) == -1
>> +   && (exact_log2 (INTVAL (operands[3]) + 1) > 0
>> +   || exact_log2 (INTVAL (operands[4]) + 1) > 0)"
>> +  "#"
>> +  "&& can_create_pseudo_p ()"
>> +  [(set (match_dup 5)
>> +   (ashift:SI (match_dup 1)
>> +  (match_dup 4)))
>> +   (set (match_dup 6)
>> +   (lshiftrt:SI (match_dup 2)
>> +(match_dup 3)))
>> +   (set (match_dup 0)
>> +   (ior:SI (lshiftrt:SI (match_dup 5)
>> +(match_dup 4))
>> +   (ashift:SI (match_dup 6)
>> +  (match_dup 3]
>> +{
>> +  int shift;
>> +  if (INTVAL (operands[3]) < 0)
>> +{
>> +  rtx x;
>> +  x = operands[1], operands[1] = operands[2], operands[2] = x;
>> +  x = operands[3], operands[3] = operands[4], operands[4] = x;
>> +}
>> +  shift = floor_log2 (INTVAL (operands[3]) + 1);
>> +  operands[3] = GEN_INT (shift);
>> +  operands[4] = GEN_INT (32 - shift);
>> +  operands[5] = gen_reg_rtx (SImode);
>> +  operands[6] = gen_reg_rtx (SImode);
>> +}
>> +  [(set_attr "type""arith")
>> +   (set_attr "mode""SI")
>> +   (set (attr "length")
>> +   (if_then_else (match_test "TARGET_DENSITY
>> +  && (INTVAL (operands[3]) == 0x7FFF
>> +  || INTVAL (operands[4]) == 
>> 0x7FFF)")
>> + (const_int 11)
>> + (const_int 12)))])
> 
> I wonder how the length could be 11 here? I always see 4 3-byte
> instructions generated by this pattern.
> 

Sorry, I should have carried out a systematic test beforehand:

#define TEST(c) \
  unsigned int test_ ## c (unsigned int a, unsigned int b) { \
return (a & (-1U >> c)) | (b & ~(-1U >> c)); \
  }
TEST(1)
TEST(2)
  ...
TEST(30)
TEST(31)

Without this patch, compiling the above if c is:

 a. between 1 and 15, slli (or add.n) + extui + slli + srli + or
 b. 16 then extui + slli + extui + or
 c. between 17 and 20, srli + slli + extui + or 
 d. between 21 and 31, movi(.n) + and + extui + or

Clearly, the patch should be restricted to apply only to case a.


[PATCH] xtensa: Optimize bitwise splicing operation

2023-01-06 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch optimizes the operation of cutting and splicing two register
values at a specified bit position, in other words, combining (bitwise
ORing) bits 0 through (C-1) of the register with bits C through 31
of the other, where C is the specified immediate integer 1 through 31.

This typically applies to signedness copy of floating point number or
__builtin_return_address() if the windowed register ABI, and saves one
instruction compared to four shifts and a bitwise OR by the RTL
generation pass.

gcc/ChangeLog:

* config/xtensa/xtensa.md (*splice_bits):
New insn_and_split pattern.
---
 gcc/config/xtensa/xtensa.md | 47 +
 1 file changed, 47 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 0a26d3dccf4..36ec1b1918e 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -746,6 +746,53 @@
(set_attr "mode""SI")
(set_attr "length"  "3")])
 
+(define_insn_and_split "*splice_bits"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+   (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 3 "const_int_operand" "i"))
+   (and:SI (match_operand:SI 2 "register_operand" "r")
+   (match_operand:SI 4 "const_int_operand" "i"]
+
+  "!optimize_debug && optimize
+   && INTVAL (operands[3]) + INTVAL (operands[4]) == -1
+   && (exact_log2 (INTVAL (operands[3]) + 1) > 0
+   || exact_log2 (INTVAL (operands[4]) + 1) > 0)"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(set (match_dup 5)
+   (ashift:SI (match_dup 1)
+  (match_dup 4)))
+   (set (match_dup 6)
+   (lshiftrt:SI (match_dup 2)
+(match_dup 3)))
+   (set (match_dup 0)
+   (ior:SI (lshiftrt:SI (match_dup 5)
+(match_dup 4))
+   (ashift:SI (match_dup 6)
+  (match_dup 3]
+{
+  int shift;
+  if (INTVAL (operands[3]) < 0)
+{
+  rtx x;
+  x = operands[1], operands[1] = operands[2], operands[2] = x;
+  x = operands[3], operands[3] = operands[4], operands[4] = x;
+}
+  shift = floor_log2 (INTVAL (operands[3]) + 1);
+  operands[3] = GEN_INT (shift);
+  operands[4] = GEN_INT (32 - shift);
+  operands[5] = gen_reg_rtx (SImode);
+  operands[6] = gen_reg_rtx (SImode);
+}
+  [(set_attr "type""arith")
+   (set_attr "mode""SI")
+   (set (attr "length")
+   (if_then_else (match_test "TARGET_DENSITY
+  && (INTVAL (operands[3]) == 0x7FFF
+  || INTVAL (operands[4]) == 0x7FFF)")
+ (const_int 11)
+ (const_int 12)))])
+
 
 ;; Zero-extend instructions.
 
-- 
2.30.2


[PATCH v2] xtensa: Optimize stack frame adjustment more

2023-01-06 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch introduces a convenient helper function for integer immediate
addition with scratch register as needed, that splits and emits either
up to two ADDI/ADDMI machine instructions or an addition by register
following an integer immediate load (which may later be transformed by
constantsynth).

By using the helper function, it makes stack frame adjustment logic
simplified and instruction count less in some cases.

gcc/ChangeLog:

* config/xtensa/xtensa.cc
(xtensa_split_imm_two_addends, xtensa_emit_add_imm):
New helper functions.
(xtensa_set_return_address, xtensa_output_mi_thunk):
Change to use the helper function.
(xtensa_emit_adjust_stack_ptr): Ditto.
And also change to try reusing the content of scratch register
A9 if the register is not modified in the function body.
---
 gcc/config/xtensa/xtensa.cc | 151 +---
 1 file changed, 106 insertions(+), 45 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index ae44199bc98..a1f184950ae 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -104,6 +104,7 @@ struct GTY(()) machine_function
   bool frame_laid_out;
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
+  rtx last_logues_a9_content;
 };
 
 /* Vector, indexed by hard register number, which contains 1 for a
@@ -2518,6 +2519,86 @@ xtensa_split_DI_reg_imm (rtx *operands)
 }
 
 
+/* Try to split an integer value into what are suitable for two consecutive
+   immediate addition instructions, ADDI or ADDMI.  */
+
+static bool
+xtensa_split_imm_two_addends (HOST_WIDE_INT imm, HOST_WIDE_INT v[2])
+{
+  HOST_WIDE_INT v0, v1;
+
+  if (imm < -32768)
+v0 = -32768, v1 = imm + 32768;
+  else if (imm > 32512)
+v0 = 32512, v1 = imm - 32512;
+  else if (TARGET_DENSITY && xtensa_simm12b (imm))
+/* A pair of MOVI(.N) and ADD.N is one or two bytes less than two
+   immediate additions if TARGET_DENSITY.  */
+return false;
+  else
+v0 = (imm + 128) & ~255L, v1 = imm - v0;
+
+  if (xtensa_simm8 (v1) || xtensa_simm8x256 (v1))
+{
+  v[0] = v0, v[1] = v1;
+  return true;
+}
+
+  return false;
+}
+
+
+/* Helper function for integer immediate addition with scratch register
+   as needed, that splits and emits either up to two ADDI/ADDMI machine
+   instructions or an addition by register following an integer immediate
+   load (which may later be transformed by constantsynth).
+
+   If 'scratch' is NULL_RTX but still needed, a new pseudo-register will
+   be allocated.  Thus, after the reload/LRA pass, the specified scratch
+   register must be a hard one.  */
+
+static bool
+xtensa_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch,
+bool need_note)
+{
+  bool retval = false;
+  HOST_WIDE_INT v[2];
+  rtx_insn *insn;
+
+  if (imm == 0)
+return false;
+
+  if (xtensa_simm8 (imm) || xtensa_simm8x256 (imm))
+insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
+  else if (xtensa_split_imm_two_addends (imm, v))
+{
+  if (!scratch)
+   scratch = gen_reg_rtx (SImode);
+  emit_insn (gen_addsi3 (scratch, src, GEN_INT (v[0])));
+  insn = emit_insn (gen_addsi3 (dst, scratch, GEN_INT (v[1])));
+}
+  else
+{
+  if (scratch)
+   emit_move_insn (scratch, GEN_INT (imm));
+  else
+   scratch = force_reg (SImode, GEN_INT (imm));
+  retval = true;
+  insn = emit_insn (gen_addsi3 (dst, src, scratch));
+}
+
+  if (need_note)
+{
+  rtx note_rtx = gen_rtx_SET (dst, plus_constant (Pmode, src, imm));
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+}
+
+  return retval;
+}
+
+
 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
 
 static bool
@@ -3245,41 +3326,33 @@ xtensa_initial_elimination_offset (int from, int to 
ATTRIBUTE_UNUSED)
 static void
 xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
 {
+  rtx src, scratch;
   rtx_insn *insn;
-  rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx
- : stack_pointer_rtx;
 
   if (cfun->machine->inhibit_logues_a1_adjusts)
 return;
 
-  if (xtensa_simm8 (offset)
-  || xtensa_simm8x256 (offset))
-insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset)));
-  else
-{
-  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+  src = (flags & ADJUST_SP_FRAME_PTR)
+? hard_frame_pointer_rtx : stack_pointer_rtx;
+  scratch = gen_rtx_REG (Pmode, A9_REG);
 
-  if (offset < 0)
-   {
- emit_move_insn (tmp_reg, GEN_INT (-offset));
- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg));
-   }
-  else
-   {
- emit_move_insn (tmp_reg, GEN_INT (offset));
- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg));
-   }
-}
-
-  if (flags & ADJUST_SP_NEED_NOTE)
+  if (df && 

Re: [PATCH] xtensa: Optimize stack frame adjustment more

2023-01-06 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/01/06 17:05, Max Filippov wrote:
> On Thu, Jan 5, 2023 at 10:57 PM Takayuki 'January June' Suwa
>  wrote:
>> By using the helper function, it makes stack frame adjustment logic
>> simplified and instruction count less in some cases.
> 
> I've built a couple linux configurations with and without this change and
> I observe consistent code size growth, e.g.:
> 
> iss_defconfig without the change:
>   textdata bss dec hex filename
> 3014768  164016  115108 3293892  3242c4 vmlinux
> 
> iss_defconfig with the change:
>   textdata bss dec hex filename
> 3015296  164008  115108 3294412  3244cc vmlinux
> 
> virt_defconfig without the change:
>   textdata bss dec hex filename
> 5498881 2254360  291768 8045009  7ac1d1 vmlinux
> 
> virt_defconfig with the change:
>   textdata bss dec hex filename
> 5500389 2254360  291768 8046517  7ac7b5 vmlinux
> 
> generic_kc705_defconfig without the change:
>   textdata bss dec hex filename
> 7062530  635340  286400 7984270  79d48e vmlinux
> 
> generic_kc705_defconfig with the change:
>   textdata bss dec hex filename
> 7064078  635340  286400 7985818  79da9a vmlinux
> 

Probably due to this location:
> +  else if (TARGET_DENSITY && optimize_size && xtensa_simm12b (imm))
 
I omitted it in the new patch, so please check it.


Re: [PATCH] xtensa: Optimize stack frame adjustment more

2023-01-05 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/01/06 15:26, Max Filippov wrote:
> On Thu, Jan 5, 2023 at 7:35 PM Takayuki 'January June' Suwa
>  wrote:
>> On second thought, it cannot be a good idea to split addition/subtraction to 
>> the stack pointer.
>>
>>> -4aaf:  b0a192  movia9, 0x1b0
>>> -4ab2:  1f9aadd.n   a1, a15, a9
>>
>>> +4aaf:  02df12  addmi   a1, a15, 0x200
>>> +4ab2:  b0c112  addia1, a1, -80
>>
>> Because the former is atomic, but the latter is not. (may be interrupted 
>> between the two add instructions)
> 
> Oh, right, there are two issues: one is interruption in the absence of
> detailed stack tracking in the DWARF info, which can be fixed by emitting
> a separate note for each a1 change, the other is interruption when
> a1 is in the parent frame, which can be fixed by always moving a1
> down first, e.g. with the following change:
> 
> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
> index 3b8a7bcda371..29cb91fa7de5 100644
> --- a/gcc/config/xtensa/xtensa.cc
> +++ b/gcc/config/xtensa/xtensa.cc
> @@ -2539,7 +2539,10 @@ xtensa_split_imm_two_addends (HOST_WIDE_INT
> imm, HOST_WIDE_INT v[2])
> 
>   if (xtensa_simm8 (v1) || xtensa_simm8x256 (v1))
> {
> -  v[0] = v0, v[1] = v1;
> +  if (v0 < 0)
> +   v[0] = v0, v[1] = v1;
> +  else
> +   v[0] = v1, v[1] = v0;
>   return true;
> }
> 
> Or both can be fixed by using a scratch register in the middle of the
> addi/addmi sequence.
> 
>> I'll wait for the results of your investigation, but it may be better to 
>> withdraw the patch.
> 
> The issue was in the unwinding code in the libgcc_s.so. I haven't figured
> out the exact mechanism, but found that emitting a separate note for each
> a1 change fixes it.
> 

Oh, thank you very much for your detailed investigation.  I will try to correct 
what you pointed out ASAP.


Re: [PATCH] xtensa: Optimize stack frame adjustment more

2023-01-05 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/01/06 6:32, Max Filippov wrote:
> Hi Suwa-san,
Hi!

> 
> On Thu, Jan 5, 2023 at 3:57 AM Takayuki 'January June' Suwa
>  wrote:
>>
>> This patch introduces a convenient helper function for integer immediate
>> addition with scratch register as needed, that splits and emits either
>> up to two ADDI/ADDMI machine instructions or an addition by register
>> following an immediate integer load (which may later be transformed by
>> constantsynth).
>>
>> By using the helper function, it makes stack frame adjustment logic
>> simplified and instruction count less in some cases.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.cc
>> (xtensa_split_imm_two_addends, xtensa_emit_add_imm):
>> New helper functions.
>> (xtensa_emit_adjust_stack_ptr, xtensa_set_return_address,
>> xtensa_output_mi_thunk): Change to use the helper function.
>> ---
>>  gcc/config/xtensa/xtensa.cc | 139 +++-
>>  1 file changed, 88 insertions(+), 51 deletions(-)
> 
> This change introduces a bunch of failures in the g++ testsuite,
> but the culprit is apparently somewhere in the libstdc++.so, I'm
> still looking for it.
> 
> I see the following pattern change in the generated epilogue code:
> 
> -4aaf:  b0a192  movia9, 0x1b0
> -4ab2:  1f9aadd.n   a1, a15, a9
> ...
> -4abe:  20c112  addia1, a1, 32
> -4ac1:  f00dret.n
> +4aaf:  02df12  addmi   a1, a15, 0x200
> +4ab2:  b0c112  addia1, a1, -80
> ...
> +4abf:  20c112  addia1, a1, 32
> +4ac2:  f00dret.n
> 
> I.e. a1 is first moved into the parent stack frame, then back to the right
> spot. This does not look correct, especially for bare-metal targets.
> 

On second thought, it cannot be a good idea to split addition/subtraction to 
the stack pointer.

> -4aaf:  b0a192  movia9, 0x1b0
> -4ab2:  1f9aadd.n   a1, a15, a9

> +4aaf:  02df12  addmi   a1, a15, 0x200
> +4ab2:  b0c112  addia1, a1, -80

Because the former is atomic, but the latter is not. (may be interrupted 
between the two add instructions)

I'll wait for the results of your investigation, but it may be better to 
withdraw the patch.


[PATCH] xtensa: Optimize stack frame adjustment more

2023-01-05 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch introduces a convenient helper function for integer immediate
addition with scratch register as needed, that splits and emits either
up to two ADDI/ADDMI machine instructions or an addition by register
following an immediate integer load (which may later be transformed by
constantsynth).

By using the helper function, it makes stack frame adjustment logic
simplified and instruction count less in some cases.

gcc/ChangeLog:

* config/xtensa/xtensa.cc
(xtensa_split_imm_two_addends, xtensa_emit_add_imm):
New helper functions.
(xtensa_emit_adjust_stack_ptr, xtensa_set_return_address,
xtensa_output_mi_thunk): Change to use the helper function.
---
 gcc/config/xtensa/xtensa.cc | 139 +++-
 1 file changed, 88 insertions(+), 51 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index ae44199bc98..3b8a7bcda37 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2518,6 +2518,82 @@ xtensa_split_DI_reg_imm (rtx *operands)
 }
 
 
+/* Try to split an integer value into what are suitable for two consecutive
+   immediate addition instructions, ADDI or ADDMI.  */
+
+static bool
+xtensa_split_imm_two_addends (HOST_WIDE_INT imm, HOST_WIDE_INT v[2])
+{
+  HOST_WIDE_INT v0, v1;
+
+  if (imm < -32768)
+v0 = -32768, v1 = imm + 32768;
+  else if (imm > 32512)
+v0 = 32512, v1 = imm - 32512;
+  else if (TARGET_DENSITY && optimize_size && xtensa_simm12b (imm))
+/* A pair of MOVI(.N) and ADD.N is one or two bytes less than two
+   immediate additions if TARGET_DENSITY.  */
+return false;
+  else
+v0 = (imm + 128) & ~255L, v1 = imm - v0;
+
+  if (xtensa_simm8 (v1) || xtensa_simm8x256 (v1))
+{
+  v[0] = v0, v[1] = v1;
+  return true;
+}
+
+  return false;
+}
+
+
+/* Helper function for integer immediate addition with scratch register
+   as needed, that splits and emits either up to two ADDI/ADDMI machine
+   instructions or an addition by register following an immediate integer
+   load (which may later be transformed by constantsynth).
+
+   If 'scratch' is NULL_RTX but still needed, a new pseudo-register will
+   be allocated.  Thus, after the reload/LRA pass, the specified scratch
+   register must be a hard one.  */
+
+static void
+xtensa_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch,
+bool need_note)
+{
+  HOST_WIDE_INT v[2];
+  rtx_insn *insn0, *insn1;
+
+  if (imm == 0)
+return;
+
+  if (xtensa_simm8 (imm) || xtensa_simm8x256 (imm))
+insn0 = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
+  else if (xtensa_split_imm_two_addends (imm, v))
+{
+  insn0 = emit_insn (gen_addsi3 (dst, src, GEN_INT (v[0])));
+  insn1 = emit_insn (gen_addsi3 (dst, dst, GEN_INT (v[1])));
+  if (need_note)
+   RTX_FRAME_RELATED_P (insn1) = 1;
+}
+  else
+{
+  if (scratch)
+   emit_move_insn (scratch, GEN_INT (imm));
+  else
+   scratch = force_reg (SImode, GEN_INT (imm));
+  insn0 = emit_insn (gen_addsi3 (dst, src, scratch));
+}
+
+  if (need_note)
+{
+  rtx note_rtx = gen_rtx_SET (dst, plus_constant (Pmode, src, imm));
+
+  RTX_FRAME_RELATED_P (insn0) = 1;
+  add_reg_note (insn0, REG_FRAME_RELATED_EXPR, note_rtx);
+}
+}
+
+
 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
 
 static bool
@@ -3245,41 +3321,14 @@ xtensa_initial_elimination_offset (int from, int to 
ATTRIBUTE_UNUSED)
 static void
 xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
 {
-  rtx_insn *insn;
-  rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx
- : stack_pointer_rtx;
-
   if (cfun->machine->inhibit_logues_a1_adjusts)
 return;
 
-  if (xtensa_simm8 (offset)
-  || xtensa_simm8x256 (offset))
-insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset)));
-  else
-{
-  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-
-  if (offset < 0)
-   {
- emit_move_insn (tmp_reg, GEN_INT (-offset));
- insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg));
-   }
-  else
-   {
- emit_move_insn (tmp_reg, GEN_INT (offset));
- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg));
-   }
-}
-
-  if (flags & ADJUST_SP_NEED_NOTE)
-{
-  rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
- plus_constant (Pmode, stack_pointer_rtx,
-offset));
-
-  RTX_FRAME_RELATED_P (insn) = 1;
-  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
-}
+  xtensa_emit_add_imm (stack_pointer_rtx,
+  (flags & ADJUST_SP_FRAME_PTR)
+   ? hard_frame_pointer_rtx : stack_pointer_rtx,
+  offset, gen_rtx_REG (Pmode, A9_REG),
+  (flags & ADJUST_SP_NEED_NOTE));
 }
 
 /* minimum frame = reg 

[PATCH] xtensa: Check DF availability before use

2022-12-29 Thread Takayuki 'January June' Suwa via Gcc-patches
Perhaps no problem, but for safety.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_expand_prologue): Fix to check
DF availability before use of DF_* macros.
---
 gcc/config/xtensa/xtensa.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 66e25349521..e726a115029 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3322,7 +3322,7 @@ xtensa_expand_prologue (void)
  || crtl->calls_eh_return;
 
   /* Check if the function body really needs the stack pointer.  */
-  if (!stack_pointer_needed)
+  if (!stack_pointer_needed && df)
for (ref = DF_REG_USE_CHAIN (A1_REG);
 ref; ref = DF_REF_NEXT_REG (ref))
  if (DF_REF_CLASS (ref) == DF_REF_REGULAR
-- 
2.30.2


[PATCH] xtensa: Apply a few minor fixes

2022-12-26 Thread Takayuki 'January June' Suwa via Gcc-patches
Almost cosmetic and no functional changes.

gcc/ChangeLog:

* config/xtensa/*: Tabify, and trim trailing spaces.
* config/xtensa/xtensa.h (GP_RETURN, GP_RETURN_REG_COUNT):
Change to GP_RETURN_FIRST and GP_RETURN_LAST, respectively.
* config/xtensa/xtensa.cc (xtensa_function_value,
xtensa_libcall_value, xtensa_function_value_regno_p): Ditto.
(xtensa_expand_prologue): Modify to exit the inspection loops
as soon as the necessity of stack pointer is found.
(xtensa_set_return_address): Change the style of brackets.
* config/xtensa/xtensa.md (set_frame_ptr):
Fix to reflect TARGET_DENSITY.
---
 gcc/config/xtensa/elf.h  |  32 
 gcc/config/xtensa/linux.h|   1 -
 gcc/config/xtensa/uclinux.h  |   1 -
 gcc/config/xtensa/xtensa-dynconfig.c |   6 +-
 gcc/config/xtensa/xtensa.cc  | 105 ++-
 gcc/config/xtensa/xtensa.h   |  10 +--
 gcc/config/xtensa/xtensa.md  | 105 ++-
 7 files changed, 133 insertions(+), 127 deletions(-)

diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h
index fbdccc49c9b..1edc1761d74 100644
--- a/gcc/config/xtensa/elf.h
+++ b/gcc/config/xtensa/elf.h
@@ -59,7 +59,7 @@ along with GCC; see the file COPYING3.  If not see
   "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s"
 
 #undef ENDFILE_SPEC
-#define ENDFILE_SPEC "crtend%O%s crtn%O%s"  
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
 
 #undef LINK_SPEC
 #define LINK_SPEC \
@@ -86,19 +86,17 @@ along with GCC; see the file COPYING3.  If not see
 /* Search for headers in $tooldir/arch/include and for libraries and
startfiles in $tooldir/arch/lib.  */
 #define GCC_DRIVER_HOST_INITIALIZATION \
-do \
-{ \
-  char *tooldir, *archdir; \
-  tooldir = concat (tooldir_base_prefix, spec_machine, \
-   dir_separator_str, NULL); \
-  if (!IS_ABSOLUTE_PATH (tooldir)) \
-tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \
- spec_version, dir_separator_str, tooldir, NULL); \
-  archdir = concat (tooldir, "arch", dir_separator_str, NULL); \
-  add_prefix (_prefixes, \
- concat (archdir, "lib", dir_separator_str, NULL), \
- "GCC", PREFIX_PRIORITY_LAST, 0, 1); \
-  add_prefix (_prefixes, archdir, \
- "GCC", PREFIX_PRIORITY_LAST, 0, 0); \
-  } \
-while (0)
+  do { \
+char *tooldir, *archdir; \
+tooldir = concat (tooldir_base_prefix, spec_machine, \
+ dir_separator_str, NULL); \
+if (!IS_ABSOLUTE_PATH (tooldir)) \
+  tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, 
\
+   spec_version, dir_separator_str, tooldir, NULL); \
+archdir = concat (tooldir, "arch", dir_separator_str, NULL); \
+add_prefix (_prefixes, \
+   concat (archdir, "lib", dir_separator_str, NULL), \
+   "GCC", PREFIX_PRIORITY_LAST, 0, 1); \
+add_prefix (_prefixes, archdir, \
+   "GCC", PREFIX_PRIORITY_LAST, 0, 0); \
+  } while (0)
diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h
index bc7bee71517..198edfe0553 100644
--- a/gcc/config/xtensa/linux.h
+++ b/gcc/config/xtensa/linux.h
@@ -69,4 +69,3 @@ along with GCC; see the file COPYING3.  If not see
 #define XTENSA_ALWAYS_PIC 1
 
 #undef DEBUGGER_REGNO
-
diff --git a/gcc/config/xtensa/uclinux.h b/gcc/config/xtensa/uclinux.h
index 5fcf639ccff..5787b2f1ab9 100644
--- a/gcc/config/xtensa/uclinux.h
+++ b/gcc/config/xtensa/uclinux.h
@@ -71,4 +71,3 @@ along with GCC; see the file COPYING3.  If not see
 #define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function
 
 #undef DEBUGGER_REGNO
-
diff --git a/gcc/config/xtensa/xtensa-dynconfig.c b/gcc/config/xtensa/xtensa-dynconfig.c
index 056204ae946..0a611fd14b0 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.c
+++ b/gcc/config/xtensa/xtensa-dynconfig.c
@@ -35,7 +35,7 @@
 
 #if !defined (HAVE_DLFCN_H) && defined (_WIN32)
 
-#define RTLD_LAZY 0  /* Dummy value.  */
+#define RTLD_LAZY 0/* Dummy value.  */
 
 static void *
 dlopen (const char *file, int mode ATTRIBUTE_UNUSED)
@@ -142,8 +142,8 @@ XTENSA_CONFIG_INSTANCE_LIST;
 #define XTENSA_CONFIG_ENTRY(a) "__" #a "=" STRINGIFY(a)
 
 static const char * const xtensa_config_strings[] = {
-XTENSA_CONFIG_ENTRY_LIST,
-NULL,
+  XTENSA_CONFIG_ENTRY_LIST,
+  NULL,
 };
 
 const struct xtensa_config_v1 *xtensa_get_config_v1 (void)
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 94a98c25f8c..66e25349521 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -176,7 +176,7 @@ static bool constantpool_address_p (const_rtx addr);
 static bool xtensa_legitimate_constant_p (machine_mode, rtx);
 static void xtensa_reorg (void);
 static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int &,
- unsigned int, bool);
+   

Re: [PATCH 2/2] xtensa: Implement new target hook: TARGET_CONSTANT_OK_FOR_CPROP_P

2022-09-12 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2022/09/13 4:34, Max Filippov wrote:
Hi!

> On Sun, Sep 11, 2022 at 1:50 PM Takayuki 'January June' Suwa
>  wrote:
>>
>> This patch implements new target hook TARGET_CONSTANT_OK_FOR_CPROP_P in
>> order to exclude CONST_INTs that cannot fit into a MOVI machine instruction
>> from cprop.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.cc (TARGET_CONSTANT_OK_FOR_CPROP_P):
>> New macro definition.
>> (xtensa_constant_ok_for_cprop_p):
>> Implement the hook as mentioned above.
>> ---
>>  gcc/config/xtensa/xtensa.cc | 20 +---
>>  1 file changed, 17 insertions(+), 3 deletions(-)
> 
> Regtested for target=xtensa-linux-uclibc, no new regressions.
> Committed to master.
> 

Oops, sorry, this patch doesn't have the prerequisite patch merged in, so 
please revert (that target hook isn't working yet).


[PATCH 2/2] xtensa: Implement new target hook: TARGET_CONSTANT_OK_FOR_CPROP_P

2022-09-11 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch implements new target hook TARGET_CONSTANT_OK_FOR_CPROP_P in
order to exclude CONST_INTs that cannot fit into a MOVI machine instruction
from cprop.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (TARGET_CONSTANT_OK_FOR_CPROP_P):
New macro definition.
(xtensa_constant_ok_for_cprop_p):
Implement the hook as mentioned above.
---
 gcc/config/xtensa/xtensa.cc | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index ac52c015a94..5c432cc65aa 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -191,6 +191,7 @@ static bool xtensa_can_eliminate (const int from 
ATTRIBUTE_UNUSED,
 static HOST_WIDE_INT xtensa_starting_frame_offset (void);
 static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void);
 static bool xtensa_function_ok_for_sibcall (tree, tree);
+static bool xtensa_constant_ok_for_cprop_p (const_rtx);
 static rtx xtensa_delegitimize_address (rtx);
 
 
@@ -345,12 +346,15 @@ static rtx xtensa_delegitimize_address (rtx);
 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
 
-#undef TARGET_DELEGITIMIZE_ADDRESS
-#define TARGET_DELEGITIMIZE_ADDRESS xtensa_delegitimize_address
-
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL xtensa_function_ok_for_sibcall
 
+#undef TARGET_CONSTANT_OK_FOR_CPROP_P
+#define TARGET_CONSTANT_OK_FOR_CPROP_P xtensa_constant_ok_for_cprop_p
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS xtensa_delegitimize_address
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 
@@ -4983,6 +4987,16 @@ xtensa_function_ok_for_sibcall (tree decl 
ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_U
   return true;
 }
 
+/* Implement TARGET_CONSTANT_OK_FOR_CPROP_P.  */
+static bool
+xtensa_constant_ok_for_cprop_p (const_rtx x)
+{
+  if (CONST_INT_P (x) && ! xtensa_simm12b (INTVAL (x)))
+return false;
+
+  return true;
+}
+
 static rtx
 xtensa_delegitimize_address (rtx op)
 {
-- 
2.20.1


[PATCH 1/2] Add new target hook: constant_ok_for_cprop_p

2022-09-11 Thread Takayuki 'January June' Suwa via Gcc-patches
Hi,

Many RISC machines, as we know, have some restrictions on placing 
register-width constants in the source of load-immediate machine instructions, 
so the target must provide a solution for that in the machine description.

A naive way would be to solve it early, i.e., to replace such constants with 
reads from a constant pool in memory when expanding to RTL.

Alternatively, a fancier approach would be to forgo placement in the 
constant pool until somewhere before reload/LRA, e.g. the "split1" pass, to 
give the optimization passes that involve immediates a chance to work.

If we choose the latter, we can expect better results with RTL if-conversion, 
constant folding, etc., but it often propagates constants that are too large in 
size to resolve to a simple load-immediate instruction.

This is because constant propagation has no way of knowing about such 
restrictions, so this patch provides one.

===

This new target hook can be used to tell cprop whether or not to propagate
a constant depending on its contents.

For backwards compatibility, the default setting for this hook retains the
old behavior.

gcc/ChangeLog:

* hooks.h (hook_bool_const_rtx_true): New prototype.
* hooks.cc (hook_bool_const_rtx_true): New default hook.
* target.def (constant_ok_for_cprop_p): New target hook.
* cprop.cc (cprop_constant_p): Change to use the hook.
* doc/tm.texi.in (TARGET_CONSTANT_OK_FOR_CPROP_P): New @hook.
* doc/tm.texi (TARGET_CONSTANT_OK_FOR_CPROP_P): New document.
---
 gcc/cprop.cc   |  4 +++-
 gcc/doc/tm.texi| 12 
 gcc/doc/tm.texi.in |  2 ++
 gcc/hooks.cc   |  7 +++
 gcc/hooks.h|  1 +
 gcc/target.def | 14 ++
 6 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/gcc/cprop.cc b/gcc/cprop.cc
index 580f811545d..dfb1e88e9b4 100644
--- a/gcc/cprop.cc
+++ b/gcc/cprop.cc
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "dbgcnt.h"
 #include "cfgloop.h"
 #include "gcse.h"
+#include "target.h"
 
 
 /* An obstack for our working variables.  */
@@ -249,7 +250,8 @@ insert_set_in_table (rtx dest, rtx src, rtx_insn *insn,
 static bool
 cprop_constant_p (const_rtx x)
 {
-  return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x));
+  return CONSTANT_P (x) && targetm.constant_ok_for_cprop_p (x)
+&& (GET_CODE (x) != CONST || shared_const_p (x));
 }
 
 /* Determine whether the rtx X should be treated as a register that can
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 858bfb80cec..83151626a71 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12187,6 +12187,18 @@ MIPS, where add-immediate takes a 16-bit signed value,
 is zero, which disables this optimization.
 @end deftypevr
 
+@deftypefn {Target Hook} bool TARGET_CONSTANT_OK_FOR_CPROP_P (const_rtx 
@var{cst})
+On some target machines, such as RISC ones, load-immediate instructions
+often have a limited range (for example, within signed 12 bits or less).
+Because they will be typically placed into the constant pool,
+unconditionally propagating constants that exceed such limit can lead to
+increased number of instruction and/or memory read access.
+This target hook should return @code{false} if @var{cst}, a candidate for
+constant propagation, is undesirable as a source for load-immediate
+instructions.
+The default version of this hook always returns @code{true}.
+@end deftypefn
+
 @deftypefn {Target Hook} {unsigned HOST_WIDE_INT} TARGET_ASAN_SHADOW_OFFSET 
(void)
 Return the offset bitwise ored into shifted address to get corresponding
 Address Sanitizer shadow memory address.  NULL if Address Sanitizer is not
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 21b849ea32a..147331b0f53 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7887,6 +7887,8 @@ and the associated definitions of those functions.
 
 @hook TARGET_CONST_ANCHOR
 
+@hook TARGET_CONSTANT_OK_FOR_CPROP_P
+
 @hook TARGET_ASAN_SHADOW_OFFSET
 
 @hook TARGET_MEMMODEL_CHECK
diff --git a/gcc/hooks.cc b/gcc/hooks.cc
index b29233f4f85..67bf3553d26 100644
--- a/gcc/hooks.cc
+++ b/gcc/hooks.cc
@@ -82,6 +82,13 @@ hook_bool_mode_true (machine_mode)
   return true;
 }
 
+/* Generic hook that takes (const_rtx) and returns true.  */
+bool
+hook_bool_const_rtx_true (const_rtx)
+{
+  return true;
+}
+
 /* Generic hook that takes (machine_mode, machine_mode) and returns true.  */
 bool
 hook_bool_mode_mode_true (machine_mode, machine_mode)
diff --git a/gcc/hooks.h b/gcc/hooks.h
index 1056e1e9e4d..d001f8fb9dc 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -30,6 +30,7 @@ extern bool hook_bool_bool_gcc_optionsp_false (bool, struct 
gcc_options *);
 extern bool hook_bool_const_int_const_int_true (const int, const int);
 extern bool hook_bool_mode_false (machine_mode);
 extern bool hook_bool_mode_true (machine_mode);
+extern bool hook_bool_const_rtx_true (const_rtx);
 extern bool hook_bool_mode_mode_true (machine_mode, machine_mode);
 extern bool 

[PATCH] xtensa: constantsynth: Add new 3-insns synthesis pattern

2022-09-10 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch adds a new 3-instructions constant synthesis pattern:

- A value that can fit into a signed 12-bit immediate after some number of
  bitwise left or right rotations:
=> "MOVI(.N) Ax, simm12" + "SSAI (1 ... 11) or (21 ... 31)"
+ "SRC Ax, Ax, Ax"

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_constantsynth):
Add new pattern for the abovementioned case.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/constsynth_3insns.c (test_4):
Add new test function.
---
 gcc/config/xtensa/xtensa.cc   | 31 +++
 .../gcc.target/xtensa/constsynth_3insns.c | 11 +++
 2 files changed, 42 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 0f586b09dfb..ac52c015a94 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -1142,6 +1142,37 @@ xtensa_constantsynth (rtx dst, HOST_WIDE_INT srcval)
  xtensa_constantsynth_rtx_ADDSUBX,
  divisor))
return 1;
+
+  /* loading simm12 followed by left/right bitwise rotation:
+MOVI + SSAI + SRC.  */
+  if ((srcval & 0x001FF800) == 0
+ || (srcval & 0x001FF800) == 0x001FF800)
+   {
+ int32_t v;
+
+ for (shift = 1; shift < 12; ++shift)
+   {
+ v = (int32_t)(((uint32_t)srcval >> shift)
+   | ((uint32_t)srcval << (32 - shift)));
+ if (xtensa_simm12b(v))
+   {
+ emit_move_insn (dst, GEN_INT (v));
+ emit_insn (gen_rotlsi3 (dst, dst, GEN_INT (shift)));
+ return 1;
+   }
+   }
+ for (shift = 1; shift < 12; ++shift)
+   {
+ v = (int32_t)(((uint32_t)srcval << shift)
+   | ((uint32_t)srcval >> (32 - shift)));
+ if (xtensa_simm12b(v))
+   {
+ emit_move_insn (dst, GEN_INT (v));
+ emit_insn (gen_rotrsi3 (dst, dst, GEN_INT (shift)));
+ return 1;
+   }
+   }
+   }
 }
 
   return 0;
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c 
b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c
index f3c4a1c7c15..831288c7ddd 100644
--- a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c
+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c
@@ -21,4 +21,15 @@ void test_3(int *p)
   *p = 192437;
 }
 
+struct foo
+{
+  unsigned int b : 10;
+  unsigned int g : 11;
+  unsigned int r : 11;
+};
+void test_4(struct foo *p, unsigned int v)
+{
+  p->g = v;
+}
+
 /* { dg-final { scan-assembler-not "l32r" } } */
-- 
2.20.1


[PATCH v4 1/2] xtensa: Eliminate unused stack frame allocation/freeing

2022-09-08 Thread Takayuki 'January June' Suwa via Gcc-patches
Changes from v3:
  (xtensa_expand_prologue): Changed to exclude debug insns from DF use chain 
analysis.

---

In the example below, 'x' is once placed on the stack frame and then read
into registers as the argument value of bar():

/* example */
struct foo {
  int a, b;
};
extern struct foo bar(struct foo);
struct foo test(void) {
  struct foo x = { 0, 1 };
  return bar(x);
}

Thanks to the dead store elimination, the initialization of 'x' turns into
merely loading the immediates to registers, but corresponding stack frame
growth is not rolled back.  As a result:

;; prereq: the CALL0 ABI
;; before
test:
addi    sp, sp, -16 // unused stack frame allocation/freeing
movi.n  a2, 0
movi.n  a3, 1
addi    sp, sp, 16  // because no instructions that refer to
j.l     bar, a9     // the stack pointer between the two

This patch eliminates such unused stack frame allocation/freeing:

;; after
test:
movi.n  a2, 0
movi.n  a3, 1
j.l bar, a9

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): New boolean member as
a flag that controls whether to emit the insns for stack pointer
adjustment inside of the pro/epilogue.
(xtensa_emit_adjust_stack_ptr): New function to share the common
codes and to emit insns if not inhibited.
(xtensa_expand_epilogue): Change to use the function mentioned
above when using the CALL0 ABI.
(xtensa_expand_prologue): Ditto.
And also change to set the inhibit flag used by
xtensa_emit_adjust_stack_ptr() to true if the stack pointer is only
used for its own adjustment.
---
 gcc/config/xtensa/xtensa.cc | 164 ++--
 1 file changed, 80 insertions(+), 84 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 93ac6562b22..0f586b09dfb 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -102,6 +102,7 @@ struct GTY(()) machine_function
   int callee_save_size;
   bool frame_laid_out;
   bool epilogue_done;
+  bool inhibit_logues_a1_adjusts;
 };
 
 /* Vector, indexed by hard register number, which contains 1 for a
@@ -3048,7 +3049,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode 
mode, int labelno)
 }
 
 static bool
-xtensa_call_save_reg(int regno)
+xtensa_call_save_reg (int regno)
 {
   if (TARGET_WINDOWED_ABI)
 return false;
@@ -3084,7 +3085,7 @@ compute_frame_size (poly_int64 size)
   cfun->machine->callee_save_size = 0;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
 {
-  if (xtensa_call_save_reg(regno))
+  if (xtensa_call_save_reg (regno))
cfun->machine->callee_save_size += UNITS_PER_WORD;
 }
 
@@ -3139,6 +3140,49 @@ xtensa_initial_elimination_offset (int from, int to 
ATTRIBUTE_UNUSED)
   return offset;
 }
 
+#define ADJUST_SP_NONE  0x0
+#define ADJUST_SP_NEED_NOTE 0x1
+#define ADJUST_SP_FRAME_PTR 0x2
+static void
+xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
+{
+  rtx_insn *insn;
+  rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx
+ : stack_pointer_rtx;
+
+  if (cfun->machine->inhibit_logues_a1_adjusts)
+return;
+
+  if (xtensa_simm8 (offset)
+  || xtensa_simm8x256 (offset))
+insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset)));
+  else
+{
+  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+
+  if (offset < 0)
+   {
+ emit_move_insn (tmp_reg, GEN_INT (-offset));
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg));
+   }
+  else
+   {
+ emit_move_insn (tmp_reg, GEN_INT (offset));
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg));
+   }
+}
+
+  if (flags & ADJUST_SP_NEED_NOTE)
+{
+  rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+offset));
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+}
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3174,17 +3218,30 @@ xtensa_expand_prologue (void)
   int regno;
   HOST_WIDE_INT offset = 0;
   int callee_save_size = cfun->machine->callee_save_size;
+  df_ref ref;
+  bool stack_pointer_needed = frame_pointer_needed
+ || crtl->calls_eh_return;
+
+  /* Check if the function body really needs the stack pointer.  */
+  if (!stack_pointer_needed)
+   for (ref = DF_REG_USE_CHAIN (A1_REG);
+ref; ref = DF_REF_NEXT_REG (ref))
+ if (DF_REF_CLASS (ref) == DF_REF_REGULAR
+ && 

[PATCH v3 1/2] xtensa: Eliminate unused stack frame allocation/freeing

2022-09-07 Thread Takayuki 'January June' Suwa via Gcc-patches
Changes from v2:
  (xtensa_expand_prologue): Changed to check the conditions for suppressing 
insn emission in advance, instead of tracking the emitted insns and later 
replacing them with NOPs if they are found to be unnecessary.

---

In the example below, 'x' is once placed on the stack frame and then read
into registers as the argument value of bar():

/* example */
struct foo {
  int a, b;
};
extern struct foo bar(struct foo);
struct foo test(void) {
  struct foo x = { 0, 1 };
  return bar(x);
}

Thanks to the dead store elimination, the initialization of 'x' turns into
merely loading the immediates to registers, but corresponding stack frame
growth is not rolled back.  As a result:

;; prereq: the CALL0 ABI
;; before
test:
addi    sp, sp, -16 // unused stack frame allocation/freeing
movi.n  a2, 0
movi.n  a3, 1
addi    sp, sp, 16  // because no instructions that refer to
j.l     bar, a9     // the stack pointer between the two

This patch eliminates such unused stack frame allocation/freeing:

;; after
test:
movi.n  a2, 0
movi.n  a3, 1
j.l bar, a9

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): New boolean member as
a flag that controls whether to emit the insns for stack pointer
adjustment inside of the pro/epilogue.
(xtensa_emit_adjust_stack_ptr): New function to share the common
codes and to emit insns if not inhibited.
(xtensa_expand_epilogue): Change to use the function mentioned
above when using the CALL0 ABI.
(xtensa_expand_prologue): Ditto.
And also change to set the inhibit flag used by
xtensa_emit_adjust_stack_ptr() to true if the stack pointer is only
used for its own adjustment.
---
 gcc/config/xtensa/xtensa.cc | 162 +---
 1 file changed, 78 insertions(+), 84 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 93ac6562b22..86f94152a96 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -102,6 +102,7 @@ struct GTY(()) machine_function
   int callee_save_size;
   bool frame_laid_out;
   bool epilogue_done;
+  bool inhibit_logues_a1_adjusts;
 };
 
 /* Vector, indexed by hard register number, which contains 1 for a
@@ -3048,7 +3049,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode 
mode, int labelno)
 }
 
 static bool
-xtensa_call_save_reg(int regno)
+xtensa_call_save_reg (int regno)
 {
   if (TARGET_WINDOWED_ABI)
 return false;
@@ -3084,7 +3085,7 @@ compute_frame_size (poly_int64 size)
   cfun->machine->callee_save_size = 0;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
 {
-  if (xtensa_call_save_reg(regno))
+  if (xtensa_call_save_reg (regno))
cfun->machine->callee_save_size += UNITS_PER_WORD;
 }
 
@@ -3139,6 +3140,49 @@ xtensa_initial_elimination_offset (int from, int to 
ATTRIBUTE_UNUSED)
   return offset;
 }
 
+#define ADJUST_SP_NONE  0x0
+#define ADJUST_SP_NEED_NOTE 0x1
+#define ADJUST_SP_FRAME_PTR 0x2
+static void
+xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
+{
+  rtx_insn *insn;
+  rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx
+ : stack_pointer_rtx;
+
+  if (cfun->machine->inhibit_logues_a1_adjusts)
+return;
+
+  if (xtensa_simm8 (offset)
+  || xtensa_simm8x256 (offset))
+insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset)));
+  else
+{
+  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+
+  if (offset < 0)
+   {
+ emit_move_insn (tmp_reg, GEN_INT (-offset));
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg));
+   }
+  else
+   {
+ emit_move_insn (tmp_reg, GEN_INT (offset));
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg));
+   }
+}
+
+  if (flags & ADJUST_SP_NEED_NOTE)
+{
+  rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+offset));
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+}
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3174,17 +3218,28 @@ xtensa_expand_prologue (void)
   int regno;
   HOST_WIDE_INT offset = 0;
   int callee_save_size = cfun->machine->callee_save_size;
+  df_ref ref;
+  bool stack_pointer_really_used = false;
+
+  /* Check if the function body really needs the stack pointer.  */
+  for (ref = DF_REG_USE_CHAIN (A1_REG);
+  ref; ref = DF_REF_NEXT_REG (ref))
+   if (DF_REF_CLASS (ref) == DF_REF_REGULAR)
+ 

[PATCH v2 1/2] xtensa: Eliminate unused stack frame allocation/freeing

2022-09-02 Thread Takayuki 'January June' Suwa via Gcc-patches
Changes from v1:
  (xtensa_expand_epilogue): Fixed forgetting to consider hard_frame_pointer_rtx 
when sharing codes.

---
In the example below, 'x' is once placed on the stack frame and then read
into registers as the argument value of bar():

/* example */
struct foo {
  int a, b;
};
extern struct foo bar(struct foo);
struct foo test(void) {
  struct foo x = { 0, 1 };
  return bar(x);
}

Thanks to the dead store elimination, the initialization of 'x' turns into
merely loading the immediates to registers, but corresponding stack frame
growth is not rolled back.  As a result:

;; prereq: the CALL0 ABI
;; before
test:
addi    sp, sp, -16 // unused stack frame allocation/freeing
movi.n  a2, 0
movi.n  a3, 1
addi    sp, sp, 16  // because no instructions that refer to
j.l     bar, a9     // the stack pointer between the two

This patch eliminates such unused stack frame allocation/freeing:

;; after
test:
movi.n  a2, 0
movi.n  a3, 1
j.l bar, a9

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): New member to track
the insns for stack pointer adjustment inside of the pro/epilogue.
(xtensa_emit_adjust_stack_ptr): New function to share the common
codes and to record the insns for stack pointer adjustment.
(xtensa_expand_prologue): Change to use the function mentioned
above when using the CALL0 ABI.
(xtensa_expand_epilogue): Ditto.
And also change to cancel emitting the insns for the stack pointer
adjustment if only used for its own.
---
 gcc/config/xtensa/xtensa.cc | 230 ++--
 1 file changed, 118 insertions(+), 112 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index b673b6764da..17416fc6c3f 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -102,6 +102,7 @@ struct GTY(()) machine_function
   int callee_save_size;
   bool frame_laid_out;
   bool epilogue_done;
+  hash_set<rtx_insn *> *logues_a1_adjusts;
 };
 
 /* Vector, indexed by hard register number, which contains 1 for a
@@ -3048,7 +3049,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode 
mode, int labelno)
 }
 
 static bool
-xtensa_call_save_reg(int regno)
+xtensa_call_save_reg (int regno)
 {
   if (TARGET_WINDOWED_ABI)
 return false;
@@ -3084,7 +3085,7 @@ compute_frame_size (poly_int64 size)
   cfun->machine->callee_save_size = 0;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
 {
-  if (xtensa_call_save_reg(regno))
+  if (xtensa_call_save_reg (regno))
cfun->machine->callee_save_size += UNITS_PER_WORD;
 }
 
@@ -3143,6 +3144,51 @@ xtensa_initial_elimination_offset (int from, int to 
ATTRIBUTE_UNUSED)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
 
+#define ADJUST_SP_NONE  0x0
+#define ADJUST_SP_NEED_NOTE 0x1
+#define ADJUST_SP_FRAME_PTR 0x2
+static rtx_insn *
+xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
+{
+  rtx_insn *insn;
+  rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx
+ : stack_pointer_rtx;
+
+  if (xtensa_simm8 (offset)
+  || xtensa_simm8x256 (offset))
+insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset)));
+  else
+{
+  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+  rtx_insn* tmp_insn;
+
+  if (offset < 0)
+   {
+ tmp_insn = emit_move_insn (tmp_reg, GEN_INT (-offset));
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg));
+   }
+  else
+   {
+ tmp_insn = emit_move_insn (tmp_reg, GEN_INT (offset));
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg));
+   }
+  cfun->machine->logues_a1_adjusts->add (tmp_insn);
+}
+
+  if (flags & ADJUST_SP_NEED_NOTE)
+{
+  rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+offset));
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+}
+
+  cfun->machine->logues_a1_adjusts->add (insn);
+  return insn;
+}
+
 void
 xtensa_expand_prologue (void)
 {
@@ -3175,16 +3221,13 @@ xtensa_expand_prologue (void)
   HOST_WIDE_INT offset = 0;
   int callee_save_size = cfun->machine->callee_save_size;
 
+  cfun->machine->logues_a1_adjusts = new hash_set<rtx_insn *>;
+
   /* -128 is a limit of single addi instruction. */
   if (IN_RANGE (total_size, 1, 128))
{
- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-   GEN_INT (-total_size)));
- RTX_FRAME_RELATED_P (insn) = 1;
- note_rtx = gen_rtx_SET (stack_pointer_rtx,
-  

[PATCH 1/2] xtensa: Eliminate unused stack frame allocation/freeing

2022-08-31 Thread Takayuki 'January June' Suwa via Gcc-patches
In the example below, 'x' is once placed on the stack frame and then read
into registers as the argument value of bar():

/* example */
struct foo {
  int a, b;
};
extern struct foo bar(struct foo);
struct foo test(void) {
  struct foo x = { 0, 1 };
  return bar(x);
}

Thanks to the dead store elimination, the initialization of 'x' turns into
merely loading the immediates to registers, but corresponding stack frame
growth is not rolled back.  As a result:

;; prereq: the CALL0 ABI
;; before
test:
addi    sp, sp, -16 // unused stack frame allocation/freeing
movi.n  a2, 0
movi.n  a3, 1
addi    sp, sp, 16  // because no instructions that refer to
j.l     bar, a9     // the stack pointer between the two

This patch eliminates such unused stack frame allocation/freeing:

;; after
test:
movi.n  a2, 0
movi.n  a3, 1
j.l bar, a9

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): New member to track
the insns for stack pointer adjustment inside of the pro/epilogue.
(xtensa_emit_adjust_stack_pointer): New function to share the
common codes and to record the insns for stack pointer adjustment.
(xtensa_expand_prologue): Change to use the function mentioned
above when using the CALL0 ABI.
(xtensa_expand_epilogue): Ditto.
And also change to cancel emitting the insns for the stack pointer
adjustment if only used for its own.
---
 gcc/config/xtensa/xtensa.cc | 221 ++--
 1 file changed, 110 insertions(+), 111 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index b673b6764da..cd509876fd2 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -102,6 +102,7 @@ struct GTY(()) machine_function
   int callee_save_size;
   bool frame_laid_out;
   bool epilogue_done;
+  hash_set<rtx_insn *> *logues_a1_adjusts;
 };
 
 /* Vector, indexed by hard register number, which contains 1 for a
@@ -3048,7 +3049,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode 
mode, int labelno)
 }
 
 static bool
-xtensa_call_save_reg(int regno)
+xtensa_call_save_reg (int regno)
 {
   if (TARGET_WINDOWED_ABI)
 return false;
@@ -3084,7 +3085,7 @@ compute_frame_size (poly_int64 size)
   cfun->machine->callee_save_size = 0;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
 {
-  if (xtensa_call_save_reg(regno))
+  if (xtensa_call_save_reg (regno))
cfun->machine->callee_save_size += UNITS_PER_WORD;
 }
 
@@ -3143,6 +3144,49 @@ xtensa_initial_elimination_offset (int from, int to 
ATTRIBUTE_UNUSED)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
 
+static rtx_insn *
+xtensa_emit_adjust_stack_pointer (HOST_WIDE_INT offset, bool need_note)
+{
+  rtx_insn *insn;
+
+  if (xtensa_simm8 (offset)
+  || xtensa_simm8x256 (offset))
+insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (offset)));
+  else
+{
+  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+  rtx_insn* tmp_insn;
+
+  if (offset < 0)
+   {
+ tmp_insn = emit_move_insn (tmp_reg, GEN_INT (-offset));
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx,
+   tmp_reg));
+   }
+  else
+   {
+ tmp_insn = emit_move_insn (tmp_reg, GEN_INT (offset));
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+   tmp_reg));
+   }
+  cfun->machine->logues_a1_adjusts->add (tmp_insn);
+}
+
+  if (need_note)
+{
+  rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+offset));
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+}
+
+  cfun->machine->logues_a1_adjusts->add (insn);
+  return insn;
+}
+
 void
 xtensa_expand_prologue (void)
 {
@@ -3175,16 +3219,12 @@ xtensa_expand_prologue (void)
   HOST_WIDE_INT offset = 0;
   int callee_save_size = cfun->machine->callee_save_size;
 
+  cfun->machine->logues_a1_adjusts = new hash_set<rtx_insn *>;
+
   /* -128 is a limit of single addi instruction. */
   if (IN_RANGE (total_size, 1, 128))
{
- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-   GEN_INT (-total_size)));
- RTX_FRAME_RELATED_P (insn) = 1;
- note_rtx = gen_rtx_SET (stack_pointer_rtx,
- plus_constant (Pmode, stack_pointer_rtx,
--total_size));
- add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+ insn = 

[PATCH 2/2] xtensa: Make complex hard register clobber elimination more robust and accurate

2022-08-31 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch eliminates all clobbers for complex hard registers that will
be overwritten entirely afterwards (supersedence of
3867d414bd7d9e5b6fb2a51b1fb3d9e9e1eae9).

gcc/ChangeLog:

* config/xtensa/xtensa.md: Rewrite the split pattern that performs
the abovementioned process so that insns that overwrite clobbered
register no longer need to be contiguous.
(DSC): Remove as no longer needed.
---
 gcc/config/xtensa/xtensa.md | 67 +
 1 file changed, 45 insertions(+), 22 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 3ed269249a4..f722ea56289 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -86,10 +86,6 @@
 ;; This code iterator is for *shlrd and its variants.
 (define_code_iterator ior_op [ior plus])
 
-;; This mode iterator allows the DC and SC patterns to be defined from
-;; the same template.
-(define_mode_iterator DSC [DC SC])
-
 
 ;; Attributes.
 
@@ -2843,27 +2839,54 @@
 })
 
 (define_split
-  [(clobber (match_operand:DSC 0 "register_operand"))]
-  "GP_REG_P (REGNO (operands[0]))"
+  [(clobber (match_operand 0 "register_operand"))]
+  "HARD_REGISTER_P (operands[0])
+   && COMPLEX_MODE_P (GET_MODE (operands[0]))"
   [(const_int 0)]
 {
-  unsigned int regno = REGNO (operands[0]);
-  machine_mode inner_mode = GET_MODE_INNER (mode);
+  auto_sbitmap bmp (FIRST_PSEUDO_REGISTER);
   rtx_insn *insn;
-  rtx x;
-  if (! ((insn = next_nonnote_nondebug_insn (curr_insn))
-&& NONJUMP_INSN_P (insn)
-&& GET_CODE (x = PATTERN (insn)) == SET
-&& REG_P (x = XEXP (x, 0))
-&& GET_MODE (x) == inner_mode
-&& REGNO (x) == regno
-&& (insn = next_nonnote_nondebug_insn (insn))
-&& NONJUMP_INSN_P (insn)
-&& GET_CODE (x = PATTERN (insn)) == SET
-&& REG_P (x = XEXP (x, 0))
-&& GET_MODE (x) == inner_mode
-&& REGNO (x) == regno + REG_NREGS (operands[0]) / 2))
-FAIL;
+  rtx reg = gen_rtx_REG (SImode, 0);
+  bitmap_set_range (bmp, REGNO (operands[0]), REG_NREGS (operands[0]));
+  for (insn = next_nonnote_nondebug_insn_bb (curr_insn);
+   insn; insn = next_nonnote_nondebug_insn_bb (insn))
+{
+  sbitmap_iterator iter;
+  unsigned int regno;
+  if (NONJUMP_INSN_P (insn))
+   {
+ EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter)
+   {
+ set_regno_raw (reg, regno, REG_NREGS (reg));
+ if (reg_overlap_mentioned_p (reg, PATTERN (insn)))
+   break;
+   }
+ if (GET_CODE (PATTERN (insn)) == SET)
+   {
+ rtx x = SET_DEST (PATTERN (insn));
+ if (REG_P (x) && HARD_REGISTER_P (x))
+   bitmap_clear_range (bmp, REGNO (x), REG_NREGS (x));
+ else if (SUBREG_P (x) && HARD_REGISTER_P (SUBREG_REG (x)))
+   {
+ struct subreg_info info;
+ subreg_get_info (regno = REGNO (SUBREG_REG (x)),
+  GET_MODE (SUBREG_REG (x)),
+  SUBREG_BYTE (x), GET_MODE (x), );
+ if (!info.representable_p)
+   break;
+ bitmap_clear_range (bmp, regno + info.offset, info.nregs);
+   }
+   }
+ if (bitmap_empty_p (bmp))
+   goto FALLTHRU;
+   }
+  else if (CALL_P (insn))
+   EXECUTE_IF_SET_IN_BITMAP (bmp, 2, regno, iter)
+if (call_used_or_fixed_reg_p (regno))
+  break;
+}
+  FAIL;
+FALLTHRU:;
 })
 
 (define_peephole2
-- 
2.20.1


[PATCH] xtensa: Improve indirect sibling call handling

2022-08-18 Thread Takayuki 'January June' Suwa via Gcc-patches
The dedicated hard register (A11) for the address of the call and the
split patterns for fixups are no longer needed, thanks to the introduction
of an appropriate register class and constraint.

(Note: "ISC_REGS" contains the hard register A8, which is used as the
 "static chain" pointer for nested functions, but this is not a problem:
 a pointer to a nested function actually points to a "trampoline", and the
 trampoline itself doesn't receive the "static chain" pointer to its
 parent's stack frame from the caller.)

gcc/ChangeLog:

* config/xtensa/xtensa.h
(enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS):
Add new register class "ISC_REGS".
* config/xtensa/constraints.md (c): Add new register constraint.
* config/xtensa/xtensa.md (define_constants): Remove "A11_REG".
(sibcall_internal, sibcall_value_internal):
Change to use the new register constraint, and remove two split
patterns for fixups that are no longer needed.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/sibcalls.c: Add a new test function to ensure
that registers for arguments (occupy from A2 to A7) and for indirect
sibcall (should be assigned to A8) neither conflict nor spill out.
---
 gcc/config/xtensa/constraints.md   |  5 
 gcc/config/xtensa/xtensa.h |  3 +++
 gcc/config/xtensa/xtensa.md| 29 ++
 gcc/testsuite/gcc.target/xtensa/sibcalls.c |  5 
 4 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 0b7dcd1440e..e4c314b267c 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -27,6 +27,11 @@
  "Boolean registers @code{b0}-@code{b15}; only available if the Xtensa
   Boolean Option is configured.")
 
+(define_register_constraint "c" "TARGET_WINDOWED_ABI ? NO_REGS : ISC_REGS"
+ "@internal
+  General-purpose AR registers for indirect sibling calls, @code{a2}-
+  @code{a8}.")
+
 (define_register_constraint "d" "TARGET_DENSITY ? AR_REGS: NO_REGS"
  "@internal
   All AR registers, including sp, but only if the Xtensa Code Density
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index d027a777227..d51658afa89 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -378,6 +378,7 @@ enum reg_class
   FP_REGS, /* floating point registers */
   ACC_REG, /* MAC16 accumulator */
   SP_REG,  /* sp register (aka a1) */
+  ISC_REGS,/* registers for indirect sibling calls */
   RL_REGS, /* preferred reload regs (not sp or fp) */
   GR_REGS, /* integer registers except sp */
   AR_REGS, /* all integer registers */
@@ -399,6 +400,7 @@ enum reg_class
   "FP_REGS",   \
   "ACC_REG",   \
   "SP_REG",\
+  "ISC_REGS",  \
   "RL_REGS",   \
   "GR_REGS",   \
   "AR_REGS",   \
@@ -415,6 +417,7 @@ enum reg_class
   { 0xfff8, 0x0007 }, /* floating-point registers */ \
   { 0x, 0x0008 }, /* MAC16 accumulator */ \
   { 0x0002, 0x }, /* stack pointer register */ \
+  { 0x01fc, 0x }, /* registers for indirect sibling calls */ \
   { 0xfffd, 0x }, /* preferred reload registers */ \
   { 0xfffd, 0x }, /* general-purpose registers */ \
   { 0x0003, 0x }, /* integer registers */ \
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 1294aab6c5d..3ed269249a4 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -25,7 +25,6 @@
   (A7_REG  7)
   (A8_REG  8)
   (A9_REG  9)
-  (A11_REG 11)
 
   (UNSPEC_NOP  2)
   (UNSPEC_PLT  3)
@@ -2279,7 +2278,7 @@
 })
 
 (define_insn "sibcall_internal"
-  [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nir"))
+  [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "nic"))
 (match_operand 1 "" "i"))]
   "!TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)"
 {
@@ -2289,17 +2288,6 @@
(set_attr "mode""none")
(set_attr "length"  "3")])
 
-(define_split
-  [(call (mem:SI (match_operand:SI 0 "register_operand"))
-(match_operand 1 ""))]
-  "reload_completed
-   && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)
-   && ! call_used_or_fixed_reg_p (REGNO (operands[0]))"
-  [(set (reg:SI A11_REG)
-   (match_dup 0))
-   (call (mem:SI (reg:SI A11_REG))
-(match_dup 1))])
-
 (define_expand "sibcall_value"
   [(set (match_operand 0 

[PATCH] xtensa: Optimize stack pointer updates in function pro/epilogue under certain conditions

2022-08-17 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch enforces the use of the "addmi" machine instruction instead of
addition/subtraction with two source registers for adjusting the stack
pointer, if the adjustment fits into a signed 16-bit value and is also a
multiple of 256.

/* example */
void test(void) {
  char buffer[4096];
  __asm__(""::"m"(buffer));
}

;; before
test:
movi.n  a9, 1
slli    a9, a9, 12
sub sp, sp, a9
movi.n  a9, 1
slli    a9, a9, 12
add.n   sp, sp, a9
addi    sp, sp, 0
ret.n

;; after
test:
addmi   sp, sp, -0x1000
addmi   sp, sp, 0x1000
ret.n

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_expand_prologue):
Use an "addmi" machine instruction for updating the stack pointer
rather than addition/subtraction via hard register A9, if the amount
of change satisfies the literal value conditions of that instruction
when the CALL0 ABI is used.
(xtensa_expand_epilogue): Ditto.
And also inhibit the stack pointer addition of constant zero.
---
 gcc/config/xtensa/xtensa.cc | 79 +
 1 file changed, 54 insertions(+), 25 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 6ac879c38fb..b673b6764da 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3150,7 +3150,6 @@ xtensa_expand_prologue (void)
   rtx_insn *insn = NULL;
   rtx note_rtx;
 
-
   total_size = compute_frame_size (get_frame_size ());
 
   if (flag_stack_usage_info)
@@ -3206,10 +3205,17 @@ xtensa_expand_prologue (void)
}
  else
{
- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
- emit_move_insn (tmp_reg, GEN_INT (total_size));
- insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-   stack_pointer_rtx, tmp_reg));
+ if (xtensa_simm8x256 (-total_size))
+   insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-total_size)));
+ else
+   {
+ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+ emit_move_insn (tmp_reg, GEN_INT (total_size));
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+   stack_pointer_rtx, tmp_reg));
+   }
  RTX_FRAME_RELATED_P (insn) = 1;
  note_rtx = gen_rtx_SET (stack_pointer_rtx,
  plus_constant (Pmode, stack_pointer_rtx,
@@ -3237,11 +3243,19 @@ xtensa_expand_prologue (void)
   if (total_size > 1024
  || (!callee_save_size && total_size > 128))
{
- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
- emit_move_insn (tmp_reg, GEN_INT (total_size -
-   callee_save_size));
- insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-   stack_pointer_rtx, tmp_reg));
+ if (xtensa_simm8x256 (callee_save_size - total_size))
+   insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (callee_save_size -
+  total_size)));
+ else
+   {
+ rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+ emit_move_insn (tmp_reg, GEN_INT (total_size -
+   callee_save_size));
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+   stack_pointer_rtx, tmp_reg));
+   }
  RTX_FRAME_RELATED_P (insn) = 1;
  note_rtx = gen_rtx_SET (stack_pointer_rtx,
  plus_constant (Pmode, stack_pointer_rtx,
@@ -3315,12 +3329,21 @@ xtensa_expand_epilogue (bool sibcall_p)
 
   if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 
1024))
{
- rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
- emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size -
-   cfun->machine->callee_save_size));
- emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
-hard_frame_pointer_rtx : stack_pointer_rtx,
-tmp_reg));
+ if (xtensa_simm8x256 (cfun->machine->current_frame_size -
+   cfun->machine->callee_save_size))
+   emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
+  hard_frame_pointer_rtx : stack_pointer_rtx,
+  GEN_INT (cfun->machine->current_frame_size -
+   

Re: [PATCH] xtensa: Prevent emitting integer additions of constant zero

2022-08-17 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2022/08/17 4:58, Max Filippov wrote:
> Hi Suwa-san,
Hi!

> 
> On Tue, Aug 16, 2022 at 5:42 AM Takayuki 'January June' Suwa
>  wrote:
>>
>> In a few cases, obviously omitable add instructions can be emitted via
>> invoking gen_addsi3.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.md (addsi3_internal): Rename from "addsi3".
>> (addsi3): New define_expand in order to reject integer additions of
>> constant zero.
>> ---
>>  gcc/config/xtensa/xtensa.md | 14 +-
>>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> with this change a bunch of tests fail to build with the following error:

Ah, sorry, I want to withdraw this patch.

>> In a few cases

As a matter of fact, "in a few cases" is really just one case:

[xtensa_expand_epilogue() in /gcc/config/xtensa/xtensa.cc]
>  if (cfun->machine->current_frame_size > 0)
>   {
> if (frame_pointer_needed || /* always reachable with addi */
> cfun->machine->current_frame_size > 1024 ||
> cfun->machine->current_frame_size <= 127)
>   {
> if (cfun->machine->current_frame_size <= 127)
>   offset = cfun->machine->current_frame_size;
> else
>   offset = cfun->machine->callee_save_size;
>
> emit_insn (gen_addsi3 (stack_pointer_rtx,
>stack_pointer_rtx,
>GEN_INT (offset)));// offset can 
> be zero!
>   }

And adding "define_expand" only to deal with one rare case had too much impact, 
as you saw...

>   undefined reference to `__addsi3'
> 
> E.g. gcc.c-torture/execute/2519-1.c
> or gcc.c-torture/execute/20070919-1.c
> 


[PATCH] xtensa: Prevent emitting integer additions of constant zero

2022-08-16 Thread Takayuki 'January June' Suwa via Gcc-patches
In a few cases, obviously omittable add instructions can be emitted by
invoking gen_addsi3.

gcc/ChangeLog:

* config/xtensa/xtensa.md (addsi3_internal): Rename from "addsi3".
(addsi3): New define_expand in order to reject integer additions of
constant zero.
---
 gcc/config/xtensa/xtensa.md | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 9eeb73915..c132c1626 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -156,7 +156,19 @@
 
 ;; Addition.
 
-(define_insn "addsi3"
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand")
+   (plus:SI (match_operand:SI 1 "register_operand")
+(match_operand:SI 2 "add_operand")))]
+  ""
+{
+  if (! CONST_INT_P (operands[2]) || INTVAL (operands[2]) != 0)
+emit_insn (gen_addsi3_internal (operands[0],
+   operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "addsi3_internal"
   [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a")
(plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r")
 (match_operand:SI 2 "add_operand" "d,O,r,J,N")))]
-- 
2.20.1


[PATCH] xtensa: Turn on -fsplit-wide-types-early by default

2022-08-14 Thread Takayuki 'January June' Suwa via Gcc-patches
Since GCC10, the "subreg2" optimization pass was no longer tied to enabling
"subreg1" unless -fsplit-wide-types-early was turned on (PR88233).  However
on the Xtensa port, the lack of "subreg2" can degrade the quality of the
output code, especially for those that produce many D[FC]mode pseudos.

This patch turns on -fsplit-wide-types-early by default in order to restore
the previous behavior.

gcc/ChangeLog:

* common/config/xtensa/xtensa-common.cc
(xtensa_option_optimization_table): Add OPT_fsplit_wide_types_early
for OPT_LEVELS_ALL in order to restore pre-GCC10 behavior.
---
 gcc/common/config/xtensa/xtensa-common.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/common/config/xtensa/xtensa-common.cc 
b/gcc/common/config/xtensa/xtensa-common.cc
index fbbe9b0aad7..0f27763aa71 100644
--- a/gcc/common/config/xtensa/xtensa-common.cc
+++ b/gcc/common/config/xtensa/xtensa-common.cc
@@ -34,6 +34,8 @@ static const struct default_options 
xtensa_option_optimization_table[] =
assembler, so GCC cannot do a good job of reordering blocks.
Do not enable reordering unless it is explicitly requested.  */
 { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 },
+/* Split multi-word types early (pre-GCC10 behavior).  */
+{ OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
-- 
2.20.1


Re: [PATCH] lower-subreg, expr: Mitigate inefficiencies derived from "(clobber (reg X))" followed by "(set (subreg (reg X)) (...))"

2022-08-04 Thread Takayuki 'January June' Suwa via Gcc-patches
(sorry repost due to the lack of cc here)
Hi!

On 2022/08/04 18:49, Richard Sandiford wrote:
> Takayuki 'January June' Suwa  writes:
>> Thanks for your response.
>>
>> On 2022/08/03 16:52, Richard Sandiford wrote:
>>> Takayuki 'January June' Suwa via Gcc-patches  
>>> writes:
>>>> Emitting "(clobber (reg X))" before "(set (subreg (reg X)) (...))" keeps
>>>> data flow consistent, but it also increases register allocation pressure
>>>> and thus often creates many unwanted register-to-register moves that
>>>> cannot be optimized away.
>>>
>>> There are two things here:
>>>
>>> - If emit_move_complex_parts emits a clobber of a hard register,
>>>   then that's probably a bug/misfeature.  The point of the clobber is
>>>   to indicate that the register has no useful contents.  That's useful
>>>   for wide pseudos that are written to in parts, since it avoids the
>>>   need to track the liveness of each part of the pseudo individually.
>>>   But it shouldn't be necessary for hard registers, since subregs of
>>>   hard registers are simplified to hard registers wherever possible
>>>   (which on most targets is "always").
>>>
>>>   So I think the emit_move_complex_parts clobber should be restricted
>>>   to !HARD_REGISTER_P, like the lower-subreg clobber is.  If that helps
>>>   (if only partly) then it would be worth doing as its own patch.
>>>
>>> - I think it'd be worth looking into more detail why a clobber makes
>>>   a difference to register pressure.  A clobber of a pseudo register R
>>>   shouldn't make R conflict with things that are live at the point of
>>>   the clobber.
>>
>> I agree with its worth.
>> In fact, aside from other ports, on the xtensa one, RA in code with frequent 
>> D[FC]mode pseudos is terribly bad.
>> For example, in __muldc3 on libgcc2, the size of the stack frame reserved 
>> will almost double depending on whether or not this patch is applied.
> 
> Yeah, that's a lot.

So lots, but almost double might be an overstatement :)

BTW after some quick experimentation, I found that turning on 
-fsplit-wide-types-early would roughly (but not completely) solve the problem.  
Surely, the output was not so bad in the past...

> 
>>>>  It seems just analogous to partial register
>>>> stall which is a famous problem on processors that do register renaming.
>>>>
>>>> In my opinion, when the register to be clobbered is a composite of hard
>>>> ones, we should clobber the individual elements separetely, otherwise
>>>> clear the entire to zero prior to use as the "init-regs" pass does (like
>>>> partial register stall workarounds on x86 CPUs).  Such redundant zero
>>>> constant assignments will be removed later in the "cprop_hardreg" pass.
>>>
>>> I don't think we should rely on the zero being optimised away later.
>>>
>>> Emitting the zero also makes it harder for the register allocator
>>> to elide the move.  For example, if we have:
>>>
>>>   (set (subreg:SI (reg:DI P) 0) (reg:SI R0))
>>>   (set (subreg:SI (reg:DI P) 4) (reg:SI R1))
>>>
>>> then there is at least a chance that the RA could assign hard registers
>>> R0:R1 to P, which would turn the moves into nops.  If we emit:
>>>
>>>   (set (reg:DI P) (const_int 0))
>>>
>>> beforehand then that becomes impossible, since R0 and R1 would then
>>> conflict with P.
>>
>> Ah, surely, as you pointed out for targets where "(reg: DI)" corresponds to 
>> one hard register.
> 
> I was thinking here about the case where (reg:DI …) corresponds to
> 2 hard registers.  Each subreg move is then a single hard register
> copy, but assigning P to the combination R0:R1 can remove both of
> the subreg moves.
> 
>>> TBH I'm surprised we still run init_regs for LRA.  I thought there was
>>> a plan to stop doing that, but perhaps I misremember.
>>
>> Sorry I am not sure about the status of LRA... because the xtensa port is 
>> still using reload.
> 
> Ah, hadn't realised that.  If you have time to work on it, it would be
> really good to move over to LRA.  There are plans to remove old reload.

Alas, you overestimate me :) I've only been working on GCC development
for a little over a year.
Well, it would be a lie to say I'm not interested in it, but it's too much for me.

> 
> It might be that old reload *does* treat a pseudo clobber as a conflict.
> I can't remember now. 

Re: [PATCH] lower-subreg, expr: Mitigate inefficiencies derived from "(clobber (reg X))" followed by "(set (subreg (reg X)) (...))"

2022-08-03 Thread Takayuki 'January June' Suwa via Gcc-patches
Thanks for your response.

On 2022/08/03 16:52, Richard Sandiford wrote:
> Takayuki 'January June' Suwa via Gcc-patches  writes:
>> Emitting "(clobber (reg X))" before "(set (subreg (reg X)) (...))" keeps
>> data flow consistent, but it also increases register allocation pressure
>> and thus often creates many unwanted register-to-register moves that
>> cannot be optimized away.
> 
> There are two things here:
> 
> - If emit_move_complex_parts emits a clobber of a hard register,
>   then that's probably a bug/misfeature.  The point of the clobber is
>   to indicate that the register has no useful contents.  That's useful
>   for wide pseudos that are written to in parts, since it avoids the
>   need to track the liveness of each part of the pseudo individually.
>   But it shouldn't be necessary for hard registers, since subregs of
>   hard registers are simplified to hard registers wherever possible
>   (which on most targets is "always").
> 
>   So I think the emit_move_complex_parts clobber should be restricted
>   to !HARD_REGISTER_P, like the lower-subreg clobber is.  If that helps
>   (if only partly) then it would be worth doing as its own patch.
> 
> - I think it'd be worth looking into more detail why a clobber makes
>   a difference to register pressure.  A clobber of a pseudo register R
>   shouldn't make R conflict with things that are live at the point of
>   the clobber.

I agree with its worth.
In fact, aside from other ports, on the xtensa one, RA in code with frequent 
D[FC]mode pseudos is terribly bad.
For example, in __muldc3 on libgcc2, the size of the stack frame reserved will 
almost double depending on whether or not this patch is applied.

> 
>>  It seems just analogous to partial register
>> stall which is a famous problem on processors that do register renaming.
>>
>> In my opinion, when the register to be clobbered is a composite of hard
>> ones, we should clobber the individual elements separetely, otherwise
>> clear the entire to zero prior to use as the "init-regs" pass does (like
>> partial register stall workarounds on x86 CPUs).  Such redundant zero
>> constant assignments will be removed later in the "cprop_hardreg" pass.
> 
> I don't think we should rely on the zero being optimised away later.
> 
> Emitting the zero also makes it harder for the register allocator
> to elide the move.  For example, if we have:
> 
>   (set (subreg:SI (reg:DI P) 0) (reg:SI R0))
>   (set (subreg:SI (reg:DI P) 4) (reg:SI R1))
> 
> then there is at least a chance that the RA could assign hard registers
> R0:R1 to P, which would turn the moves into nops.  If we emit:
> 
>   (set (reg:DI P) (const_int 0))
> 
> beforehand then that becomes impossible, since R0 and R1 would then
> conflict with P.

Ah, surely, as you pointed out for targets where "(reg: DI)" corresponds to one 
hard register.

> 
> TBH I'm surprised we still run init_regs for LRA.  I thought there was
> a plan to stop doing that, but perhaps I misremember.

Sorry I am not sure about the status of LRA... because the xtensa port is still 
using reload.

In conclusion, trying to tweak the common code side may have been a bit
premature.
I'll consider whether I can deal with those issues on the
target-specific code side.

> 
> Thanks,
> Richard
> 
>> This patch may give better output code quality for the reasons above,
>> especially on architectures that don't have DFmode hard registers
>> (On architectures with such hard registers, this patch changes virtually
>> nothing).
>>
>> For example (Espressif ESP8266, Xtensa without FP hard regs):
>>
>> /* example */
>> double _Complex conjugate(double _Complex z) {
>>   __imag__(z) *= -1;
>>   return z;
>> }
>>
>> ;; before
>> conjugate:
>> movi.n  a6, -1
>> sllia6, a6, 31
>> mov.n   a8, a2
>> mov.n   a9, a3
>> mov.n   a7, a4
>> xor a6, a5, a6
>> mov.n   a2, a8
>> mov.n   a3, a9
>> mov.n   a4, a7
>> mov.n   a5, a6
>> ret.n
>>
>> ;; after
>> conjugate:
>> movi.n  a6, -1
>> sllia6, a6, 31
>> xor a6, a5, a6
>> mov.n   a5, a6
>> ret.n
>>
>> gcc/ChangeLog:
>>
>>  * lower-subreg.cc (resolve_simple_move):
>>  Add zero clear of the entire register immediately after
>>  the clobber.
>>  * expr.cc (emit_move_complex_parts):
>>  Change to clobber the real and imaginary parts separately
>>

[PATCH] lower-subreg, expr: Mitigate inefficiencies derived from "(clobber (reg X))" followed by "(set (subreg (reg X)) (...))"

2022-08-02 Thread Takayuki 'January June' Suwa via Gcc-patches
Emitting "(clobber (reg X))" before "(set (subreg (reg X)) (...))" keeps
data flow consistent, but it also increases register allocation pressure
and thus often creates many unwanted register-to-register moves that
cannot be optimized away.  It seems just analogous to partial register
stall which is a famous problem on processors that do register renaming.

In my opinion, when the register to be clobbered is a composite of hard
ones, we should clobber the individual elements separately, otherwise
clear the entire register to zero prior to use as the "init-regs" pass does
(like partial register stall workarounds on x86 CPUs).  Such redundant zero
constant assignments will be removed later in the "cprop_hardreg" pass.

This patch may give better output code quality for the reasons above,
especially on architectures that don't have DFmode hard registers
(On architectures with such hard registers, this patch changes virtually
nothing).

For example (Espressif ESP8266, Xtensa without FP hard regs):

/* example */
double _Complex conjugate(double _Complex z) {
  __imag__(z) *= -1;
  return z;
}

;; before
conjugate:
movi.n  a6, -1
slli    a6, a6, 31
mov.n   a8, a2
mov.n   a9, a3
mov.n   a7, a4
xor a6, a5, a6
mov.n   a2, a8
mov.n   a3, a9
mov.n   a4, a7
mov.n   a5, a6
ret.n

;; after
conjugate:
movi.n  a6, -1
slli    a6, a6, 31
xor a6, a5, a6
mov.n   a5, a6
ret.n

gcc/ChangeLog:

* lower-subreg.cc (resolve_simple_move):
Add zero clear of the entire register immediately after
the clobber.
* expr.cc (emit_move_complex_parts):
Change to clobber the real and imaginary parts separately
instead of the whole complex register if possible.
---
 gcc/expr.cc | 26 --
 gcc/lower-subreg.cc |  7 ++-
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 80bb1b8a4c5..9732e8fd4e5 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -3775,15 +3775,29 @@ emit_move_complex_push (machine_mode mode, rtx x, rtx y)
 rtx_insn *
 emit_move_complex_parts (rtx x, rtx y)
 {
-  /* Show the output dies here.  This is necessary for SUBREGs
- of pseudos since we cannot track their lifetimes correctly;
- hard regs shouldn't appear here except as return values.  */
-  if (!reload_completed && !reload_in_progress
-  && REG_P (x) && !reg_overlap_mentioned_p (x, y))
-emit_clobber (x);
+  rtx_insn *re_insn, *im_insn;
 
   write_complex_part (x, read_complex_part (y, false), false, true);
+  re_insn = get_last_insn ();
   write_complex_part (x, read_complex_part (y, true), true, false);
+  im_insn = get_last_insn ();
+
+  /* Show the output dies here.  This is necessary for SUBREGs
+ of pseudos since we cannot track their lifetimes correctly.  */
+  if (can_create_pseudo_p ()
+  && REG_P (x) && ! reg_overlap_mentioned_p (x, y))
+{
+  /* Hard regs shouldn't appear here except as return values.  */
+  if (HARD_REGISTER_P (x) && REG_NREGS (x) % 2 == 0)
+   {
+ emit_insn_before (gen_clobber (SET_DEST (PATTERN (re_insn))),
+   re_insn);
+ emit_insn_before (gen_clobber (SET_DEST (PATTERN (im_insn))),
+   im_insn);
+   }
+  else
+   emit_insn_before (gen_clobber (x), re_insn);
+}
 
   return get_last_insn ();
 }
diff --git a/gcc/lower-subreg.cc b/gcc/lower-subreg.cc
index 03e9326c663..4ff0a7d1556 100644
--- a/gcc/lower-subreg.cc
+++ b/gcc/lower-subreg.cc
@@ -1086,7 +1086,12 @@ resolve_simple_move (rtx set, rtx_insn *insn)
   unsigned int i;
 
   if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
-   emit_clobber (dest);
+   {
+ emit_clobber (dest);
+ /* We clear the entire of dest with zero after the clobber,
+similar to the "init-regs" pass.  */
+ emit_move_insn (dest, CONST0_RTX (GET_MODE (dest)));
+   }
 
   for (i = 0; i < words; ++i)
{
-- 
2.20.1


[PATCH 2/2] xtensa: Fix conflicting hard regno between indirect sibcall fixups and EH_RETURN_STACKADJ_RTX

2022-07-29 Thread Takayuki 'January June' Suwa via Gcc-patches
The hard register A10 was already allocated for EH_RETURN_STACKADJ_RTX.
(Although exception handling and a sibling call may not apply at the same
 time, the register is changed for safety.)

gcc/ChangeLog:

* config/xtensa/xtensa.md: Change hard register number used in
the split patterns for indirect sibling call fixups from 10 to 11,
the last free one for the CALL0 ABI.
---
 gcc/config/xtensa/xtensa.md | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 899ce2755aa..1294aab6c5d 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -25,7 +25,7 @@
   (A7_REG  7)
   (A8_REG  8)
   (A9_REG  9)
-  (A10_REG 10)
+  (A11_REG 11)
 
   (UNSPEC_NOP  2)
   (UNSPEC_PLT  3)
@@ -2295,9 +2295,9 @@
   "reload_completed
&& !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)
&& ! call_used_or_fixed_reg_p (REGNO (operands[0]))"
-  [(set (reg:SI A10_REG)
+  [(set (reg:SI A11_REG)
(match_dup 0))
-   (call (mem:SI (reg:SI A10_REG))
+   (call (mem:SI (reg:SI A11_REG))
 (match_dup 1))])
 
 (define_expand "sibcall_value"
@@ -2328,10 +2328,10 @@
   "reload_completed
&& !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn)
&& ! call_used_or_fixed_reg_p (REGNO (operands[1]))"
-  [(set (reg:SI A10_REG)
+  [(set (reg:SI A11_REG)
(match_dup 1))
(set (match_dup 0)
-   (call (mem:SI (reg:SI A10_REG))
+   (call (mem:SI (reg:SI A11_REG))
  (match_dup 2)))])
 
 (define_insn "entry"
-- 
2.20.1


[PATCH 1/2] xtensa: Add RTX costs for if_then_else

2022-07-29 Thread Takayuki 'January June' Suwa via Gcc-patches
It takes one machine instruction for both conditional branch and move.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_rtx_costs):
Add new case for IF_THEN_ELSE.
---
 gcc/config/xtensa/xtensa.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index a851a7ae6b3..6ac879c38fb 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4273,6 +4273,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
 
 case ZERO_EXTRACT:
 case ZERO_EXTEND:
+case IF_THEN_ELSE:
   *total = COSTS_N_INSNS (1);
   return true;
 
-- 
2.20.1


[PATCH] xtensa: Optimize "bitwise AND NOT with imm" followed by "branch if (not) equal to zero"

2022-07-22 Thread Takayuki 'January June' Suwa via Gcc-patches
The RTL combiner will transform "if ((x & C) == C) goto label;"
into "if ((~x & C) == 0) goto label;" and will try to match it with
the insn patterns.

/* example */
void test_0(int a) {
  if ((char)a == 255)
foo();
}
void test_1(int a) {
  if ((unsigned short)a == 0xFFFF)
foo();
}
void test_2(int a) {
  if ((a & 0x3F80) != 0x3F80)
foo();
}

;; before
test_0:
extui   a2, a2, 0, 8
movi    a3, 0xff
bne a2, a3, .L1
j.l foo, a9
.L1:
ret.n
test_1:
movi.n  a3, -1
extui   a2, a2, 0, 16
extui   a3, a3, 16, 16
bne a2, a3, .L3
j.l foo, a9
.L3:
ret.n
test_2:
movi    a3, 0x80
extui   a2, a2, 7, 7
addmi   a3, a3, 0x3f00
slli    a2, a2, 7
beq a2, a3, .L5
j.l foo, a9
.L5:
ret.n

;; after
test_0:
movi    a3, 0xff
bnall   a2, a3, .L1
j.l foo, a9
.L1:
ret.n
test_1:
movi.n  a3, -1
extui   a3, a3, 16, 16
bnall   a2, a3, .L3
j.l foo, a9
.L3:
ret.n
test_2:
movi    a3, 0x80
addmi   a3, a3, 0x3f00
ball    a2, a3, .L5
j.l foo, a9
.L5:
ret.n

gcc/ChangeLog:

* config/xtensa/xtensa.md (*masktrue_const_bitcmpl):
Add a new insn_and_split pattern, and a few split patterns for
special cases.
---
 gcc/config/xtensa/xtensa.md | 84 +
 1 file changed, 84 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index c02f1a56641..899ce2755aa 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1714,6 +1714,90 @@
(set_attr "mode""none")
(set_attr "length"  "3")])
 
+(define_insn_and_split "*masktrue_const_bitcmpl"
+  [(set (pc)
+   (if_then_else (match_operator 3 "boolean_operator"
+   [(and:SI (not:SI (match_operand:SI 0 "register_operand" 
"r"))
+(match_operand:SI 1 "const_int_operand" "i"))
+(const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+  "exact_log2 (INTVAL (operands[1])) < 0"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(set (match_dup 4)
+   (match_dup 1))
+   (set (pc)
+   (if_then_else (match_op_dup 3
+   [(and:SI (not:SI (match_dup 0))
+(match_dup 4))
+(const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+{
+  operands[4] = gen_reg_rtx (SImode);
+}
+  [(set_attr "type""jump")
+   (set_attr "mode""none")
+   (set (attr "length")
+   (if_then_else (match_test "TARGET_DENSITY
+  && IN_RANGE (INTVAL (operands[1]), -32, 95)")
+ (const_int 5)
+ (if_then_else (match_test "xtensa_simm12b (INTVAL 
(operands[1]))")
+   (const_int 6)
+   (const_int 10))))])
+
+(define_split
+  [(set (pc)
+   (if_then_else (match_operator 2 "boolean_operator"
+   [(subreg:HQI (not:SI (match_operand:SI 0 
"register_operand")) 0)
+(const_int 0)])
+ (label_ref (match_operand 1 ""))
+ (pc)))]
+  "!BYTES_BIG_ENDIAN"
+  [(set (pc)
+   (if_then_else (match_op_dup 2
+   [(and:SI (not:SI (match_dup 0))
+(match_dup 3))
+(const_int 0)])
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+  operands[3] = GEN_INT ((1 << GET_MODE_BITSIZE (<MODE>mode)) - 1);
+})
+
+(define_split
+  [(set (pc)
+   (if_then_else (match_operator 2 "boolean_operator"
+   [(subreg:HI (not:SI (match_operand:SI 0 
"register_operand")) 2)
+(const_int 0)])
+ (label_ref (match_operand 1 ""))
+ (pc)))]
+  "BYTES_BIG_ENDIAN"
+  [(set (pc)
+   (if_then_else (match_op_dup 2
+   [(and:SI (not:SI (match_dup 0))
+(const_int 65535))
+(const_int 0)])
+ (label_ref (match_dup 1))
+ (pc)))])
+
+(define_split
+  [(set (pc)
+   (if_then_else (match_operator 2 "boolean_operator"
+   [(subreg:QI (not:SI (match_operand:SI 0 
"register_operand")) 3)
+(const_int 0)])
+ (label_ref (match_operand 1 ""))
+ (pc)))]
+  "BYTES_BIG_ENDIAN"
+  [(set (pc)
+   (if_then_else (match_op_dup 2
+   [(and:SI (not:SI (match_dup 0))
+(const_int 255))
+ 

[PATCH] xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction

2022-07-18 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch corrects the overestimation of the relative cost of
'(set (reg) (const_int N))' where N fits into the instruction itself.

In fact, such overestimation confuses the RTL loop invariant motion pass.
As a result, it has almost no negative impact from the speed point of
view, but it causes additional reg-reg move instructions and higher
register allocation pressure, both of which hurt code size.

/* example, optimized for size */
extern int foo(void);
extern int array[16];
void test_0(void) {
  unsigned int i;
  for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
    array[i] = 1024;
}
void test_1(void) {
  unsigned int i;
  for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
    array[i] = array[i] ? 1024 : 0;
}
void test_2(void) {
  unsigned int i;
  for (i = 0; i < sizeof(array)/sizeof(*array); ++i)
    array[i] = foo() ? 0 : 1024;
}

;; before
        .literal_position
        .literal .LC0, array
test_0:
        l32r    a3, .LC0
        movi.n  a2, 0
        movi    a4, 0x400       // OK
.L2:
        s32i.n  a4, a3, 0
        addi.n  a2, a2, 1
        addi.n  a3, a3, 4
        bnei    a2, 16, .L2
        ret.n
        .literal_position
        .literal .LC1, array
test_1:
        l32r    a2, .LC1
        movi.n  a3, 0
        movi    a5, 0x400       // NG
.L6:
        l32i.n  a4, a2, 0
        beqz.n  a4, .L5
        mov.n   a4, a5  // should be "movi a4, 0x400"
.L5:
        s32i.n  a4, a2, 0
        addi.n  a3, a3, 1
        addi.n  a2, a2, 4
        bnei    a3, 16, .L6
        ret.n
        .literal_position
        .literal .LC2, array
test_2:
        addi    sp, sp, -32
        s32i.n  a12, sp, 24
        l32r    a12, .LC2
        s32i.n  a13, sp, 20
        s32i.n  a14, sp, 16
        s32i.n  a15, sp, 12
        s32i.n  a0, sp, 28
        addi    a13, a12, 64
        movi.n  a15, 0  // NG
        movi    a14, 0x400      // and wastes callee-saved registers (only 4)
.L11:
        call0   foo
        mov.n   a3, a14 // should be "movi a3, 0x400"
        movnez  a3, a15, a2
        s32i.n  a3, a12, 0
        addi.n  a12, a12, 4
        bne     a12, a13, .L11
        l32i.n  a0, sp, 28
        l32i.n  a12, sp, 24
        l32i.n  a13, sp, 20
        l32i.n  a14, sp, 16
        l32i.n  a15, sp, 12
        addi    sp, sp, 32
        ret.n

;; after
        .literal_position
        .literal .LC0, array
test_0:
        l32r    a3, .LC0
        movi.n  a2, 0
        movi    a4, 0x400       // OK
.L2:
        s32i.n  a4, a3, 0
        addi.n  a2, a2, 1
        addi.n  a3, a3, 4
        bnei    a2, 16, .L2
        ret.n
        .literal_position
        .literal .LC1, array
test_1:
        l32r    a2, .LC1
        movi.n  a3, 0
.L6:
        l32i.n  a4, a2, 0
        beqz.n  a4, .L5
        movi    a4, 0x400       // OK
.L5:
        s32i.n  a4, a2, 0
        addi.n  a3, a3, 1
        addi.n  a2, a2, 4
        bnei    a3, 16, .L6
        ret.n
        .literal_position
        .literal .LC2, array
test_2:
        addi    sp, sp, -16
        s32i.n  a12, sp, 8
        l32r    a12, .LC2
        s32i.n  a13, sp, 4
        s32i.n  a0, sp, 12
        addi    a13, a12, 64
.L11:
        call0   foo
        movi.n  a3, 0   // OK
        movi    a4, 0x400       // and less register allocation pressure
        moveqz  a3, a4, a2
        s32i.n  a3, a12, 0
        addi.n  a12, a12, 4
        bne     a12, a13, .L11
        l32i.n  a0, sp, 12
        l32i.n  a12, sp, 8
        l32i.n  a13, sp, 4
        addi    sp, sp, 16
        ret.n

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_rtx_costs):
Change the relative cost of '(set (reg) (const_int N))' where
N fits into signed 12-bit from 4 to 0 if optimizing for size.
And use the appropriate macro instead of the bare number 4.
---
 gcc/config/xtensa/xtensa.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 94337452ba8..a851a7ae6b3 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -4073,7 +4073,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
case SET:
  if (xtensa_simm12b (INTVAL (x)))
{
- *total = 4;
+ *total = speed ? COSTS_N_INSNS (1) : 0;
  return true;
}
  break;
-- 
2.20.1


[PATCH 1/2] xtensa: constantsynth: Make try to find shorter instruction

2022-07-15 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch allows the constant synthesis to choose a shorter instruction
if possible.

/* example */
int test(void) {
  return 128 << 8;
}

;; before
test:
        movi    a2, 0x100
        addmi   a2, a2, 0x7f00
        ret.n

;; after
test:
        movi.n  a2, 1
        slli    a2, a2, 15
        ret.n

When the Code Density Option is configured, the latter is one byte smaller
than the former.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (xtensa_emit_constantsynth): Remove.
(xtensa_constantsynth_2insn): Change to try all three synthetic
methods and to use the one that fits the immediate value of
the seed into a Narrow Move Immediate instruction "MOVI.N"
when the Code Density Option is configured.
---
 gcc/config/xtensa/xtensa.cc | 58 ++---
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 13f2b2b832c..94337452ba8 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -1035,35 +1035,35 @@ xtensa_split_operand_pair (rtx operands[4], 
machine_mode mode)
load-immediate / arithmetic ones, instead of a L32R instruction
(plus a constant in litpool).  */
 
-static void
-xtensa_emit_constantsynth (rtx dst, enum rtx_code code,
-  HOST_WIDE_INT imm0, HOST_WIDE_INT imm1,
-  rtx (*gen_op)(rtx, HOST_WIDE_INT),
-  HOST_WIDE_INT imm2)
-{
-  gcc_assert (REG_P (dst));
-  emit_move_insn (dst, GEN_INT (imm0));
-  emit_move_insn (dst, gen_rtx_fmt_ee (code, SImode,
-  dst, GEN_INT (imm1)));
-  if (gen_op)
-emit_move_insn (dst, gen_op (dst, imm2));
-}
-
 static int
 xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval,
rtx (*gen_op)(rtx, HOST_WIDE_INT),
HOST_WIDE_INT op_imm)
 {
-  int shift = exact_log2 (srcval + 1);
+  HOST_WIDE_INT imm = INT_MAX;
+  rtx x = NULL_RTX;
+  int shift;
 
+  gcc_assert (REG_P (dst));
+
+  shift = exact_log2 (srcval + 1);
   if (IN_RANGE (shift, 1, 31))
 {
-  xtensa_emit_constantsynth (dst, LSHIFTRT, -1, 32 - shift,
-gen_op, op_imm);
-  return 1;
+  imm = -1;
+  x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift));
 }
 
-  if (IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512)))
+
+  shift = ctz_hwi (srcval);
+  if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95)))
+  && xtensa_simm12b (srcval >> shift))
+{
+  imm = srcval >> shift;
+  x = gen_ashlsi3 (dst, dst, GEN_INT (shift));
+}
+
+  if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95)))
+  && IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512)))
 {
   HOST_WIDE_INT imm0, imm1;
 
@@ -1076,19 +1076,19 @@ xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT 
srcval,
   imm0 = srcval - imm1;
   if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255))
imm0 -= 256, imm1 += 256;
-  xtensa_emit_constantsynth (dst, PLUS, imm0, imm1, gen_op, op_imm);
-   return 1;
+  imm = imm0;
+  x = gen_addsi3 (dst, dst, GEN_INT (imm1));
 }
 
-  shift = ctz_hwi (srcval);
-  if (xtensa_simm12b (srcval >> shift))
-{
-  xtensa_emit_constantsynth (dst, ASHIFT, srcval >> shift, shift,
-gen_op, op_imm);
-  return 1;
-}
+  if (!x)
+return 0;
 
-  return 0;
+  emit_move_insn (dst, GEN_INT (imm));
+  emit_insn (x);
+  if (gen_op)
+emit_move_insn (dst, gen_op (dst, op_imm));
+
+  return 1;
 }
 
 static rtx
-- 
2.20.1


[PATCH 2/2] xtensa: Optimize "bitwise AND with imm1" followed by "branch if (not) equal to imm2"

2022-07-15 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch enhances the effectiveness of the previously posted one:
"xtensa: Optimize bitwise AND operation with some specific forms of constants".

/* example */
extern void foo(int);
void test(int a) {
  if ((a & (-1U << 8)) == (128 << 8))  /* 0 or one of "b4const" */
    foo(a);
}

;; before
        .global test
test:
        movi    a3, -0x100
        movi.n  a4, 1
        and     a3, a2, a3
        slli    a4, a4, 15
        bne     a3, a4, .L3
        j.l     foo, a9
.L1:
        ret.n

;; after
        .global test
test:
        srli    a3, a2, 8
        bnei    a3, 128, .L1
        j.l     foo, a9
.L1:
        ret.n

gcc/ChangeLog:

* config/xtensa/xtensa.md
(*masktrue_const_pow2_minus_one, *masktrue_const_negative_pow2,
*masktrue_const_shifted_mask): If the immediate for bitwise AND is
represented as '-(1 << N)', decrease the lower bound of N from 12
to 1.  And the other immediate for conditional branch is now no
longer limited to zero, but also one of some positive integers.
Finally, remove the checks of some conditions, because the comparison
expressions that don't satisfy such checks are determined as
compile-time constants and thus will be optimized away before
RTL expansion.
---
 gcc/config/xtensa/xtensa.md | 73 ++---
 1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 6a58d3e2776..c02f1a56641 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1716,63 +1716,78 @@
 
 (define_insn_and_split "*masktrue_const_pow2_minus_one"
   [(set (pc)
-   (if_then_else (match_operator 3 "boolean_operator"
+   (if_then_else (match_operator 4 "boolean_operator"
[(and:SI (match_operand:SI 0 "register_operand" "r")
 (match_operand:SI 1 "const_int_operand" "i"))
-(const_int 0)])
- (label_ref (match_operand 2 "" ""))
+(match_operand:SI 2 "const_int_operand" "i")])
+ (label_ref (match_operand 3 "" ""))
  (pc)))]
-  "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)"
+  "IN_RANGE (exact_log2 (INTVAL (operands[1]) + 1), 17, 31)
+   /* && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0  // can be 
omitted */
+   && xtensa_b4const_or_zero (INTVAL (operands[2]) << (32 - floor_log2 (INTVAL 
(operands[1]) + 1)))"
   "#"
   "&& can_create_pseudo_p ()"
-  [(set (match_dup 4)
+  [(set (match_dup 5)
(ashift:SI (match_dup 0)
   (match_dup 1)))
(set (pc)
-   (if_then_else (match_op_dup 3
-   [(match_dup 4)
-(const_int 0)])
- (label_ref (match_dup 2))
+   (if_then_else (match_op_dup 4
+   [(match_dup 5)
+(match_dup 2)])
+ (label_ref (match_dup 3))
  (pc)))]
 {
-  operands[1] = GEN_INT (32 - floor_log2 (INTVAL (operands[1]) + 1));
-  operands[4] = gen_reg_rtx (SImode);
+  int shift = 32 - floor_log2 (INTVAL (operands[1]) + 1);
+  operands[1] = GEN_INT (shift);
+  operands[2] = GEN_INT (INTVAL (operands[2]) << shift);
+  operands[5] = gen_reg_rtx (SImode);
 }
   [(set_attr "type""jump")
(set_attr "mode""none")
(set (attr "length")
-   (if_then_else (match_test "TARGET_DENSITY
-  && INTVAL (operands[1]) == 0x7FFFFFFF")
- (const_int 5)
- (const_int 6)))])
+   (if_then_else (match_test "(TARGET_DENSITY && INTVAL (operands[1]) == 
0x7FFFFFFF)
+  && INTVAL (operands[2]) == 0")
+ (const_int 4)
+ (if_then_else (match_test "TARGET_DENSITY
+&& (INTVAL (operands[1]) == 
0x7FFFFFFF
+|| INTVAL (operands[2]) == 
0)")
+   (const_int 5)
+   (const_int 6))))])
 
 (define_insn_and_split "*masktrue_const_negative_pow2"
   [(set (pc)
-   (if_then_else (match_operator 3 "boolean_operator"
+   (if_then_else (match_operator 4 "boolean_operator"
[(and:SI (match_operand:SI 0 "register_operand" "r")
 (match_operand:SI 1 "const_int_operand" "i"))
-(const_int 0)])
- (label_ref (match_operand 2 "" ""))
+(match_operand:SI 2 "const_int_operand" "i")])
+ (label_ref (match_operand 3 "" ""))
  (pc)))]
-  "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 12, 30)"
+  "IN_RANGE (exact_log2 (-INTVAL (operands[1])), 1, 30)
+   /* && (~INTVAL (operands[1]) & 

[PATCH] xtensa: Minor fix for FP constant synthesis

2022-07-13 Thread Takayuki 'January June' Suwa via Gcc-patches
This patch fixes a non-fatal issue about negative constant values derived
from FP constant synthesis on hosts whose 'long' is wider than 'int32_t'.

And also replaces the dedicated code in FP constant synthesis split
pattern with the appropriate existing function call.

gcc/ChangeLog:

* config/xtensa/xtensa.md:
In FP constant synthesis split pattern, subcontract to
avoid_constant_pool_reference() as in the case of integer,
because it can handle well too.  And cast to int32_t before
calling xtensa_constantsynth() in order to ignore upper 32-bit.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/constsynth_double.c:
Modify in order to catch the issue.
---
 gcc/config/xtensa/xtensa.md   | 35 +--
 .../gcc.target/xtensa/constsynth_double.c |  2 +-
 2 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 9d998589631..6a58d3e2776 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1244,35 +1244,16 @@
   "! optimize_debug && reload_completed"
   [(const_int 0)]
 {
-  int i = 0;
-  rtx x = XEXP (operands[1], 0);
-  long l[2];
-  if (SYMBOL_REF_P (x)
-  && CONSTANT_POOL_ADDRESS_P (x))
-x = get_pool_constant (x);
-  else if (GET_CODE (x) == CONST)
-{
-  x = XEXP (x, 0);
-  gcc_assert (GET_CODE (x) == PLUS
- && SYMBOL_REF_P (XEXP (x, 0))
- && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
- && CONST_INT_P (XEXP (x, 1)));
-  i = INTVAL (XEXP (x, 1));
-  gcc_assert (i == 0 || i == 4);
-  i /= 4;
-  x = get_pool_constant (XEXP (x, 0));
-}
-  else
-gcc_unreachable ();
-  if (GET_MODE (x) == SFmode)
-REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l[0]);
-  else if (GET_MODE (x) == DFmode)
-REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
-  else
+  rtx x = avoid_constant_pool_reference (operands[1]);
+  long l;
+  HOST_WIDE_INT value;
+  if (! CONST_DOUBLE_P (x) || GET_MODE (x) != SFmode)
 FAIL;
+  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   x = gen_rtx_REG (SImode, REGNO (operands[0]));
-  if (! xtensa_constantsynth (x, l[i]))
-emit_move_insn (x, GEN_INT (l[i]));
+  value = (int32_t)l;
+  if (! xtensa_constantsynth (x, value))
+emit_move_insn (x, GEN_INT (value));
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c 
b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c
index 890ca504780..5fba6a98650 100644
--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c
+++ b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c
@@ -5,7 +5,7 @@ void test(unsigned int count, double array[])
 {
   unsigned int i;
   for (i = 0; i < count; ++i)
-array[i] = 1.0;
+array[i] = 8.988474246316506e+307;
 }
 
 /* { dg-final { scan-assembler-not "l32r" } } */
-- 
2.20.1


Re: [RFA] Improve initialization of objects when the initializer has trailing zeros.

2022-07-07 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2022/07/07 23:46, Jeff Law wrote:
> This is an update to a patch originally posted by Takayuki Suwa a few months 
> ago.
> 
> When we initialize an array from a STRING_CST we perform the initialization 
> in two steps.  The first step copies the STRING_CST to the destination.  The 
> second step uses clear_storage to initialize storage in the array beyond 
> TREE_STRING_LENGTH of the initializer.
> 
> Takayuki's patch added a special case when the STRING_CST itself was all 
> zeros which would avoid the copy from the STRING_CST and instead do all the 
> initialization via clear_storage which is clearly more runtime efficient.

Thank you for understanding what I mean...

> Richie had the suggestion that instead of special casing when the entire 
> STRING_CST was NULs  to instead identify when the tail of the STRING_CST was 
> NULs.   That's more general and handles Takayuki's case as well.

and offering good explanation.

> Bootstrapped and regression tested on x86_64-linux-gnu.  Given I rewrote 
> Takayuki's patch I think it needs someone else to review rather than 
> self-approving.

LGTM, and of course it resolves the issue that started this in the first place 
(https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595685.html).

> 
> OK for the trunk?
> 
> Jeff
> 


[PATCH] xtensa: Optimize integer constant addition that is between -32896 and 32639

2022-06-26 Thread Takayuki 'January June' Suwa via Gcc-patches
Such constants are often subject to the constant synthesis:

int test(int a) {
  return a - 31999;
}

test:
        movi    a3, 1
        addmi   a3, a3, -0x7d00
        add     a2, a2, a3
        ret

This patch optimizes such case as follows:

test:
        addi    a2, a2, 1
        addmi   a2, a2, -0x7d00
        ret

gcc/ChangeLog:

* config/xtensa/xtensa.md:
Suppress unnecessary emitting nop insn in the split patterns for
integer/FP constant synthesis, and add new peephole2 pattern that
folds such synthesized additions.
---
 gcc/config/xtensa/xtensa.md | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index f31ec33b362..9d998589631 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1033,6 +1033,7 @@
 FAIL;
   if (! xtensa_constantsynth (operands[0], INTVAL (x)))
 emit_move_insn (operands[0], x);
+  DONE;
 })
 
 ;; 16-bit Integer moves
@@ -1272,6 +1273,7 @@
   x = gen_rtx_REG (SImode, REGNO (operands[0]));
   if (! xtensa_constantsynth (x, l[i]))
 emit_move_insn (x, GEN_INT (l[i]));
+  DONE;
 })
 
 ;; 64-bit floating point moves
@@ -2808,3 +2810,36 @@
 && REGNO (x) == regno + REG_NREGS (operands[0]) / 2))
 FAIL;
 })
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand")
+   (match_operand:SI 1 "const_int_operand"))
+   (set (match_dup 0)
+   (plus:SI (match_dup 0)
+(match_operand:SI 2 "const_int_operand")))
+   (set (match_operand:SI 3 "register_operand")
+   (plus:SI (match_operand:SI 4 "register_operand")
+(match_dup 0)))]
+  "IN_RANGE (INTVAL (operands[1]) + INTVAL (operands[2]),
+(-128 - 32768), (127 + 32512))
+   && REGNO (operands[0]) != REGNO (operands[3])
+   && REGNO (operands[0]) != REGNO (operands[4])
+   && peep2_reg_dead_p (3, operands[0])"
+  [(set (match_dup 3)
+   (plus:SI (match_dup 4)
+(match_dup 1)))
+   (set (match_dup 3)
+   (plus:SI (match_dup 3)
+(match_dup 2)))]
+{
+  HOST_WIDE_INT value = INTVAL (operands[1]) + INTVAL (operands[2]);
+  int imm0, imm1;
+  value += 128;
+  if (value > 32512)
+imm1 = 32512;
+  else
+imm1 = value & ~255;
+  imm0 = value - imm1 - 128;
+  operands[1] = GEN_INT (imm0);
+  operands[2] = GEN_INT (imm1);
+})
-- 
2.20.1


  1   2   >