Also, the omission of the instruction that sets the shift amount register
(SAR) to 8 is now more efficient: it is omitted if there was a previous
BSWAP rtx in the same BB, but not omitted if no BSWAP is found or another
rtx that modifies the SAR is found first.
gcc/ChangeLog:
* config/xtensa/xtensa-protos.h (xtensa_bswapsi2_output):
New function prototype.
* config/xtensa/xtensa.cc
(xtensa_bswapsi2_output_1, xtensa_bswapsi2_output):
New functions.
* config/xtensa/xtensa.md (bswapsi2_internal):
Rewrite in compact syntax and use xtensa_bswapsi2_output() as asm
output.
gcc/testsuite/ChangeLog:
* gcc.target/xtensa/bswap-SSAI8.c: New.
---
gcc/config/xtensa/xtensa-protos.h | 1 +
gcc/config/xtensa/xtensa.cc | 67 +++++++++++++++++++
gcc/config/xtensa/xtensa.md | 37 +++-------
gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c | 27 ++++++++
4 files changed, 103 insertions(+), 29 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c
diff --git a/gcc/config/xtensa/xtensa-protos.h
b/gcc/config/xtensa/xtensa-protos.h
index 1f5dcf5949c..98e75c6a535 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -60,6 +60,7 @@ extern bool xtensa_tls_referenced_p (rtx);
extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
extern bool xtensa_split1_finished_p (void);
extern void xtensa_split_DI_reg_imm (rtx *);
+extern char *xtensa_bswapsi2_output (rtx_insn *, const char *);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 0dfebdcb441..b55104c192c 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2645,6 +2645,73 @@ xtensa_split_DI_reg_imm (rtx *operands)
}
+/* Return the asm output string of bswapsi2 insn pattern.
+ It does this by scanning backwards for BBs from the specified insn,
+ and if a BSWAP is found, it omits the instruction to set SAR to 8.
+ If BSWAP is not found, or if a CALL, JUMP, ASM, or other insn that
+ sets SAR is found first, prepend an instruction to set SAR to 8 as
+ usual. */
+
+static int
+xtensa_bswapsi2_output_1 (const_rtx pat)
+{
+ subrtx_iterator::array_type array;
+ const_rtx x;
+
+ FOR_EACH_SUBRTX (iter, array, pat, NONCONST)
+ switch (GET_CODE (x = *iter))
+ {
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (! CONST_INT_P (XEXP (x, 1)))
+ return -1;
+ break;
+ case ROTATE:
+ case ROTATERT:
+ case ASM_INPUT:
+ case ASM_OPERANDS:
+ return -1;
+ case BSWAP:
+ return 1;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+char *
+xtensa_bswapsi2_output (rtx_insn *insn, const char *output)
+{
+ static char result[128];
+ bool expect_sar_is_8 = false;
+ int i;
+
+ while ((insn = prev_nonnote_nondebug_insn_bb (insn)))
+ if (CALL_P (insn) || JUMP_P (insn))
+ break;
+ else if (NONJUMP_INSN_P (insn))
+ {
+ if ((i = xtensa_bswapsi2_output_1 (PATTERN (insn))) < 0)
+ break;
+ else if (i > 0)
+ {
+ expect_sar_is_8 = true;
+ break;
+ }
+ }
+
+ if (expect_sar_is_8)
+ result[0] = '\0';
+ else
+ strcpy (result, "ssai\t8\n\t");
+ strcat (result, output);
+
+ return result;
+}
+
+
/* Try to split an integer value into what are suitable for two consecutive
immediate addition instructions, ADDI or ADDMI. */
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index ab0403d9ea8..1339b03ce1e 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -649,36 +649,15 @@
})
(define_insn "bswapsi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,&a")
- (bswap:SI (match_operand:SI 1 "register_operand" "0,r")))
- (clobber (match_scratch:SI 2 "=&a,X"))]
+ [(set (match_operand:SI 0 "register_operand")
+ (bswap:SI (match_operand:SI 1 "register_operand")))
+ (clobber (match_scratch:SI 2))]
"!optimize_debug && optimize > 1 && !optimize_size"
-{
- rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn);
- const char *init = "ssai\t8\;";
- static char result[128];
- if (prev_insn && NONJUMP_INSN_P (prev_insn))
- {
- rtx x = PATTERN (prev_insn);
- if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == SET
- && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER)
- {
- x = XEXP (XVECEXP (x, 0, 0), 1);
- if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode)
- init = "";
- }
- }
- sprintf (result,
- (which_alternative == 0)
- ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2,
%%2\;src\t%%0, %%1, %%2"
- : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0,
%%0\;src\t%%0, %%1, %%0",
- init);
- return result;
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "15,15")])
+ {@ [cons: =0, 1, =2; attrs: type, length]
+ [ a, 0, &a; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%2, %1,
16\;src\t%2, %2, %1\;src\t%2, %2, %2\;src\t%0, %1, %2");
+ [&a, r, X; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%0, %1,
16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0");
+ }
+ [(set_attr "mode" "SI")])
(define_expand "bswapdi2"
[(set (match_operand:DI 0 "register_operand" "")
diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c
b/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c
new file mode 100644
index 00000000000..15364ed51e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void foo(void);
+
+void test_0(volatile unsigned int a[], unsigned int b)
+{
+ a[0] = __builtin_bswap32(a[0]);
+ a[1] = a[1] >> 9;
+ a[2] = __builtin_bswap32(a[2]);
+ a[3] = a[3] << b;
+ a[4] = __builtin_bswap32(a[4]);
+ foo();
+ a[5] = __builtin_bswap32(a[5]);
+ a[6] = __builtin_stdc_rotate_left (a[6], 13);
+ a[7] = __builtin_bswap32(a[7]);
+ asm volatile ("# asm volatile");
+ a[8] = __builtin_bswap32(a[8]);
+}
+
+void test_1(volatile unsigned long long a[])
+{
+ a[0] = __builtin_bswap64(a[0]);
+ a[1] = __builtin_bswap64(a[1]);
+}
+
+/* { dg-final { scan-assembler-times "ssai\t8" 6 } } */
--
2.39.5