Also, the logic for omitting the instruction that sets the shift amount
register (SAR) to 8 is now more effective: the instruction is omitted if an
earlier BSWAP rtx is found in the same BB, and is kept if no BSWAP is found
or if another rtx that may modify SAR is found first.

gcc/ChangeLog:

        * config/xtensa/xtensa-protos.h (xtensa_bswapsi2_output):
        New function prototype.
        * config/xtensa/xtensa.cc
        (xtensa_bswapsi2_output_1, xtensa_bswapsi2_output):
        New functions.
        * config/xtensa/xtensa.md (bswapsi2_internal):
        Rewrite in compact syntax and use xtensa_bswapsi2_output() as asm
        output.

gcc/testsuite/ChangeLog:

        * gcc.target/xtensa/bswap-SSAI8.c: New.
---
 gcc/config/xtensa/xtensa-protos.h             |  1 +
 gcc/config/xtensa/xtensa.cc                   | 67 +++++++++++++++++++
 gcc/config/xtensa/xtensa.md                   | 37 +++-------
 gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c | 27 ++++++++
 4 files changed, 103 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c

diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 1f5dcf5949c..98e75c6a535 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -60,6 +60,7 @@ extern bool xtensa_tls_referenced_p (rtx);
 extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
 extern bool xtensa_split1_finished_p (void);
 extern void xtensa_split_DI_reg_imm (rtx *);
+extern char *xtensa_bswapsi2_output (rtx_insn *, const char *);
#ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 0dfebdcb441..b55104c192c 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2645,6 +2645,73 @@ xtensa_split_DI_reg_imm (rtx *operands)
 }
+/* Return the asm output string of the bswapsi2 insn pattern.
+   It does this by scanning backwards within the BB from the specified
+   insn, and if a BSWAP is found, it omits the instruction to set SAR to 8.
+   If no BSWAP is found, or if a CALL, JUMP, ASM, or other insn that
+   may set SAR is found first, it prepends an instruction to set SAR to 8
+   as usual.  */
+
+static int  /* 1 = BSWAP, -1 = variable shift/rotate/asm, 0 = neither.  */
+xtensa_bswapsi2_output_1 (const_rtx pat)
+{
+  subrtx_iterator::array_type array;
+  const_rtx x;
+
+  FOR_EACH_SUBRTX (iter, array, pat, NONCONST)  /* Visit the sub-rtxes of PAT.  */
+    switch (GET_CODE (x = *iter))
+      {
+      case ASHIFT:
+      case ASHIFTRT:
+      case LSHIFTRT:
+       if (! CONST_INT_P (XEXP (x, 1)))        /* Shift amount is not constant.  */
+         return -1;
+       break;
+      case ROTATE:
+      case ROTATERT:
+      case ASM_INPUT:
+      case ASM_OPERANDS:
+       return -1;      /* Any rotate or asm: presumably may change SAR.  */
+      case BSWAP:
+       return 1;       /* A BSWAP was reached before any of the above.  */
+      default:
+       break;
+      }
+
+  return 0;    /* Neither a BSWAP nor any of the -1 cases was found.  */
+}
+
+char *  /* Points to a static buffer that the next call overwrites.  */
+xtensa_bswapsi2_output (rtx_insn *insn, const char *output)
+{
+  static char result[128];
+  bool expect_sar_is_8 = false;
+  int i;
+
+  while ((insn = prev_nonnote_nondebug_insn_bb (insn)))  /* Step backwards.  */
+    if (CALL_P (insn) || JUMP_P (insn))
+      break;   /* Give up at a call or jump.  */
+    else if (NONJUMP_INSN_P (insn))
+      {
+       if ((i = xtensa_bswapsi2_output_1 (PATTERN (insn))) < 0)
+         break;        /* A variable shift, rotate or asm came first.  */
+       else if (i > 0)
+         {
+           expect_sar_is_8 = true;     /* An earlier BSWAP came first.  */
+           break;
+         }
+      }
+
+  if (expect_sar_is_8)
+    result[0] = '\0';  /* Emit OUTPUT alone.  */
+  else
+    strcpy (result, "ssai\t8\n\t");    /* Emit "ssai 8" ahead of OUTPUT.  */
+  strcat (result, output);     /* NOTE(review): no check against sizeof (result).  */
+
+  return result;
+}
+
+
 /* Try to split an integer value into what are suitable for two consecutive
    immediate addition instructions, ADDI or ADDMI.  */
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index ab0403d9ea8..1339b03ce1e 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -649,36 +649,15 @@
 })
(define_insn "bswapsi2_internal"
-  [(set (match_operand:SI 0 "register_operand" "=a,&a")
-       (bswap:SI (match_operand:SI 1 "register_operand" "0,r")))
-   (clobber (match_scratch:SI 2 "=&a,X"))]
+  [(set (match_operand:SI 0 "register_operand")
+       (bswap:SI (match_operand:SI 1 "register_operand")))
+   (clobber (match_scratch:SI 2))]
   "!optimize_debug && optimize > 1 && !optimize_size"
-{
-  rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn);
-  const char *init = "ssai\t8\;";
-  static char result[128];
-  if (prev_insn && NONJUMP_INSN_P (prev_insn))
-    {
-      rtx x = PATTERN (prev_insn);
-      if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2
-         && GET_CODE (XVECEXP (x, 0, 0)) == SET
-         && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER)
-       {
-         x = XEXP (XVECEXP (x, 0, 0), 1);
-         if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode)
-           init = "";
-       }
-    }
-  sprintf (result,
-          (which_alternative == 0)
-          ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, 
%%2\;src\t%%0, %%1, %%2"
-          : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, 
%%0\;src\t%%0, %%1, %%0",
-          init);
-  return result;
-}
-   [(set_attr "type" "arith,arith")
-    (set_attr "mode" "SI")
-    (set_attr "length"       "15,15")])
+  {@ [cons: =0, 1, =2; attrs: type, length]
+     [ a, 0, &a; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%2, %1, 
16\;src\t%2, %2, %1\;src\t%2, %2, %2\;src\t%0, %1, %2");
+     [&a, r,  X; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%0, %1, 
16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0");
+  }
+  [(set_attr "mode" "SI")])
(define_expand "bswapdi2"
   [(set (match_operand:DI 0 "register_operand" "")
diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c b/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c
new file mode 100644
index 00000000000..15364ed51e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void foo(void);
+
+void test_0(volatile unsigned int a[], unsigned int b)
+{  /* Each bswap32 below follows a different kind of statement.  */
+  a[0] = __builtin_bswap32(a[0]);      /* no earlier BSWAP: expect "ssai 8" (1) */
+  a[1] = a[1] >> 9;                    /* constant shift: should not block omission */
+  a[2] = __builtin_bswap32(a[2]);      /* expect "ssai 8" to be omitted */
+  a[3] = a[3] << b;                    /* variable shift */
+  a[4] = __builtin_bswap32(a[4]);      /* expect "ssai 8" (2) */
+  foo();                               /* call */
+  a[5] = __builtin_bswap32(a[5]);      /* expect "ssai 8" (3) */
+  a[6] = __builtin_stdc_rotate_left (a[6], 13);        /* rotate */
+  a[7] = __builtin_bswap32(a[7]);      /* expect "ssai 8" (4) */
+  asm volatile ("# asm volatile");     /* asm */
+  a[8] = __builtin_bswap32(a[8]);      /* expect "ssai 8" (5) */
+}
+
+void test_1(volatile unsigned long long a[])
+{  /* Presumably only the first 32-bit swap here needs "ssai 8" (6th match).  */
+  a[0] = __builtin_bswap64(a[0]);
+  a[1] = __builtin_bswap64(a[1]);
+}
+
+/* { dg-final { scan-assembler-times "ssai\t8" 6 } } */
--
2.39.5

Reply via email to