Hi All,

This implements the new tbranch optab for AArch64.

Instead of emitting the instruction directly, I've chosen to expand the pattern
using a zero extract and to generate the existing comparison pattern, for two
reasons:

  1. It allows for CSE of the actual comparison.
  2. It looks like the code in expand marks the label as unused and removes it
     if it doesn't see a separate reference to it.

Because of this expansion I disable the pattern at -O0, since we have no
combine in that case and would end up with worse code.  I did try emitting the
pattern directly, but as mentioned in point 2 expand would then kill the label.
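
To make the expected result concrete, here is a small example in the spirit of
the new test (the function name and the exact register/label numbers are
illustrative, not real compiler output):

  #include <stdbool.h>

  void h (void);

  /* At -O2 the expansion plus combine should boil down to a single
     test-bit-and-branch, roughly:

	f:
		tbnz	x0, 0, .L4
		ret
	.L4:
		b	h

     At -O0 the tbranch expander is disabled, so we fall back to the usual
     compare-and-branch sequence.  */
  void
  f (bool x)
  {
    if (__builtin_expect (x, 0))
      h ();
  }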

While doing this I noticed that the version that checks the sign bit doesn't
work.  The reason for this looks like an incorrect pattern.  The [us]bfx
instructions are defined for index + size == register size.  They architecturally
alias to different instructions and binutils handles this correctly.

In GCC, however, we tried to prematurely optimize this and added a separate split
pattern.  But this pattern is missing alternatives and only handles DImode.

This patch removes that split and relaxes the constraints on the normal bfx
pattern.
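
As an illustration of the aliasing, here is a minimal C sketch (the function
is purely illustrative and not part of the patch or testsuite):

  /* Extracting the sign bit, i.e. index + size == register size.  The
     zero extract of bit 31 with width 1 corresponds to
	ubfx	w0, w0, 31, 1
     which is architecturally the same encoding as
	lsr	w0, w0, 31
     and binutils handles the alias correctly.  */
  unsigned int
  sign_bit (int x)
  {
    return (unsigned int) x >> 31;
  }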

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
        (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
        (tbranch<mode>4): New.
        (*<optab><mode>): Rename to...
        (*<optab><GPI:mode><ALLI:mode>): ... this.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/tbz_1.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..6a4494a9a370139313cc8e57447717aafa14da2d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,28 @@ (define_insn "*cb<optab><mode>1"
                      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
   [(set (pc) (if_then_else
-             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-                                   (const_int 1)
-                                   (match_operand 1
-                                     "aarch64_simd_shift_imm_<mode>" "n"))
+               (match_operator 0 "aarch64_comparison_operator"
+                [(match_operand:ALLI 1 "register_operand")
+                 (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+               (label_ref (match_operand 3 "" ""))
+               (pc)))]
+  "optimize > 0"
+{
+  rtx bitvalue = gen_reg_rtx (DImode);
+  emit_insn (gen_extzv (bitvalue, operands[1], const1_rtx, operands[2]));
+  operands[2] = const0_rtx;
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+                                        operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+             (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+                                    (const_int 1)
+                                    (match_operand 1
+                                      "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
                   (const_int 0))
             (label_ref (match_operand 2 "" ""))
             (pc)))
@@ -959,15 +975,15 @@ (define_insn "*tb<optab><mode>1"
       {
        if (get_attr_far_branch (insn) == 1)
          return aarch64_gen_far_branch (operands, 2, "Ltb",
-                                        "<inv_tb>\\t%<w>0, %1, ");
+                                        "<inv_tb>\\t%<ALLI:w>0, %1, ");
        else
          {
            operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-           return "tst\t%<w>0, %1\;<bcond>\t%l2";
+           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
          }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -5752,39 +5768,19 @@ (define_expand "<optab>"
 )
 
 
-(define_insn "*<optab><mode>"
+(define_insn "*<optab><GPI:mode><ALLI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-       (ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r")
+       (ANY_EXTRACT:GPI (match_operand:ALLI 1 "register_operand" "r")
                         (match_operand 2
-                          "aarch64_simd_shift_imm_offset_<mode>" "n")
+                          "aarch64_simd_shift_imm_offset_<ALLI:mode>" "n")
                         (match_operand 3
-                          "aarch64_simd_shift_imm_<mode>" "n")))]
+                          "aarch64_simd_shift_imm_<ALLI:mode>" "n")))]
   "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
-            1, GET_MODE_BITSIZE (<MODE>mode) - 1)"
-  "<su>bfx\\t%<w>0, %<w>1, %3, %2"
+            1, GET_MODE_BITSIZE (<ALLI:MODE>mode))"
+  "<su>bfx\\t%<GPI:w>0, %<GPI:w>1, %3, %2"
   [(set_attr "type" "bfx")]
 )
 
-;; When the bit position and width add up to 32 we can use a W-reg LSR
-;; instruction taking advantage of the implicit zero-extension of the X-reg.
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-       (zero_extract:DI (match_operand:DI 1 "register_operand")
-                        (match_operand 2
-                          "aarch64_simd_shift_imm_offset_di")
-                        (match_operand 3
-                          "aarch64_simd_shift_imm_di")))]
-  "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), 1,
-            GET_MODE_BITSIZE (DImode) - 1)
-   && (INTVAL (operands[2]) + INTVAL (operands[3]))
-       == GET_MODE_BITSIZE (SImode)"
-  [(set (match_dup 0)
-       (zero_extend:DI (lshiftrt:SI (match_dup 4) (match_dup 3))))]
-  {
-    operands[4] = gen_lowpart (SImode, operands[1]);
-  }
-)
-
 ;; Bitfield Insert (insv)
 (define_expand "insv<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+**     tbnz    x[0-9]+, #?0, .L([0-9]+)
+**     ret
+**     ...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+**     tbz     x[0-9]+, #?0, .L([0-9]+)
+**     b       h
+**     ...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+**     tbnz    w[0-9]+, #?31, .L[0-9]+
+**     b       h
+**     ...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+**     cmp     w[0-9]+, 0
+**     ble     .L[0-9]+
+**     b       h
+**     ...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+**     tbz     w[0-9]+, #?31, .L[0-9]+
+**     b       h
+**     ...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+**     cmp     w[0-9]+, 0
+**     bgt     .L[0-9]+
+**     b       h
+**     ...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+**     mov     w[0-9]+, 65279
+**     tst     w[0-9]+, w[0-9]+
+**     beq     .L[0-9]+
+**     b       h
+**     ...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}



