Hi all,

This patch vectorises __builtin_signbit for the V2SF and V4SF modes by
expanding it to a vector unsigned shift right (USHR) instruction.

Bootstrapped and tested on aarch64-none-linux-gnu.

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv2sf.c -dp

Before patch:

foo:
        ldp     w2, w1, [x1]    // 37   [c=0 l=4]  *load_pair_zero_extendsidi2_aarch64/0
        and     w2, w2, -2147483648     // 8    [c=4 l=4]  andsi3/1
        and     w1, w1, -2147483648     // 12   [c=4 l=4]  andsi3/1
        stp     w2, w1, [x0]    // 38   [c=0 l=4]  store_pair_sw_sisi/0
        ret             // 32   [c=0 l=4]  *do_return

After patch:

foo:
        ldr     d0, [x1]        // 7    [c=8 l=4]  *aarch64_simd_movv2sf/0
        ushr    v0.2s, v0.2s, 31        // 8    [c=12 l=4]  aarch64_simd_lshrv2si
        str     d0, [x0]        // 9    [c=4 l=4]  *aarch64_simd_movv2si/2
        ret             // 28   [c=0 l=4]  *do_return

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp

Before patch:

foo:
        adrp    x3, in  // 38   [c=4 l=4]  *movdi_aarch64/12
        adrp    x2, out // 41   [c=4 l=4]  *movdi_aarch64/12
        add     x3, x3, :lo12:in        // 40   [c=4 l=4]  add_losym_di
        add     x2, x2, :lo12:out       // 43   [c=4 l=4]  add_losym_di
        mov     x0, 0   // 3    [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     w1, [x3, x0]    // 10   [c=16 l=4]  *zero_extendsidi2_aarch64/1
        and     w1, w1, -2147483648     // 11   [c=4 l=4]  andsi3/1
        str     w1, [x2, x0]    // 16   [c=4 l=4]  *movsi_aarch64/8
        add     x0, x0, 4       // 17   [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096        // 19   [c=4 l=4]  cmpdi/1
        bne     .L2             // 20   [c=4 l=4]  condjump
        ret             // 51   [c=0 l=4]  *do_return

After patch:

foo:
        adrp    x2, in  // 37   [c=4 l=4]  *movdi_aarch64/12
        adrp    x1, out // 40   [c=4 l=4]  *movdi_aarch64/12
        add     x2, x2, :lo12:in        // 39   [c=4 l=4]  add_losym_di
        add     x1, x1, :lo12:out       // 42   [c=4 l=4]  add_losym_di
        mov     x0, 0   // 3    [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     q0, [x2, x0]    // 10   [c=8 l=4]  *aarch64_simd_movv4sf/0
        ushr    v0.4s, v0.4s, 31        // 11   [c=12 l=4]  aarch64_simd_lshrv4si
        str     q0, [x1, x0]    // 15   [c=4 l=4]  *aarch64_simd_movv4si/2
        add     x0, x0, 16      // 16   [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096        // 18   [c=4 l=4]  cmpdi/1
        bne     .L2             // 19   [c=4 l=4]  condjump
        ret             // 50   [c=0 l=4]  *do_return

OK for trunk?

Thanks,
Przemyslaw

gcc/ChangeLog:

2019-05-13  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

        * internal-fn.def (SIGNBIT): New.
        * config/aarch64/aarch64-simd.md (signbitv2sf2): New expand
        defined.
        (signbitv4sf2): Likewise.

gcc/testsuite/ChangeLog:

2019-05-13  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

        * gcc.target/aarch64/signbitv4sf.c: New test.
        * gcc.target/aarch64/signbitv2sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e3852c5d182b70978d7603225fce55c0b8ee2894..8f7227327cb960fb34c7b88e1bf283f8f17a3be9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -935,6 +935,21 @@
   [(set_attr "type" "neon_ins<q>")]
 )
 
+(define_expand "signbit<mode>2"
+  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
+   (use (match_operand:VDQSF 1 "register_operand"))]
+  "TARGET_SIMD"
+{
+  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
+  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+                                                        shift_amount);
+  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+
+  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
+                                                 shift_vector));
+  DONE;
+})
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index e370eaa84767839c827b6ebd0c86303bcc36fa54..016301a58d83d7128817824d7c7ef92825c7e03e 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -217,6 +217,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary)
 DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary)
 DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary)
 DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary)
+DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary)
 DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary)
 DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary)
 DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary)
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c
new file mode 100644
index 0000000000000000000000000000000000000000..2587bfedd538f30a018cf827ea57cd583b2fa084
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 8
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo (int *i, float *f)
+{
+  i[0] = __builtin_signbit (f[0]);
+  i[1] = __builtin_signbit (f[1]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.2s, v[0-9]+.2s, 31} } } */
+
+int
+main ()
+{
+  int i;
+
+  foo (out, in);
+  foo (out + 2, in + 2);
+  foo (out + 4, in + 4);
+  foo (out + 6, in + 6);
+
+  for (i = 0; i < N; i++)
+  {
+    if (in[i] >= 0.0 && out[i])
+      abort ();
+    if (in[i] < 0.0 && !out[i])
+      abort ();
+  }
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 0000000000000000000000000000000000000000..18cffdc7d5b2701a1bbf23f9f7d27b7a31568758
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 1024
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < N; i++)
+    out[i] = __builtin_signbit (in[i]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */
+
+int
+main ()
+{
+  int i;
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+  {
+    if (in[i] >= 0.0 && out[i])
+      abort ();
+    if (in[i] < 0.0 && !out[i])
+      abort ();
+  }
+
+  return 0;
+}
+

Reply via email to