subject:"\[PATCH\]\[GCC\]\[AArch64\] Vectorise __builtin

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-14 Thread Przemyslaw Wirkus

> What is that backslash in \> doing in the ChangeLog entries?

>    Jakub

My bad, tool I use for code review crafted in backslash when it wrongly assumed
I want a markdown. An early sign that machines want to take over, I guess...
I promise I will be more diligent next time.

kind regards
Przemyslaw

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-14 Thread Jakub Jelinek

On Tue, May 14, 2019 at 09:08:28AM +0100, Richard Sandiford wrote:
> > 2019-05-13  Przemyslaw Wirkus  

What is that backslash in \> doing in the ChangeLog entries?

Jakub

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-14 Thread Richard Sandiford

Przemyslaw Wirkus  writes:
> Hi all,
>
> Vectorise __builtin_signbit (v2sf, v4sf) with unsigned shift right vector
> instruction.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Assembly output for:
> $ aarch64-elf-gcc -S -O3 signbitv2sf.c -dp
>
> Before patch:
>
> foo:
>         ldp     w2, w1, [x1]            // 37   [c=0 l=4]  *load_pair_zero_extendsidi2_aarch64/0
>         and     w2, w2, -2147483648     // 8    [c=4 l=4]  andsi3/1
>         and     w1, w1, -2147483648     // 12   [c=4 l=4]  andsi3/1
>         stp     w2, w1, [x0]            // 38   [c=0 l=4]  store_pair_sw_sisi/0
>         ret                             // 32   [c=0 l=4]  *do_return
>
> After patch:
>
> foo:
>         ldr     d0, [x1]                // 7    [c=8 l=4]  *aarch64_simd_movv2sf/0
>         ushr    v0.2s, v0.2s, 31        // 8    [c=12 l=4]  aarch64_simd_lshrv2si
>         str     d0, [x0]                // 9    [c=4 l=4]  *aarch64_simd_movv2si/2
>         ret                             // 28   [c=0 l=4]  *do_return
>
> Assembly output for:
> $ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp
>
> Before patch:
>
> foo:
>         adrp    x3, in                  // 38   [c=4 l=4]  *movdi_aarch64/12
>         adrp    x2, out                 // 41   [c=4 l=4]  *movdi_aarch64/12
>         add     x3, x3, :lo12:in        // 40   [c=4 l=4]  add_losym_di
>         add     x2, x2, :lo12:out       // 43   [c=4 l=4]  add_losym_di
>         mov     x0, 0                   // 3    [c=4 l=4]  *movdi_aarch64/3
>         .p2align 3,,7
> .L2:
>         ldr     w1, [x3, x0]            // 10   [c=16 l=4]  *zero_extendsidi2_aarch64/1
>         and     w1, w1, -2147483648     // 11   [c=4 l=4]  andsi3/1
>         str     w1, [x2, x0]            // 16   [c=4 l=4]  *movsi_aarch64/8
>         add     x0, x0, 4               // 17   [c=4 l=4]  *adddi3_aarch64/0
>         cmp     x0, 4096                // 19   [c=4 l=4]  cmpdi/1
>         bne     .L2                     // 20   [c=4 l=4]  condjump
>         ret                             // 51   [c=0 l=4]  *do_return
>
> After patch:
>
> foo:
>         adrp    x2, in                  // 37   [c=4 l=4]  *movdi_aarch64/12
>         adrp    x1, out                 // 40   [c=4 l=4]  *movdi_aarch64/12
>         add     x2, x2, :lo12:in        // 39   [c=4 l=4]  add_losym_di
>         add     x1, x1, :lo12:out       // 42   [c=4 l=4]  add_losym_di
>         mov     x0, 0                   // 3    [c=4 l=4]  *movdi_aarch64/3
>         .p2align 3,,7
> .L2:
>         ldr     q0, [x2, x0]            // 10   [c=8 l=4]  *aarch64_simd_movv4sf/0
>         ushr    v0.4s, v0.4s, 31        // 11   [c=12 l=4]  aarch64_simd_lshrv4si
>         str     q0, [x1, x0]            // 15   [c=4 l=4]  *aarch64_simd_movv4si/2
>         add     x0, x0, 16              // 16   [c=4 l=4]  *adddi3_aarch64/0
>         cmp     x0, 4096                // 18   [c=4 l=4]  cmpdi/1
>         bne     .L2                     // 19   [c=4 l=4]  condjump
>         ret                             // 50   [c=0 l=4]  *do_return
>
> OK for Trunk ?
>
> Thanks,
> Przemyslaw
>
> gcc/ChangeLog:
>
> 2019-05-13  Przemyslaw Wirkus  
>
>   * internal-fn.def (SIGNBIT): New.
>   * config/aarch64/aarch64-simd.md (signbitv2sf2): New expand
>   defined.
>   (signbitv4sf2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2019-05-13  Przemyslaw Wirkus  
>
>   * gcc.target/aarch64/signbitv4sf.c: New test.
>   * gcc.target/aarch64/signbitv2sf.c: New test.

Thanks, applied as r271149.

Richard

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-13 Thread Przemyslaw Wirkus

Hi all,

Vectorise __builtin_signbit (v2sf, v4sf) with unsigned shift right vector
instruction.

Bootstrapped and tested on aarch64-none-linux-gnu.

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv2sf.c -dp

Before patch:

foo:
        ldp     w2, w1, [x1]            // 37   [c=0 l=4]  *load_pair_zero_extendsidi2_aarch64/0
        and     w2, w2, -2147483648     // 8    [c=4 l=4]  andsi3/1
        and     w1, w1, -2147483648     // 12   [c=4 l=4]  andsi3/1
        stp     w2, w1, [x0]            // 38   [c=0 l=4]  store_pair_sw_sisi/0
        ret                             // 32   [c=0 l=4]  *do_return

After patch:

foo:
        ldr     d0, [x1]                // 7    [c=8 l=4]  *aarch64_simd_movv2sf/0
        ushr    v0.2s, v0.2s, 31        // 8    [c=12 l=4]  aarch64_simd_lshrv2si
        str     d0, [x0]                // 9    [c=4 l=4]  *aarch64_simd_movv2si/2
        ret                             // 28   [c=0 l=4]  *do_return

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp

Before patch:

foo:
        adrp    x3, in                  // 38   [c=4 l=4]  *movdi_aarch64/12
        adrp    x2, out                 // 41   [c=4 l=4]  *movdi_aarch64/12
        add     x3, x3, :lo12:in        // 40   [c=4 l=4]  add_losym_di
        add     x2, x2, :lo12:out       // 43   [c=4 l=4]  add_losym_di
        mov     x0, 0                   // 3    [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     w1, [x3, x0]            // 10   [c=16 l=4]  *zero_extendsidi2_aarch64/1
        and     w1, w1, -2147483648     // 11   [c=4 l=4]  andsi3/1
        str     w1, [x2, x0]            // 16   [c=4 l=4]  *movsi_aarch64/8
        add     x0, x0, 4               // 17   [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096                // 19   [c=4 l=4]  cmpdi/1
        bne     .L2                     // 20   [c=4 l=4]  condjump
        ret                             // 51   [c=0 l=4]  *do_return

After patch:

foo:
        adrp    x2, in                  // 37   [c=4 l=4]  *movdi_aarch64/12
        adrp    x1, out                 // 40   [c=4 l=4]  *movdi_aarch64/12
        add     x2, x2, :lo12:in        // 39   [c=4 l=4]  add_losym_di
        add     x1, x1, :lo12:out       // 42   [c=4 l=4]  add_losym_di
        mov     x0, 0                   // 3    [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     q0, [x2, x0]            // 10   [c=8 l=4]  *aarch64_simd_movv4sf/0
        ushr    v0.4s, v0.4s, 31        // 11   [c=12 l=4]  aarch64_simd_lshrv4si
        str     q0, [x1, x0]            // 15   [c=4 l=4]  *aarch64_simd_movv4si/2
        add     x0, x0, 16              // 16   [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096                // 18   [c=4 l=4]  cmpdi/1
        bne     .L2                     // 19   [c=4 l=4]  condjump
        ret                             // 50   [c=0 l=4]  *do_return

OK for Trunk ?

Thanks,
Przemyslaw

gcc/ChangeLog:

2019-05-13  Przemyslaw Wirkus  

* internal-fn.def (SIGNBIT): New.
* config/aarch64/aarch64-simd.md (signbitv2sf2): New expand
defined.
(signbitv4sf2): Likewise.

gcc/testsuite/ChangeLog:

2019-05-13  Przemyslaw Wirkus  

* gcc.target/aarch64/signbitv4sf.c: New test.
* gcc.target/aarch64/signbitv2sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
e3852c5d182b70978d7603225fce55c0b8ee2894..8f7227327cb960fb34c7b88e1bf283f8f17a3be9
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -935,6 +935,21 @@
   [(set_attr "type" "neon_ins")]
 )
 
+(define_expand "signbit<mode>2"
+  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
+   (use (match_operand:VDQSF 1 "register_operand"))]
+  "TARGET_SIMD"
+{
+  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
+  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+                                                        shift_amount);
+  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+
+  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
+                                                 shift_vector));
+  DONE;
+})
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 
e370eaa84767839c827b6ebd0c86303bcc36fa54..016301a58d83d7128817824d7c7ef92825c7e03e
 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -217,6 +217,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary)
 DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary)
 DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary)
 DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary)
+DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary)
 DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary)
 DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary)
 DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary)
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c 
b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c
new file mode 100644
index 
..2587bfedd538f30a018cf827ea57cd583b2fa084
--- /dev/null
+++

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-04 Thread Richard Sandiford

Przemyslaw Wirkus  writes:
> Hi Richard,
> New patch adds a new IFN_SIGNBIT internal function that maps
> to signbit_optab.

Thanks.

> gcc/ChangeLog:
>
> 2019-05-05  Przemyslaw Wirkus  
>
>   * gcc/internal-fn.def (SIGNBIT): New.
>   * gcc/config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
>   defined.

Sorry for the nitpicks (I'm not really a fan of ChangeLogs), but:
the filenames are relative to the changelog file, so no "gcc/" here and

> gcc/testsuite/ChangeLog:
>
> 2019-05-05  Przemyslaw Wirkus  
>
>   * gcc/testsuite/gcc.target/aarch64/signbitv4sf.c: New test.

no "gcc/testsuite/" here.

> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> e3852c5d182b70978d7603225fce55c0b8ee2894..3374ce95b912cceaca49660df0579467f758974d
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -935,6 +935,21 @@
>[(set_attr "type" "neon_ins")]
>  )
>  
> +(define_expand "signbitv4sf2"
> +  [(use (match_operand:V4SI 0 "register_operand"))
> +   (use (match_operand:V4SF 1 "register_operand"))]
> +  "TARGET_SIMD"
> +{
> +  int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
> +  rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
> +  shift_amount);
> +  operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
> +
> +  emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
> +  shift_vector));

Formatting nit: argument should be indented to the column after the
innermost unclosed "(".

> +  DONE;
> +})
> +

Looks good, but I think it can be generalised to handle v2sf if you use:

- :VDQSF instead of :V4SF
- <MODE> instead of other instances of V4SF (and <mode> instead of v4sf)
- <V_INT_EQUIV> instead of V4SI (and <v_int_equiv> instead of v4si)

E.g. this will handle SLP instances like:

void
f (int *i, float *f)
{
  i[0] = __builtin_signbitf (f[0]);
  i[1] = __builtin_signbitf (f[1]);
}

It could also be used for epilogue loop vectorisation, if we ever
turn that on by default for AArch64.

Thanks,
Richard

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-05-03 Thread Przemyslaw Wirkus

Hi Richard,
New patch adds a new IFN_SIGNBIT internal function that maps
to signbit_optab.

gcc/ChangeLog:

2019-05-05  Przemyslaw Wirkus  

* gcc/internal-fn.def (SIGNBIT): New.
* gcc/config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
defined.

gcc/testsuite/ChangeLog:

2019-05-05  Przemyslaw Wirkus  

* gcc/testsuite/gcc.target/aarch64/signbitv4sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
e3852c5d182b70978d7603225fce55c0b8ee2894..3374ce95b912cceaca49660df0579467f758974d
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -935,6 +935,21 @@
   [(set_attr "type" "neon_ins")]
 )
 
+(define_expand "signbitv4sf2"
+  [(use (match_operand:V4SI 0 "register_operand"))
+   (use (match_operand:V4SF 1 "register_operand"))]
+  "TARGET_SIMD"
+{
+  int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
+  rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
+  shift_amount);
+  operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
+
+  emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
+  shift_vector));
+  DONE;
+})
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 
e370eaa84767839c827b6ebd0c86303bcc36fa54..016301a58d83d7128817824d7c7ef92825c7e03e
 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -217,6 +217,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary)
 DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary)
 DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary)
 DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary)
+DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary)
 DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary)
 DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary)
 DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary)
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c 
b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 
..aa06a5df1dbb3e295355d485b39963127a828b68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 1024
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo ()
+{
+  for (int i = 0; i < N; i++)
+out[i] = __builtin_signbit (in[i]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */
+
+int
+main ()
+{
+  foo ();
+
+  for (int i = 0; i < N; i++)
+  {
+if (in[i] >= 0.0 && out[i])
+  abort ();
+if (in[i] < 0.0 && !out[i])
+  abort ();
+  }
+
+  return 0;
+}
+

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-03-22 Thread Richard Sandiford

Hi,

Przemyslaw Wirkus  writes:
> Hi all,
>
> Vectorise __builtin_signbit (v4sf) with unsigned shift right vector
> instruction.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Assembly output for:
> $ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp
>
> Before patch:
>
> foo:
>         adrp    x3, in                  // 37   [c=4 l=4]  *movdi_aarch64/12
>         adrp    x2, out                 // 40   [c=4 l=4]  *movdi_aarch64/12
>         add     x3, x3, :lo12:in        // 39   [c=4 l=4]  add_losym_di
>         add     x2, x2, :lo12:out       // 42   [c=4 l=4]  add_losym_di
>         mov     x0, 0                   // 3    [c=4 l=4]  *movdi_aarch64/3
>         .p2align 3,,7
> .L2:
>         ldr     w1, [x3, x0]            // 10   [c=16 l=4]  *zero_extendsidi2_aarch64/1
>         and     w1, w1, -2147483648     // 11   [c=4 l=4]  andsi3/1
>         str     w1, [x2, x0]            // 16   [c=4 l=4]  *movsi_aarch64/8
>         add     x0, x0, 4               // 17   [c=4 l=4]  *adddi3_aarch64/0
>         cmp     x0, 4096                // 19   [c=4 l=4]  cmpdi/1
>         bne     .L2                     // 20   [c=4 l=4]  condjump
>         ret                             // 50   [c=0 l=4]  *do_return
>
> After patch:
>
> foo:
>         adrp    x2, in                  // 36   [c=4 l=4]  *movdi_aarch64/12
>         adrp    x1, out                 // 39   [c=4 l=4]  *movdi_aarch64/12
>         add     x2, x2, :lo12:in        // 38   [c=4 l=4]  add_losym_di
>         add     x1, x1, :lo12:out       // 41   [c=4 l=4]  add_losym_di
>         mov     x0, 0                   // 3    [c=4 l=4]  *movdi_aarch64/3
>         .p2align 3,,7
> .L2:
>         ldr     q0, [x2, x0]            // 10   [c=8 l=4]  *aarch64_simd_movv4sf/0
>         ushr    v0.4s, v0.4s, 31        // 11   [c=12 l=4]  aarch64_simd_lshrv4si
>         str     q0, [x1, x0]            // 15   [c=4 l=4]  *aarch64_simd_movv4si/2
>         add     x0, x0, 16              // 16   [c=4 l=4]  *adddi3_aarch64/0
>         cmp     x0, 4096                // 18   [c=4 l=4]  cmpdi/1
>         bne     .L2                     // 19   [c=4 l=4]  condjump
>         ret                             // 49   [c=0 l=4]  *do_return
>
> Thanks,
> Przemyslaw
>
> gcc/ChangeLog:
>
> 2019-03-20  Przemyslaw Wirkus  
>
>   * config/aarch64/aarch64-builtins.c
>   (aarch64_builtin_vectorized_function): Added CASE_CFN_SIGNBIT.
>   * config/aarch64/aarch64-simd-builtins.def: (signbit)
>   Extend to V4SF mode.
>   * config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
>   defined.

I think it'd be better to add a new IFN_SIGNBIT internal function
that maps to signbit_optab.  That way the compiler will know what
the vector function does and there'll be no need to add a new
built-in function.

Thanks,
Richard

[PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

2019-03-21 Thread Przemyslaw Wirkus

Hi all,

Vectorise __builtin_signbit (v4sf) with unsigned shift right vector
instruction.

Bootstrapped and tested on aarch64-none-linux-gnu.

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp

Before patch:

foo:
        adrp    x3, in                  // 37   [c=4 l=4]  *movdi_aarch64/12
        adrp    x2, out                 // 40   [c=4 l=4]  *movdi_aarch64/12
        add     x3, x3, :lo12:in        // 39   [c=4 l=4]  add_losym_di
        add     x2, x2, :lo12:out       // 42   [c=4 l=4]  add_losym_di
        mov     x0, 0                   // 3    [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     w1, [x3, x0]            // 10   [c=16 l=4]  *zero_extendsidi2_aarch64/1
        and     w1, w1, -2147483648     // 11   [c=4 l=4]  andsi3/1
        str     w1, [x2, x0]            // 16   [c=4 l=4]  *movsi_aarch64/8
        add     x0, x0, 4               // 17   [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096                // 19   [c=4 l=4]  cmpdi/1
        bne     .L2                     // 20   [c=4 l=4]  condjump
        ret                             // 50   [c=0 l=4]  *do_return

After patch:

foo:
        adrp    x2, in                  // 36   [c=4 l=4]  *movdi_aarch64/12
        adrp    x1, out                 // 39   [c=4 l=4]  *movdi_aarch64/12
        add     x2, x2, :lo12:in        // 38   [c=4 l=4]  add_losym_di
        add     x1, x1, :lo12:out       // 41   [c=4 l=4]  add_losym_di
        mov     x0, 0                   // 3    [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     q0, [x2, x0]            // 10   [c=8 l=4]  *aarch64_simd_movv4sf/0
        ushr    v0.4s, v0.4s, 31        // 11   [c=12 l=4]  aarch64_simd_lshrv4si
        str     q0, [x1, x0]            // 15   [c=4 l=4]  *aarch64_simd_movv4si/2
        add     x0, x0, 16              // 16   [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096                // 18   [c=4 l=4]  cmpdi/1
        bne     .L2                     // 19   [c=4 l=4]  condjump
        ret                             // 49   [c=0 l=4]  *do_return

Thanks,
Przemyslaw

gcc/ChangeLog:

2019-03-20  Przemyslaw Wirkus  

* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Added CASE_CFN_SIGNBIT.
* config/aarch64/aarch64-simd-builtins.def: (signbit)
Extend to V4SF mode.
* config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
defined.

gcc/testsuite/ChangeLog:

2019-02-28  Przemyslaw Wirkus  

* gcc.target/aarch64/signbitv4sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index 
04063e5ed134d2e64487db23b8fa7794817b2739..86f8345848abd1515cef61824db525dc26ec9bdb
 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1709,6 +1709,13 @@ aarch64_builtin_vectorized_function (unsigned int fn, 
tree type_out,
 
return aarch64_builtin_decls[builtin];
   }
+CASE_CFN_SIGNBIT:
+  {
+   if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_signbitv4sf];
+   else
+ return NULL_TREE;
+  }
 case CFN_BUILT_IN_BSWAP16:
 #undef AARCH64_CHECK_BUILTIN_MODE
 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index 
17bb0c4869b12ede2fc51a8f89d841ded8fac230..d568f0ba4e61febf0590b22789b006f3bfe11ccd
 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -324,6 +324,9 @@
   VAR1 (UNOP, rint, 2, hf)
   VAR1 (UNOP, round, 2, hf)
 
+  /* Implemented by signbit2 pattern */
+  VAR1 (UNOP, signbit, 2, v4sf)
+
   /* Implemented by l2.  */
   VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
   VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
be6c27d319a1ca6fee581d8f8856a4dff8f4a060..87e2a58649c3e5d490c499115cf6b7495d448c29
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -915,6 +915,21 @@
   [(set_attr "type" "neon_ins")]
 )
 
+(define_expand "signbitv4sf2"
+[(use (match_operand:V4SI 0 "register_operand"))
+ (use (match_operand:V4SF 1 "register_operand"))]
+ "TARGET_SIMD"
+{
+  int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
+  rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
+  shift_amount);
+  operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
+
+  emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
+  shift_vector));
+  DONE;
+})
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c 
b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 
..aa06a5df1dbb3e295355d485b39963127a828b68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,35 @@
+/* { dg-do

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

[PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64

8 matches

Site Navigation

Mail list logo

Footer information