Hi all,
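This patch vectorises __builtin_signbit for V4SF inputs using the vector
unsigned shift right (USHR) instruction.  Note that the vectorised form
yields 0 or 1 per lane, whereas the existing scalar sequence yields the
masked sign bit (0 or 0x80000000); __builtin_signbit only guarantees a
nonzero result when the sign bit is set, so both are valid.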
Bootstrapped and tested on aarch64-none-linux-gnu.
Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp
Before patch:
foo:
adrp x3, in // 37 [c=4 l=4] *movdi_aarch64/12
adrp x2, out // 40 [c=4 l=4] *movdi_aarch64/12
add x3, x3, :lo12:in // 39 [c=4 l=4] add_losym_di
add x2, x2, :lo12:out // 42 [c=4 l=4] add_losym_di
mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr w1, [x3, x0] // 10 [c=16 l=4] *zero_extendsidi2_aarch64/1
and w1, w1, -2147483648 // 11 [c=4 l=4] andsi3/1
str w1, [x2, x0] // 16 [c=4 l=4] *movsi_aarch64/8
add x0, x0, 4 // 17 [c=4 l=4] *adddi3_aarch64/0
cmp x0, 4096 // 19 [c=4 l=4] cmpdi/1
bne .L2 // 20 [c=4 l=4] condjump
ret // 50 [c=0 l=4] *do_return
After patch:
foo:
adrp x2, in // 36 [c=4 l=4] *movdi_aarch64/12
adrp x1, out // 39 [c=4 l=4] *movdi_aarch64/12
add x2, x2, :lo12:in // 38 [c=4 l=4] add_losym_di
add x1, x1, :lo12:out // 41 [c=4 l=4] add_losym_di
mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr q0, [x2, x0] // 10 [c=8 l=4] *aarch64_simd_movv4sf/0
ushr v0.4s, v0.4s, 31 // 11 [c=12 l=4] aarch64_simd_lshrv4si
str q0, [x1, x0] // 15 [c=4 l=4] *aarch64_simd_movv4si/2
add x0, x0, 16 // 16 [c=4 l=4] *adddi3_aarch64/0
cmp x0, 4096 // 18 [c=4 l=4] cmpdi/1
bne .L2 // 19 [c=4 l=4] condjump
ret // 49 [c=0 l=4] *do_return
Thanks,
Przemyslaw
gcc/ChangeLog:
2019-03-20 Przemyslaw Wirkus <[email protected]>
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Add CASE_CFN_SIGNBIT.
* config/aarch64/aarch64-simd-builtins.def (signbit): Extend to
V4SF mode.
* config/aarch64/aarch64-simd.md (signbitv4sf2): New expand.
gcc/testsuite/ChangeLog:
2019-02-28 Przemyslaw Wirkus <[email protected]>
* gcc.target/aarch64/signbitv4sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 04063e5ed134d2e64487db23b8fa7794817b2739..86f8345848abd1515cef61824db525dc26ec9bdb 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1709,6 +1709,13 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
return aarch64_builtin_decls[builtin];
}
+ CASE_CFN_SIGNBIT:
+ {
+ if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_signbitv4sf];
+ else
+ return NULL_TREE;
+ }
case CFN_BUILT_IN_BSWAP16:
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 17bb0c4869b12ede2fc51a8f89d841ded8fac230..d568f0ba4e61febf0590b22789b006f3bfe11ccd 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -324,6 +324,9 @@
VAR1 (UNOP, rint, 2, hf)
VAR1 (UNOP, round, 2, hf)
+ /* Implemented by signbitv4sf2 pattern.  */
+ VAR1 (UNOP, signbit, 2, v4sf)
+
/* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index be6c27d319a1ca6fee581d8f8856a4dff8f4a060..87e2a58649c3e5d490c499115cf6b7495d448c29 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -915,6 +915,21 @@
[(set_attr "type" "neon_ins<q>")]
)
+(define_expand "signbitv4sf2"
+ [(use (match_operand:V4SI 0 "register_operand"))
+ (use (match_operand:V4SF 1 "register_operand"))]
+ "TARGET_SIMD"
+{
+ int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
+ rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
+ shift_amount);
+ operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
+
+ emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
+ shift_vector));
+ DONE;
+})
+
(define_insn "aarch64_simd_lshr<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa06a5df1dbb3e295355d485b39963127a828b68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 1024
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo ()
+{
+ for (int i = 0; i < N; i++)
+ out[i] = __builtin_signbit (in[i]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */
+
+int
+main ()
+{
+ foo ();
+
+ for (int i = 0; i < N; i++)
+ {
+ if (in[i] >= 0.0 && out[i])
+ abort ();
+ if (in[i] < 0.0 && !out[i])
+ abort ();
+ }
+
+ return 0;
+}
+