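Hi all,

This patch vectorises __builtin_signbit for the V2SF and V4SF modes by using the vector unsigned shift right (USHR) instruction. Note that the vectorised form returns 1 for a negative input (sign bit shifted down by 31), whereas the scalar form returns 0x80000000 (sign bit masked in place); both are valid because __builtin_signbit only guarantees a nonzero result when the sign bit is set.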
Bootstrapped and tested on aarch64-none-linux-gnu.

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv2sf.c -dp

Before patch:

foo:
        ldp     w2, w1, [x1]    // 37 [c=0 l=4]  *load_pair_zero_extendsidi2_aarch64/0
        and     w2, w2, -2147483648     // 8 [c=4 l=4]  andsi3/1
        and     w1, w1, -2147483648     // 12 [c=4 l=4]  andsi3/1
        stp     w2, w1, [x0]    // 38 [c=0 l=4]  store_pair_sw_sisi/0
        ret     // 32 [c=0 l=4]  *do_return

After patch:

foo:
        ldr     d0, [x1]        // 7 [c=8 l=4]  *aarch64_simd_movv2sf/0
        ushr    v0.2s, v0.2s, 31        // 8 [c=12 l=4]  aarch64_simd_lshrv2si
        str     d0, [x0]        // 9 [c=4 l=4]  *aarch64_simd_movv2si/2
        ret     // 28 [c=0 l=4]  *do_return

Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp

Before patch:

foo:
        adrp    x3, in  // 38 [c=4 l=4]  *movdi_aarch64/12
        adrp    x2, out // 41 [c=4 l=4]  *movdi_aarch64/12
        add     x3, x3, :lo12:in        // 40 [c=4 l=4]  add_losym_di
        add     x2, x2, :lo12:out       // 43 [c=4 l=4]  add_losym_di
        mov     x0, 0   // 3 [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     w1, [x3, x0]    // 10 [c=16 l=4]  *zero_extendsidi2_aarch64/1
        and     w1, w1, -2147483648     // 11 [c=4 l=4]  andsi3/1
        str     w1, [x2, x0]    // 16 [c=4 l=4]  *movsi_aarch64/8
        add     x0, x0, 4       // 17 [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096        // 19 [c=4 l=4]  cmpdi/1
        bne     .L2     // 20 [c=4 l=4]  condjump
        ret     // 51 [c=0 l=4]  *do_return

After patch:

foo:
        adrp    x2, in  // 37 [c=4 l=4]  *movdi_aarch64/12
        adrp    x1, out // 40 [c=4 l=4]  *movdi_aarch64/12
        add     x2, x2, :lo12:in        // 39 [c=4 l=4]  add_losym_di
        add     x1, x1, :lo12:out       // 42 [c=4 l=4]  add_losym_di
        mov     x0, 0   // 3 [c=4 l=4]  *movdi_aarch64/3
        .p2align 3,,7
.L2:
        ldr     q0, [x2, x0]    // 10 [c=8 l=4]  *aarch64_simd_movv4sf/0
        ushr    v0.4s, v0.4s, 31        // 11 [c=12 l=4]  aarch64_simd_lshrv4si
        str     q0, [x1, x0]    // 15 [c=4 l=4]  *aarch64_simd_movv4si/2
        add     x0, x0, 16      // 16 [c=4 l=4]  *adddi3_aarch64/0
        cmp     x0, 4096        // 18 [c=4 l=4]  cmpdi/1
        bne     .L2     // 19 [c=4 l=4]  condjump
        ret     // 50 [c=0 l=4]  *do_return

OK for trunk?

Thanks,
Przemyslaw

gcc/ChangeLog:

2019-05-13  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

	* internal-fn.def (SIGNBIT): New.
	* config/aarch64/aarch64-simd.md (signbitv2sf2): New expand defined.
	(signbitv4sf2): Likewise.

gcc/testsuite/ChangeLog:

2019-05-13  Przemyslaw Wirkus  <przemyslaw.wir...@arm.com>

	* gcc.target/aarch64/signbitv4sf.c: New test.
	* gcc.target/aarch64/signbitv2sf.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e3852c5d182b70978d7603225fce55c0b8ee2894..8f7227327cb960fb34c7b88e1bf283f8f17a3be9 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -935,6 +935,21 @@ [(set_attr "type" "neon_ins<q>")] ) +(define_expand "signbit<mode>2" + [(use (match_operand:<V_INT_EQUIV> 0 "register_operand")) + (use (match_operand:VDQSF 1 "register_operand"))] + "TARGET_SIMD" +{ + int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1; + rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, + shift_amount); + operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); + + emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1], + shift_vector)); + DONE; +}) + (define_insn "aarch64_simd_lshr<mode>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index e370eaa84767839c827b6ebd0c86303bcc36fa54..016301a58d83d7128817824d7c7ef92825c7e03e 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -217,6 +217,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary) DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary) DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary) DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary) +DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary) DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary) DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary) DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary) diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c new file mode 100644 index 0000000000000000000000000000000000000000..2587bfedd538f30a018cf827ea57cd583b2fa084 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 
--save-temps" } */ + +extern void abort (); + +#define N 8 +float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0}; +int out[N]; + +void +foo (int *i, float *f) +{ + i[0] = __builtin_signbit (f[0]); + i[1] = __builtin_signbit (f[1]); +} + +/* { dg-final { scan-assembler-not {-2147483648} } } */ +/* { dg-final { scan-assembler {\tushr\tv[0-9]+.2s, v[0-9]+.2s, 31} } } */ + +int +main () +{ + int i; + + foo (out, in); + foo (out + 2, in + 2); + foo (out + 4, in + 4); + foo (out + 6, in + 6); + + for (i = 0; i < N; i++) + { + if (in[i] >= 0.0 && out[i]) + abort (); + if (in[i] < 0.0 && !out[i]) + abort (); + } + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c new file mode 100644 index 0000000000000000000000000000000000000000..18cffdc7d5b2701a1bbf23f9f7d27b7a31568758 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 --save-temps" } */ + +extern void abort (); + +#define N 1024 +float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0}; +int out[N]; + +void +foo () +{ + int i; + for (i = 0; i < N; i++) + out[i] = __builtin_signbit (in[i]); +} + +/* { dg-final { scan-assembler-not {-2147483648} } } */ +/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */ + +int +main () +{ + int i; + + foo (); + + for (i = 0; i < N; i++) + { + if (in[i] >= 0.0 && out[i]) + abort (); + if (in[i] < 0.0 && !out[i]) + abort (); + } + + return 0; +} +