GCC Maintainers: The following patch adds support for the vec_pack_to_short_fp32 builtin. The patch has been tested on powerpc64le-unknown-linux-gnu (Power 8 LE) and powerpc64-unknown-linux-gnu (Power 9 LE).
Please let me know if the following patch is acceptable. Thanks. Carl Love ------------------------------------------------------------------------- gcc/ChangeLog: 2017-07-06 Carl Love <c...@us.ibm.com> * config/rs6000/rs6000-c: Add support for built-in function vector unsigned short vec_pack_to_short_fp32 (vector float, vector float). * config/rs6000/rs6000-builtin.def (CONVERT_4F32_8I16): Add BU_P9V_AV_2 and BU_P9V_OVERLOAD_2 definitions. * config/rs6000/altivec.h (vec_pack_to_short_fp32): Add define. * config/rs6000/altivec.md(UNSPEC_CONVERT_4F32_8I16): Add UNSPEC. (convert_4f32_8i16): Add define_expand. * doc/extend.texi: Update the built-in documentation file for the new built-in function. gcc/testsuite/ChangeLog: 2017-07-06 Carl Love <c...@us.ibm.com> * gcc.target/powerpc/builtins-1-p9-runnable.c: Add new test file for built-ins. --- gcc/config/rs6000/altivec.h | 1 + gcc/config/rs6000/altivec.md | 18 +++++++++++++++ gcc/config/rs6000/rs6000-builtin.def | 2 ++ gcc/config/rs6000/rs6000-c.c | 4 ++++ gcc/doc/extend.texi | 2 ++ .../gcc.target/powerpc/builtins-1-p9-runnable.c | 26 ++++++++++++++++++++++ 6 files changed, 53 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 806675a..5af7eec 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -418,6 +418,7 @@ #ifdef __POWER9_VECTOR__ /* Vector additions added in ISA 3.0. 
*/ +#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8i16 #define vec_vctz __builtin_vec_vctz #define vec_cnttz __builtin_vec_vctz #define vec_vctzb __builtin_vec_vctzb diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 5629d77..d5f7a8f 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -79,6 +79,7 @@ UNSPEC_VUNPACK_LO_SIGN_DIRECT UNSPEC_VUPKHPX UNSPEC_VUPKLPX + UNSPEC_CONVERT_4F32_8I16 UNSPEC_DARN UNSPEC_DARN_32 UNSPEC_DARN_RAW @@ -3170,6 +3171,23 @@ } [(set_attr "type" "veccomplex")]) +;; Generate two vector F32 converted to packed vector I16 vector +(define_expand "convert_4f32_8i16" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")] + UNSPEC_CONVERT_4F32_8I16))] + "TARGET_P9_VECTOR" +{ + rtx rtx_tmp_hi = gen_reg_rtx (V4SImode); + rtx rtx_tmp_lo = gen_reg_rtx (V4SImode); + + emit_insn (gen_altivec_vctuxs (rtx_tmp_hi, operands[1], const0_rtx)); + emit_insn (gen_altivec_vctuxs (rtx_tmp_lo, operands[2], const0_rtx)); + emit_insn (gen_altivec_vpkswss (operands[0], rtx_tmp_hi, rtx_tmp_lo)); + DONE; +}) + ;; Generate ;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 ;; vsubu?m SCRATCH2,SCRATCH1,%1 diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index c5017aa..258c5f8 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1990,10 +1990,12 @@ BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm") /* ISA 3.0 vector overloaded 2-argument functions. */ BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv) BU_P9V_AV_2 (VSRV, "vsrv", CONST, vsrv) +BU_P9V_AV_2 (CONVERT_4F32_8I16, "convert_4f32_8i16", CONST, convert_4f32_8i16) /* ISA 3.0 vector overloaded 2-argument functions. 
*/ BU_P9V_OVERLOAD_2 (VSLV, "vslv") BU_P9V_OVERLOAD_2 (VSRV, "vsrv") +BU_P9V_OVERLOAD_2 (CONVERT_4F32_8I16, "convert_4f32_8i16") /* 2 argument vector functions added in ISA 3.0 (power9). */ BU_P9V_AV_2 (VADUB, "vadub", CONST, vaduv16qi3) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 1a40797..2b5193b 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -2417,6 +2417,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, RS6000_BTI_V4SF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P9V_BUILTIN_VEC_CONVERT_4F32_8I16, P9V_BUILTIN_CONVERT_4F32_8I16, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 5cb512f..891860b 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15363,6 +15363,8 @@ signed int vec_cntlz_lsbb (vector unsigned char); signed int vec_cnttz_lsbb (vector signed char); signed int vec_cnttz_lsbb (vector unsigned char); +vector unsigned short vec_pack_to_short_fp32 (vector float, vector float); + vector signed char vec_xl_len (signed char *addr, size_t len); vector unsigned char vec_xl_len (unsigned char *addr, size_t len); vector signed int vec_xl_len (signed int *addr, size_t len); diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c new file mode 100644 index 0000000..790f64c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { powerpc*-*-linux* } } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-O2 -mcpu=power9" } */ +/* { 
dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ + +#include <altivec.h> + +void abort (void); + +int main() { + int i; + vector float vfa, vfb; + vector unsigned short vur, vuexpt; + + vfa = (vector float){3.4, 5.0, 20.0, 50.9 }; + vfb = (vector float){10.0, 40.0, 70.0, 100.0 }; + vuexpt = (vector unsigned short){ 3, 5, 20, 50, + 10, 40, 70, 100}; + + vur = vec_pack_to_short_fp32 (vfa, vfb); + + for(i = 0; i< 8; i++) { + if (vur[i] != vuexpt[i]) + abort(); + } +} -- 1.9.1