Re: [PATCH, rs6000] Add builtin support for vec_insert4b, vec_extract4b
Hi! On Wed, Feb 14, 2018 at 12:08:27PM -0800, Carl Love wrote: > Per Segher's comments on the first version of the patch. I split the > patch into two. Thanks, much easier to read. > 2018-02-13 Carl Love> > * config/rs6000/altivec.h: Add builtin names vec_extract4b > vec_insert4b. * config/rs6000/altivec.h (vec_extract4b, vec_insert4b): New. (Similar for the rest of the changelog). > --- a/gcc/config/rs6000/rs6000-c.c > +++ b/gcc/config/rs6000/rs6000-c.c > @@ -5433,6 +5433,8 @@ const struct altivec_builtin_types > altivec_overloaded_builtins[] = { > RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 }, >{ P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, > RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 }, > + { P9V_BUILTIN_VEC_EXTRACT4B, P9V_BUILTIN_EXTRACT4B, > +RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 > }, The old builtin use unsigned int for the element number (but signed is correct, yes). Looks good. Okay for trunk. Thanks! Segher
[PATCH, rs6000] Add builtin support for vec_insert4b, vec_extract4b
GCC maintainers: Per Segher's comments on the first version of the patch. I split the patch into two. The first patch (this one) adds the ABI specified vec_insert4b and vec_extract builtins. It adds a runnable file to test the ABI specified builtin instances. Note, the runnable test file does not test for illegal argument values such as the const int second argument > 12 or of the wrong type. Note, the rtl for vec_insert4b in vsx.md is a copy of the vec_vinsert4b code with the name changed. The rtl for vec_extract4b is new. The second patch removes all of the non-ABI builtin support. Additionally, I have addressed the other comments from Segher with regards to formatting issues and rtl register specification. This patch has been tested on: powerpc64le-unknown-linux-gnu (Power 8 LE) powerpc64le-unknown-linux-gnu (Power 9 LE) with no regressions. Let me know if the patch looks OK or not. Thanks. The patch should also be ported to GCC 7 so we are in compliance with the ABI. Carl Love --- gcc/ChangeLog: 2018-02-13 Carl Love* config/rs6000/altivec.h: Add builtin names vec_extract4b vec_insert4b. * config/rs6000/rs6000-builtin.def: Add INSERT4B and EXTRACT4B definitions. * config/rs6000/rs6000-c.c: Add the definitions for P9V_BUILTIN_VEC_EXTRACT4B and P9V_BUILTIN_VEC_INSERT4B. * config/rs6000/rs6000.c (altivec_expand_builtin): Add P9V_BUILTIN_EXTRACT4B and P9V_BUILTIN_INSERT4B case statements. * config/rs6000/vsx.md: Add define_insn extract4b. Add define_expand definition for insert4b and define insn *insert3b_internal. * doc/extend.texi: Add documentation for vec_extract4b. gcc/testsuite/ChangeLog: 2018-02-13 Carl Love * gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file for the ABI definitions for vec_extract4b and vec_insert4b. --- gcc/config/rs6000/altivec.h| 2 + gcc/config/rs6000/rs6000-builtin.def | 4 + gcc/config/rs6000/rs6000-c.c | 8 + gcc/config/rs6000/rs6000.c | 2 + gcc/config/rs6000/vsx.md | 41 + gcc/doc/extend.texi| 7 + .../gcc.target/powerpc/builtins-7-p9-runnable.c| 169 + 7 files changed, 233 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 684cb1990..3bce2ae39 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -435,6 +435,8 @@ #define vec_vctzw __builtin_vec_vctzw #define vec_vextract4b __builtin_vec_vextract4b #define vec_vinsert4b __builtin_vec_vinsert4b +#define vec_extract4b __builtin_vec_extract4b +#define vec_insert4b __builtin_vec_insert4b #define vec_vprtyb __builtin_vec_vprtyb #define vec_vprtybd __builtin_vec_vprtybd #define vec_vprtybw __builtin_vec_vprtybw diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 86604da46..420d12e29 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2229,6 +2229,8 @@ BU_P9V_AV_2 (VEXTUWRX, "vextuwrx",CONST, vextuwrx) BU_P9V_VSX_2 (VEXTRACT4B, "vextract4b", CONST, vextract4b) BU_P9V_VSX_3 (VINSERT4B,"vinsert4b", CONST, vinsert4b) BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di",CONST, vinsert4b_di) +BU_P9V_VSX_3 (INSERT4B,"insert4b", CONST, insert4b) +BU_P9V_VSX_2 (EXTRACT4B, "extract4b",CONST, extract4b) /* Hardware IEEE 128-bit floating point round to odd instrucitons added in ISA 3.0 (power9). */ @@ -2291,11 +2293,13 @@ BU_P9V_OVERLOAD_2 (XL_LEN_R,"xl_len_r") BU_P9V_OVERLOAD_2 (VEXTULX,"vextulx") BU_P9V_OVERLOAD_2 (VEXTURX,"vexturx") BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b") +BU_P9V_OVERLOAD_2 (EXTRACT4B, "extract4b") /* ISA 3.0 Vector scalar overloaded 3 argument functions */ BU_P9V_OVERLOAD_3 (STXVL, "stxvl") BU_P9V_OVERLOAD_3 (XST_LEN_R, "xst_len_r") BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b") +BU_P9V_OVERLOAD_3 (INSERT4B,"insert4b") /* Overloaded CMPNE support was implemented prior to Power 9, so is not mentioned here. */ diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index a68be511c..56e66db98 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -5433,6 +5433,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 }, { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_VEC_EXTRACT4B,
Re: [PATCH, rs6000] Add builtin support for vec_insert4b, vec_extract4b
Hi Carl, On Thu, Feb 01, 2018 at 11:31:55AM -0800, Carl Love wrote: > The following patch contains fixes for the builtins to add and extract > a word from a char vector. The existing names for the builtins that do > this are not consistent with the ABI. Additionally, the supported > arguments are not all consistent with the ABI. This patch fixes the > support for the insert and extract word builtins so they are consistent > with the "64-Bit ELF V2 ABI Specification". The patch is hard to review because it does multiple things at once. Would have been easier if you first add the new builtins and then delete the old one. But I'll try :-) > Let me know if the patch looks OK or not. Let me know if you want to > include it in stage 4 or wait for the next release. Thanks. It should also go to GCC 7, right? > 2018-01-31 Carl Love> > * config/rs6000/altivec.h: Change vec_vextract4b to vec_extract4b You have a tab before vec_extract4b there. > and vec_vinsert4b to vec_insert4b. > * config/rs6000/rs6000-builtin.def: Remove VEXTRACT4B, VINSERT4B > and VINSERT4B_DI definitions. Add INSERT4B and EXTRACT4B definitions. Two spaces after dot (numerous times, this is the first one). You have a tab before EXTRACT4B. * config/rs6000/rs6000-builtin.def (VEXTRACT4B, VINSERT4B): Delete. (EXTRACT4B, INSERT4B): New. (And similar to all below). > *doc/extend.texi: Remove documentation for the vec_vextract4b. Fix > documentation for vec_insert4b. Add documentation for vec_extract4b. Space after *. > 2018-01-31 Carl Love > * gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file. > * gcc.target/powerpc/p9_vinsert4b-1.c: Remove file. > * gcc.target/powerpc/p9_vinsert4b-2.c: Remove file. The files are called p9-vinsert* in fact. Is all what they tested now in builtins-7-p9-runnable.c ? > +(define_insn "extract4b" > + [(set (match_operand:V2DI 0 "vsx_register_operand") > + (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa") > + (match_operand:QI 2 "const_0_to_12_operand" "n")] > + UNSPEC_XXEXTRACTUW))] >"TARGET_P9_VECTOR" > { >if (!VECTOR_ELT_ORDER_BIG) > operands[2] = GEN_INT (12 - INTVAL (operands[2])); > > + return "xxextractuw %0,%1,%2"; > +}) You need %x0 and %x1 here I think? > -vector signed char vec_insert4b (vector int, vector signed char, const int); > +vector unsigned char vec_insert4b (vector int, vector unsigned char, > + const signed int); Maybe just write "int" instead of "signed int"? > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c > @@ -0,0 +1,168 @@ > +/* { dg-do run { target { powerpc64*-*-* && p9vector_hw } } } */ As always: why powerpc64* instead of powerpc*? Segher
[PATCH, rs6000] Add builtin support for vec_insert4b, vec_extract4b
GCC maintainers: The following patch contains fixes for the builtins to add and extract a word from a char vector. The existing names for the builtins that do this are not consistent with the ABI. Additionally, the supported arguments are not all consistent with the ABI. This patch fixes the support for the insert and extract word builtins so they are consistent with the "64-Bit ELF V2 ABI Specification". The patch has been tested on: powerpc64le-unknown-linux-gnu (Power 8 LE) powerpc64le-unknown-linux-gnu (Power 9 LE) Let me know if the patch looks OK or not. Let me know if you want to include it in stage 4 or wait for the next release. Thanks. Carl Love -- gcc/ChangeLog: 2018-01-31 Carl Love* config/rs6000/altivec.h: Change vec_vextract4b to vec_extract4b and vec_vinsert4b to vec_insert4b. * config/rs6000/rs6000-builtin.def: Remove VEXTRACT4B, VINSERT4B and VINSERT4B_DI definitions. Add INSERT4B and EXTRACT4B definitions. * config/rs6000/rs6000-c.c: Remove P9V_BUILTIN_VEC_VEXTRACT4B and P9V_BUILTIN_VEC_VINSERT4B definitions. Add the definitions for P9V_BUILTIN_VEC_EXTRACT4B and P9V_BUILTIN_VEC_INSERT4B. * config/rs6000/rs6000.c (altivec_expand_builtin): Remove P9V_BUILTIN_VEXTRACT4B, P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, P9V_BUILTIN_VEC_VINSERT4B case statements. Add P9V_BUILTIN_EXTRACT4B and P9V_BUILTIN_INSERT4B case statements. * config/rs6000/vsx.md: Remove define_expand vextract4b and define_insn_and_split *vextract4b_internal. Add define_insn extract4b definition. Rename define_expand vinsert4b to define_expand insert4b and define_insn *vinsert4b_internal to define_insn *insert4b_internal. Remove definitions for define_expand vinsert4b_di and define_insn *vinsert4b_di_internal. *doc/extend.texi: Remove documentation for the vec_vextract4b. Fix documentation for vec_insert4b. Add documentation for vec_extract4b. gcc/testsuite/ChangeLog: 2018-01-31 Carl Love * gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file. * gcc.target/powerpc/p9_vinsert4b-1.c: Remove file. * gcc.target/powerpc/p9_vinsert4b-2.c: Remove file. --- gcc/config/rs6000/altivec.h| 4 +- gcc/config/rs6000/rs6000-builtin.def | 9 +- gcc/config/rs6000/rs6000-c.c | 31 +--- gcc/config/rs6000/rs6000.c | 7 +- gcc/config/rs6000/vsx.md | 65 ++-- gcc/doc/extend.texi| 11 +- .../gcc.target/powerpc/builtins-7-p9-runnable.c| 168 + gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c | 39 - gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c | 30 9 files changed, 197 insertions(+), 167 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c delete mode 100644 gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c delete mode 100644 gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 684cb1990..1e495e69c 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -433,8 +433,8 @@ #define vec_vctzd __builtin_vec_vctzd #define vec_vctzh __builtin_vec_vctzh #define vec_vctzw __builtin_vec_vctzw -#define vec_vextract4b __builtin_vec_vextract4b -#define vec_vinsert4b __builtin_vec_vinsert4b +#define vec_extract4b __builtin_vec_extract4b +#define vec_insert4b __builtin_vec_insert4b #define vec_vprtyb __builtin_vec_vprtyb #define vec_vprtybd __builtin_vec_vprtybd #define vec_vprtybw __builtin_vec_vprtybw diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 86604da46..37595cc29 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2226,9 +2226,8 @@ BU_P9V_AV_2 (VEXTUWLX, "vextuwlx",CONST, vextuwlx) BU_P9V_AV_2 (VEXTUWRX, "vextuwrx", CONST, vextuwrx) /* Insert/extract 4 byte word into a vector. */ -BU_P9V_VSX_2 (VEXTRACT4B, "vextract4b", CONST, vextract4b) -BU_P9V_VSX_3 (VINSERT4B,"vinsert4b", CONST, vinsert4b) -BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di",CONST, vinsert4b_di) +BU_P9V_VSX_3 (INSERT4B,"insert4b", CONST, insert4b) +BU_P9V_VSX_2 (EXTRACT4B, "extract4b",CONST, extract4b) /* Hardware IEEE 128-bit floating point round to odd instrucitons added in ISA 3.0 (power9). */ @@ -2290,12 +2289,12 @@ BU_P9V_OVERLOAD_2 (LXVL,"lxvl") BU_P9V_OVERLOAD_2 (XL_LEN_R, "xl_len_r") BU_P9V_OVERLOAD_2 (VEXTULX,"vextulx") BU_P9V_OVERLOAD_2 (VEXTURX,"vexturx")