During code review, it was discovered that the implementation of __builtin_altivec_lvx_v1ti is not complete. The constant ALTIVEC_BUILTIN_LVX_V1TI is introduced and is bound to the function __builtin_altivec_lvx_v1ti. However, this function's implementation is incomplete because there is no call to the def_builtin function for this binding.
This patch provides the missing pieces to add support for this function. Additionally, this patch introduces four new __int128-based prototypes of the overloaded __builtin_vec_ld function. This is the function that implements the vec_ld () macro expansion. A new test case has been provided to exercise each of these prototypes. This patch has been bootstrapped and tested without regressions on both powerpc64le-unknown-linux (P8) and on powerpc-linux (P7 big-endian, with both -m32 and -m64 target options). Is this patch ok for trunk? gcc/ChangeLog: 2018-03-14 Kelvin Nilsen <kel...@gcc.gnu.org> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add entries for V1TI variants of __builtin_vec_ld builtin. * config/rs6000/rs6000.c (altivec_expand_lv_builtin): Add test and handling of V1TI variant of LVX icode pattern. (altivec_expand_builtin): Add case for ALTIVEC_BUILTIN_LVX_V1TI. (rs6000_gimple_fold_builtin): Likewise. (altivec_init_builtins): Add code to define __builtin_altivec_lvx_v1ti function. * doc/extend.texi: Add four new prototypes for vec_ld. gcc/testsuite/ChangeLog: 2018-03-14 Kelvin Nilsen <kel...@gcc.gnu.org> * gcc.target/powerpc/altivec-ld-1.c: New test. 
Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (revision 258341) +++ gcc/config/rs6000/rs6000-c.c (working copy) @@ -1562,6 +1562,15 @@ const struct altivec_builtin_types altivec_overloa { VSX_BUILTIN_VEC_FLOATO, VSX_BUILTIN_UNS_FLOATO_V2DI, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V2DI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTTI, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTTI, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI, Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 258341) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -14452,6 +14452,7 @@ altivec_expand_lv_builtin (enum insn_code icode, t LVXL and LVE*X expand to use UNSPECs to hide their special behavior, so the raw address is fine. 
*/ if (icode == CODE_FOR_altivec_lvx_v2df_2op + || icode == CODE_FOR_altivec_lvx_v1ti_2op || icode == CODE_FOR_altivec_lvx_v2di_2op || icode == CODE_FOR_altivec_lvx_v4sf_2op || icode == CODE_FOR_altivec_lvx_v4si_2op @@ -15811,6 +15812,9 @@ altivec_expand_builtin (tree exp, rtx target, bool case ALTIVEC_BUILTIN_LVX_V2DI: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op, exp, target, false); + case ALTIVEC_BUILTIN_LVX_V1TI: + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti_2op, + exp, target, false); case ALTIVEC_BUILTIN_LVX_V4SF: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op, exp, target, false); @@ -16542,6 +16546,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator * case ALTIVEC_BUILTIN_LVX_V4SF: case ALTIVEC_BUILTIN_LVX_V2DI: case ALTIVEC_BUILTIN_LVX_V2DF: + case ALTIVEC_BUILTIN_LVX_V1TI: { arg0 = gimple_call_arg (stmt, 0); // offset arg1 = gimple_call_arg (stmt, 1); // address @@ -17443,6 +17448,10 @@ altivec_init_builtins (void) = build_function_type_list (V2DI_type_node, long_integer_type_node, pcvoid_type_node, NULL_TREE); + tree v1ti_ftype_long_pcvoid + = build_function_type_list (V1TI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); tree void_ftype_opaque_long_pvoid = build_function_type_list (void_type_node, @@ -17540,6 +17549,8 @@ altivec_init_builtins (void) def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX); def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX_V2DF); + def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid, + ALTIVEC_BUILTIN_LVX_V1TI); def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX_V2DI); def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid, Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi (revision 258341) +++ gcc/doc/extend.texi (working copy) @@ -18396,6 +18396,10 @@ 
vector double vec_div (vector double, vector doubl vector long vec_div (vector long, vector long); vector unsigned long vec_div (vector unsigned long, vector unsigned long); vector double vec_floor (vector double); +vector __int128 vec_ld (int, const vector __int128 *); +vector unsigned __int128 vec_ld (int, const vector unsigned __int128 *); +vector __int128 vec_ld (int, const __int128 *); +vector unsigned __int128 vec_ld (int, const unsigned __int128 *); vector double vec_ld (int, const vector double *); vector double vec_ld (int, const double *); vector double vec_ldl (int, const vector double *); Index: gcc/testsuite/gcc.target/powerpc/altivec-ld-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/altivec-ld-1.c (nonexistent) +++ gcc/testsuite/gcc.target/powerpc/altivec-ld-1.c (working copy) @@ -0,0 +1,67 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-maltivec -O0 -Wall -Wno-deprecated" } */ + +#include <altivec.h> +#include <stdlib.h> + +static __vector __int128 v; +static __vector __int128 *pv; + +static __vector unsigned __int128 uv; +static __vector unsigned __int128 *puv; + +static __int128 i128; +static __int128 *pi128; + +static unsigned __int128 u128; +static unsigned __int128 *pu128; + + +void +doInitialization () +{ + v[0] = -1; + pv = &v; + + uv[0] = 0xcafebabe; + puv = &uv; + + i128 = 0xfabeabe; + pi128 = &i128; + + u128 = 0xabefabe; + pu128 = &u128; +} + +int +main (int argc, char *argv[]) +{ + __vector __int128 loaded_v; + __vector unsigned __int128 loaded_uv; + + /* Usage: + * <Type> result = vec_ld (int index, __vector <Type> v) + * is equivalent to: + * result = v [index]; + */ + doInitialization (); + loaded_v = vec_ld (0, pv); + if (loaded_v[0] != -1) + abort (); + + loaded_uv = vec_ld (0, puv); + if (loaded_uv[0] != 0xcafebabe) + abort (); + + loaded_v = vec_ld (0, 
pi128); + if (loaded_v[0] != 0xfabeabe) + abort (); + + loaded_uv = vec_ld (0, pu128); + if (loaded_uv[0] != 0xabefabe) + abort (); + + return 0; +}