On Fri, 21 May 2021, Jason Merrill wrote:

> On 5/21/21 8:33 AM, Richard Biener wrote:
> > This adds support for the clang __builtin_shufflevector extension to
> > the C and C++ frontends.  The builtin is lowered to VEC_PERM_EXPR.
> > Because VEC_PERM_EXPR does not support differently sized vector inputs
> > or result, nor the special permute index of -1 (don't-care),
> > c_build_shufflevector applies lowering by widening inputs and output
> > to the widest vector, replacing -1 by a defined index and
> > subsetting the final vector if we produced a wider result than
> > desired.
> > 
> > Code generation thus can be sub-optimal; follow-up patches will
> > aim to fix that by recovering from part of the missing features
> > during RTL expansion and by relaxing the constraints of the GIMPLE
> > IL with regard to VEC_PERM_EXPR.
> > 
> > Bootstrapped on x86_64-unknown-linux-gnu, (re-)testing in progress.
> > 
> > Honza - you've filed PR88601, can you point me to testcases that
> > exercise common uses so we can look at code generation quality
> > and where time is spent best in improving things?
> > 
> > OK for trunk?
> > 
> > Thanks,
> > Richard.
> > 
> > 2021-05-21  Richard Biener  <rguent...@suse.de>
> > 
> >     PR c++/88601
> > gcc/c-family/
> >  * c-common.c: Include tree-vector-builder.h and
> >  vec-perm-indices.h.
> >  (c_common_reswords): Add __builtin_shufflevector.
> >  (c_build_shufflevector): New function.
> >  * c-common.h (enum rid): Add RID_BUILTIN_SHUFFLEVECTOR.
> >  (c_build_shufflevector): Declare.
> > 
> > gcc/c/
> >  * c-decl.c (names_builtin_p): Handle RID_BUILTIN_SHUFFLEVECTOR.
> >  * c-parser.c (c_parser_postfix_expression): Likewise.
> > 
> > gcc/cp/
> >  * cp-objcp-common.c (names_builtin_p): Handle
> >  RID_BUILTIN_SHUFFLEVECTOR.
> >  * cp-tree.h (build_x_shufflevector): Declare.
> >  * parser.c (cp_parser_postfix_expression): Handle
> >  RID_BUILTIN_SHUFFLEVECTOR.
> >  * pt.c (tsubst_copy_and_build): Handle IFN_SHUFFLEVECTOR.
> >  * typeck.c (build_x_shufflevector): Build either a lowered
> >  VEC_PERM_EXPR or an unlowered shufflevector via a temporary
> >  internal function IFN_SHUFFLEVECTOR.
> > 
> > gcc/
> >  * internal-fn.c (expand_SHUFFLEVECTOR): Define.
> >  * internal-fn.def (SHUFFLEVECTOR): New.
> >  * internal-fn.h (expand_SHUFFLEVECTOR): Declare.
> > 
> > gcc/testsuite/
> >  * c-c++-common/builtin-shufflevector-2.c: New testcase.
> >  * c-c++-common/torture/builtin-shufflevector-1.c: Likewise.
> >  * g++.dg/builtin-shufflevector-1.C: Likewise.
> >  * g++.dg/builtin-shufflevector-2.C: Likewise.
> > ---
> >   gcc/c-family/c-common.c                       | 139 ++++++++++++++++++
> >   gcc/c-family/c-common.h                       |   4 +-
> >   gcc/c/c-decl.c                                |   1 +
> >   gcc/c/c-parser.c                              |  38 +++++
> >   gcc/cp/cp-objcp-common.c                      |   1 +
> >   gcc/cp/cp-tree.h                              |   3 +
> >   gcc/cp/parser.c                               |  15 ++
> >   gcc/cp/pt.c                                   |   9 ++
> >   gcc/cp/typeck.c                               |  36 +++++
> >   gcc/internal-fn.c                             |   6 +
> >   gcc/internal-fn.def                           |   3 +
> >   gcc/internal-fn.h                             |   1 +
> >   .../c-c++-common/builtin-shufflevector-2.c    |  18 +++
> >   .../torture/builtin-shufflevector-1.c         |  49 ++++++
> >   .../g++.dg/builtin-shufflevector-1.C          |  18 +++
> >   .../g++.dg/builtin-shufflevector-2.C          |  12 ++
> >   16 files changed, 352 insertions(+), 1 deletion(-)
> >   create mode 100644 gcc/testsuite/c-c++-common/builtin-shufflevector-2.c
> >   create mode 100644
> >   gcc/testsuite/c-c++-common/torture/builtin-shufflevector-1.c
> >   create mode 100644 gcc/testsuite/g++.dg/builtin-shufflevector-1.C
> >   create mode 100644 gcc/testsuite/g++.dg/builtin-shufflevector-2.C
> > 
> > diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
> > index b7daa2e2654..c4eb2b1c920 100644
> > --- a/gcc/c-family/c-common.c
> > +++ b/gcc/c-family/c-common.c
> > @@ -51,6 +51,8 @@ along with GCC; see the file COPYING3.  If not see
> >   #include "c-spellcheck.h"
> >   #include "selftest.h"
> >   #include "debug.h"
> > +#include "tree-vector-builder.h"
> > +#include "vec-perm-indices.h"
> >   
> >   cpp_reader *parse_in;             /* Declared in c-pragma.h.  */
> >   
> > @@ -383,6 +385,7 @@ const struct c_common_resword c_common_reswords[] =
> >     { "__builtin_has_attribute", RID_BUILTIN_HAS_ATTRIBUTE, 0 },
> >     { "__builtin_launder", RID_BUILTIN_LAUNDER, D_CXXONLY },
> >     { "__builtin_shuffle", RID_BUILTIN_SHUFFLE, 0 },
> > +  { "__builtin_shufflevector", RID_BUILTIN_SHUFFLEVECTOR, 0 },
> >     { "__builtin_tgmath", RID_BUILTIN_TGMATH, D_CONLY },
> >     { "__builtin_offsetof", RID_OFFSETOF, 0 },
> >     { "__builtin_types_compatible_p", RID_TYPES_COMPATIBLE_P, D_CONLY },
> > @@ -1108,6 +1111,142 @@ c_build_vec_perm_expr (location_t loc, tree v0, tree
> > v1, tree mask,
> >     return ret;
> >   }
> >   
> > +/* Build a VEC_PERM_EXPR if V0, V1 are not error_mark_nodes
> > +   and have vector types, V0 has the same element type as V1, and the
> > +   number of elements of the result is that of MASK.  */
> > +tree
> > +c_build_shufflevector (location_t loc, tree v0, tree v1, vec<tree> mask,
> 
> IMO a vec<> shouldn't be passed by value.  It looks like it will do the
> shallow copy you want with the current definition of vec, but that seems
> unwise to rely on.  In general, containers should be passed by (const)
> reference unless you actually want to copy all the elements.

OK, I see.  vec<> is just a single pointer here so passing by value
avoids an indirection - but well, fixed.

> > +                  bool complain)
> > +{
> > +  tree ret;
> > +  bool wrap = true;
> > +  bool maybe_const = false;
> > +
> > +  if (v0 == error_mark_node || v1 == error_mark_node)
> > +    return error_mark_node;
> > +
> > +  if (!gnu_vector_type_p (TREE_TYPE (v0))
> > +      || !gnu_vector_type_p (TREE_TYPE (v1)))
> > +    {
> > +      if (complain)
> > +   error_at (loc, "%<__builtin_shufflevector%> arguments must be
> > vectors");
> > +      return error_mark_node;
> > +    }
> > +
> > +  /* ???  In principle one could select a constant part of a variable size
> > +     vector but things get a bit awkward with trying to support this here.
> > */
> > +  unsigned HOST_WIDE_INT v0n, v1n;
> > +  if (!TYPE_VECTOR_SUBPARTS (TREE_TYPE (v0)).is_constant (&v0n)
> > +      || !TYPE_VECTOR_SUBPARTS (TREE_TYPE (v1)).is_constant (&v1n))
> > +    {
> > +      if (complain)
> > +   error_at (loc, "%<__builtin_shufflevector%> arguments must be
> > constant"
> > +             " size vectors");
> > +      return error_mark_node;
> > +    }
> > +
> > +  if (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (v0)))
> > +      != TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (v1))))
> > +    {
> > +      if (complain)
> > +   error_at (loc, "%<__builtin_shufflevector%> argument vectors must "
> > +             "have the same element type");
> > +      return error_mark_node;
> > +    }
> > +
> > +  if (!pow2p_hwi (mask.length ()))
> > +    {
> > +      if (complain)
> > +   error_at (loc, "%<__builtin_shufflevector%> must specify a result "
> > +             "with a power of two number of elements");
> > +      return error_mark_node;
> > +    }
> > +
> > +  if (!c_dialect_cxx ())
> > +    {
> > +      /* Avoid C_MAYBE_CONST_EXPRs inside VEC_PERM_EXPR.  */
> > +      v0 = c_fully_fold (v0, false, &maybe_const);
> > +      wrap &= maybe_const;
> > +
> > +      v1 = c_fully_fold (v1, false, &maybe_const);
> > +      wrap &= maybe_const;
> > +    }
> > +
> > +  unsigned HOST_WIDE_INT maskl = MAX (mask.length (), MAX (v0n, v1n));
> > +  unsigned HOST_WIDE_INT pad = (v0n < maskl ? maskl - v0n : 0);
> > +  vec_perm_builder sel (maskl, maskl, 1);
> > +  unsigned i;
> > +  for (i = 0; i < mask.length (); ++i)
> > +    {
> > +      tree idx = mask[i];
> > +      if (!tree_fits_shwi_p (idx))
> > +   {
> > +     if (complain)
> > +       error_at (loc, "invalid element index %qE to "
> > +                 "%<__builtin_shufflevector%>", idx);
> > +     return error_mark_node;
> > +   }
> > +      HOST_WIDE_INT iidx = tree_to_shwi (idx);
> > +      if (iidx < -1
> > +     || (iidx != -1
> > +         && (unsigned HOST_WIDE_INT) iidx >= v0n + v1n))
> > +   {
> > +     if (complain)
> > +       error_at (loc, "invalid element index %qE to "
> > +                 "%<__builtin_shufflevector%>", idx);
> > +     return error_mark_node;
> > +   }
> > +      /* ???  Our VEC_PERM_EXPR does not allow for -1 yet.  */
> > +      if (iidx == -1)
> > +   iidx = i;
> > +      /* ???  Our VEC_PERM_EXPR does not allow different sized inputs,
> > +    so pad out a smaller v0.  */
> > +      else if ((unsigned HOST_WIDE_INT) iidx >= v0n)
> > +   iidx += pad;
> > +      sel.quick_push (iidx);
> > +    }
> > +  /* ???  VEC_PERM_EXPR does not support a result that is smaller than
> > +     the inputs, so we have to pad it out.  */
> > +  for (; i < maskl; ++i)
> > +    sel.quick_push (i);
> > +
> > +  vec_perm_indices indices (sel, 2, maskl);
> > +
> > +  tree ret_type = build_vector_type (TREE_TYPE (TREE_TYPE (v0)), maskl);
> > +  tree mask_type = build_vector_type (build_nonstandard_integer_type
> > +           (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (ret_type))), 1),
> > +           maskl);
> > +  /* Pad out arguments to the common vector size.  */
> > +  if (v0n < maskl)
> > +    {
> > +      constructor_elt elt = { NULL_TREE, build_zero_cst (TREE_TYPE (v0)) };
> > +      v0 = build_constructor_single (ret_type, NULL_TREE, v0);
> > +      for (i = 1; i < maskl / v0n; ++i)
> > +   vec_safe_push (CONSTRUCTOR_ELTS (v0), elt);
> > +    }
> > +  if (v1n < maskl)
> > +    {
> > +      constructor_elt elt = { NULL_TREE, build_zero_cst (TREE_TYPE (v1)) };
> > +      v1 = build_constructor_single (ret_type, NULL_TREE, v1);
> > +      for (i = 1; i < maskl / v1n; ++i)
> > +   vec_safe_push (CONSTRUCTOR_ELTS (v1), elt);
> > +    }
> > +  ret = build3_loc (loc, VEC_PERM_EXPR, ret_type, v0, v1,
> > +               vec_perm_indices_to_tree (mask_type, indices));
> > +  /* Get the lowpart we are interested in.  */
> > +  if (mask.length () < maskl)
> > +    {
> > +      tree lpartt = build_vector_type (TREE_TYPE (ret_type), mask.length
> > ());
> > +      ret = build3_loc (loc, BIT_FIELD_REF,
> > +                   lpartt, ret, TYPE_SIZE (lpartt), bitsize_zero_node);
> > +    }
> > +
> > +  if (!c_dialect_cxx () && !wrap)
> > +    ret = c_wrap_maybe_const (ret, true);
> > +
> > +  return ret;
> > +}
> > +
> >   /* Build a VEC_CONVERT ifn for __builtin_convertvector builtin.  */
> >   
> >   tree
> > diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
> > index f30b6c6ac33..be4b29a017b 100644
> > --- a/gcc/c-family/c-common.h
> > +++ b/gcc/c-family/c-common.h
> > @@ -107,7 +107,7 @@ enum rid
> >     RID_ASM,       RID_TYPEOF,   RID_ALIGNOF,  RID_ATTRIBUTE,  RID_VA_ARG,
> >     RID_EXTENSION, RID_IMAGPART, RID_REALPART, RID_LABEL,
> >     RID_CHOOSE_EXPR,
> >     RID_TYPES_COMPATIBLE_P,      RID_BUILTIN_COMPLEX,
> >     RID_BUILTIN_SHUFFLE,
> > -  RID_BUILTIN_CONVERTVECTOR,   RID_BUILTIN_TGMATH,
> > +  RID_BUILTIN_SHUFFLEVECTOR,   RID_BUILTIN_CONVERTVECTOR,
> > RID_BUILTIN_TGMATH,
> >     RID_BUILTIN_HAS_ATTRIBUTE,
> >     RID_DFLOAT32, RID_DFLOAT64, RID_DFLOAT128,
> >   @@ -1048,6 +1048,8 @@ extern bool lvalue_p (const_tree);
> >   extern bool vector_targets_convertible_p (const_tree t1, const_tree t2);
> >   extern bool vector_types_convertible_p (const_tree t1, const_tree t2, bool
> >   emit_lax_note);
> >   extern tree c_build_vec_perm_expr (location_t, tree, tree, tree, bool =
> >   true);
> > +extern tree c_build_shufflevector (location_t, tree, tree,
> > +                              vec<tree>, bool = true);
> >   extern tree c_build_vec_convert (location_t, tree, location_t, tree, bool
> >   = true);
> >   
> >   extern void init_c_lex (void);
> > diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
> > index 53b2b5b637d..d2cd1856c64 100644
> > --- a/gcc/c/c-decl.c
> > +++ b/gcc/c/c-decl.c
> > @@ -10540,6 +10540,7 @@ names_builtin_p (const char *name)
> >       case RID_BUILTIN_CONVERTVECTOR:
> >       case RID_BUILTIN_HAS_ATTRIBUTE:
> >       case RID_BUILTIN_SHUFFLE:
> > +    case RID_BUILTIN_SHUFFLEVECTOR:
> >       case RID_CHOOSE_EXPR:
> >       case RID_OFFSETOF:
> >       case RID_TYPES_COMPATIBLE_P:
> > diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
> > index b9930d487fd..07946ee6352 100644
> > --- a/gcc/c/c-parser.c
> > +++ b/gcc/c/c-parser.c
> > @@ -10000,6 +10000,44 @@ c_parser_postfix_expression (c_parser *parser)
> >        set_c_expr_source_range (&expr, loc, close_paren_loc);
> >        break;
> >       }
> > +   case RID_BUILTIN_SHUFFLEVECTOR:
> > +     {
> > +       vec<c_expr_t, va_gc> *cexpr_list;
> > +       unsigned int i;
> > +       c_expr_t *p;
> > +       location_t close_paren_loc;
> > +
> > +       c_parser_consume_token (parser);
> > +       if (!c_parser_get_builtin_args (parser,
> > +                                       "__builtin_shufflevector",
> > +                                       &cexpr_list, false,
> > +                                       &close_paren_loc))
> > +         {
> > +           expr.set_error ();
> > +           break;
> > +         }
> > +
> > +       FOR_EACH_VEC_SAFE_ELT (cexpr_list, i, p)
> > +         *p = convert_lvalue_to_rvalue (loc, *p, true, true);
> > +
> > +       if (vec_safe_length (cexpr_list) < 3)
> > +         {
> > +           error_at (loc, "wrong number of arguments to "
> > +                          "%<__builtin_shuffle%>");
> > +           expr.set_error ();
> > +         }
> > +       else
> > +         {
> > +           auto_vec<tree, 16> mask;
> > +           for (i = 2; i < cexpr_list->length (); ++i)
> > +             mask.safe_push ((*cexpr_list)[i].value);
> > +           expr.value = c_build_shufflevector (loc,
> > (*cexpr_list)[0].value,
> > +                                               (*cexpr_list)[1].value,
> > +                                               mask);
> > +         }
> > +       set_c_expr_source_range (&expr, loc, close_paren_loc);
> > +       break;
> > +     }
> >    case RID_BUILTIN_CONVERTVECTOR:
> >      {
> >         location_t start_loc = loc;
> > diff --git a/gcc/cp/cp-objcp-common.c b/gcc/cp/cp-objcp-common.c
> > index 9847270694a..46b2248574c 100644
> > --- a/gcc/cp/cp-objcp-common.c
> > +++ b/gcc/cp/cp-objcp-common.c
> > @@ -390,6 +390,7 @@ names_builtin_p (const char *name)
> >       case RID_BUILTIN_CONVERTVECTOR:
> >       case RID_BUILTIN_HAS_ATTRIBUTE:
> >       case RID_BUILTIN_SHUFFLE:
> > +    case RID_BUILTIN_SHUFFLEVECTOR:
> >       case RID_BUILTIN_LAUNDER:
> >       case RID_BUILTIN_BIT_CAST:
> >       case RID_OFFSETOF:
> > diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
> > index aa202715873..c95a820037f 100644
> > --- a/gcc/cp/cp-tree.h
> > +++ b/gcc/cp/cp-tree.h
> > @@ -7895,6 +7895,9 @@ extern tree cp_build_binary_op                  (const
> > op_location_t &,
> >   extern tree build_x_vec_perm_expr               (location_t,
> >          tree, tree, tree,
> >          tsubst_flags_t);
> > +extern tree build_x_shufflevector               (location_t,
> > +                                            vec<tree, va_gc> *,
> > +                                            tsubst_flags_t);
> >   #define cxx_sizeof(T)  cxx_sizeof_or_alignof_type (input_location, T,
> >   SIZEOF_EXPR, false, true)
> >   extern tree build_simple_component_ref            (tree, tree);
> >   extern tree build_ptrmemfunc_access_expr  (tree, tree);
> > diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
> > index 48b83d67b34..0e154916a0a 100644
> > --- a/gcc/cp/parser.c
> > +++ b/gcc/cp/parser.c
> > @@ -7295,6 +7295,7 @@ cp_parser_postfix_expression (cp_parser *parser, bool
> > address_p, bool cast_p,
> >   
> >       case RID_ADDRESSOF:
> >       case RID_BUILTIN_SHUFFLE:
> > +    case RID_BUILTIN_SHUFFLEVECTOR:
> >       case RID_BUILTIN_LAUNDER:
> >          {
> >     vec<tree, va_gc> *vec;
> > @@ -7357,6 +7358,20 @@ cp_parser_postfix_expression (cp_parser *parser, bool
> > address_p, bool cast_p,
> >          }
> >        break;
> >   +   case RID_BUILTIN_SHUFFLEVECTOR:
> > +       if (vec->length () < 3)
> > +         {
> > +           error_at (loc, "wrong number of arguments to "
> > +                          "%<__builtin_shuffle%>");
> 
> shufflevector?

Oops, yes.  I'll wait for other comments before re-posting/testing.

Richard.

> > +           postfix_expression = error_mark_node;
> > +         }
> > +       else
> > +         {
> > +           postfix_expression
> > +             = build_x_shufflevector (loc, vec, tf_warning_or_error);
> > +         }
> > +       break;
> > +
> >      default:
> >        gcc_unreachable ();
> >       }
> > diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
> > index 99a9ee5ade2..6700ce23548 100644
> > --- a/gcc/cp/pt.c
> > +++ b/gcc/cp/pt.c
> > @@ -20400,6 +20400,15 @@ tsubst_copy_and_build (tree t,
> >             RETURN (ret);
> >          break;
> >   +     case IFN_SHUFFLEVECTOR:
> > +         {
> > +           ret = build_x_shufflevector (input_location, call_args,
> > +                                        complain);
> > +           if (ret != error_mark_node)
> > +             RETURN (ret);
> > +           break;
> > +         }
> > +
> >        default:
> >          /* Unsupported internal function with arguments.  */
> >          gcc_unreachable ();
> > diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
> > index 703ddd3cc7a..251ae4bf6dc 100644
> > --- a/gcc/cp/typeck.c
> > +++ b/gcc/cp/typeck.c
> > @@ -5979,6 +5979,42 @@ build_x_vec_perm_expr (location_t loc,
> >                                   orig_arg1, orig_arg2);
> >     return exp;
> >   }
> > +
> > +/* Build a VEC_PERM_EXPR.
> > +   This is a simple wrapper for c_build_shufflevector.  */
> > +tree
> > +build_x_shufflevector (location_t loc, vec<tree, va_gc> *args,
> > +                  tsubst_flags_t complain)
> > +{
> > +  tree arg0 = (*args)[0];
> > +  tree arg1 = (*args)[1];
> > +  if (processing_template_decl)
> > +    {
> > +      for (unsigned i = 0; i < args->length (); ++i)
> > +   if (type_dependent_expression_p ((*args)[i]))
> > +     {
> > +       tree exp = build_min_nt_call_vec (NULL, args);
> > +       CALL_EXPR_IFN (exp) = IFN_SHUFFLEVECTOR;
> > +       return exp;
> > +     }
> > +      arg0 = build_non_dependent_expr (arg0);
> > +      arg1 = build_non_dependent_expr (arg1);
> > +      /* ???  Nothing needed for the index arguments?  */
> > +    }
> > +  auto_vec<tree, 16> mask;
> > +  for (unsigned i = 2; i < args->length (); ++i)
> > +    {
> > +      tree idx = maybe_constant_value ((*args)[i]);
> > +      mask.safe_push (idx);
> > +    }
> > +  tree exp = c_build_shufflevector (loc, arg0, arg1, mask, complain &
> > tf_error);
> > +  if (processing_template_decl && exp != error_mark_node)
> > +    {
> > +      exp = build_min_non_dep_call_vec (exp, NULL, args);
> > +      CALL_EXPR_IFN (exp) = IFN_SHUFFLEVECTOR;
> > +    }
> > +  return exp;
> > +}
> >   
> >   /* Return a tree for the sum or difference (RESULTCODE says which)
> >      of pointer PTROP and integer INTOP.  */
> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > index d209a52f823..d0795cb15c7 100644
> > --- a/gcc/internal-fn.c
> > +++ b/gcc/internal-fn.c
> > @@ -4119,6 +4119,12 @@ vectorized_internal_fn_supported_p (internal_fn ifn,
> > tree type)
> >       && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED));
> >   }
> >   
> > +void
> > +expand_SHUFFLEVECTOR (internal_fn, gcall *)
> > +{
> > +  gcc_unreachable ();
> > +}
> > +
> >   void
> >   expand_PHI (internal_fn, gcall *)
> >   {
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index daeace7a34e..b2f414d2131 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -405,6 +405,9 @@ DEF_INTERNAL_FN (CO_FRAME, ECF_PURE | ECF_NOTHROW |
> > ECF_LEAF, NULL)
> >   /* A NOP function with arbitrary arguments and return value.  */
> >   DEF_INTERNAL_FN (NOP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
> >   
> > +/* Temporary vehicle for __builtin_shufflevector.  */
> > +DEF_INTERNAL_FN (SHUFFLEVECTOR, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
> > +
> >   #undef DEF_INTERNAL_INT_FN
> >   #undef DEF_INTERNAL_FLT_FN
> >   #undef DEF_INTERNAL_FLT_FLOATN_FN
> > diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> > index c6599ce4894..ee53e378944 100644
> > --- a/gcc/internal-fn.h
> > +++ b/gcc/internal-fn.h
> > @@ -229,6 +229,7 @@ extern void expand_addsub_overflow (location_t,
> > tree_code, tree, tree, tree,
> >   extern void expand_internal_call (gcall *);
> >   extern void expand_internal_call (internal_fn, gcall *);
> >   extern void expand_PHI (internal_fn, gcall *);
> > +extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
> >   
> >   extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> >   
> > diff --git a/gcc/testsuite/c-c++-common/builtin-shufflevector-2.c
> > b/gcc/testsuite/c-c++-common/builtin-shufflevector-2.c
> > new file mode 100644
> > index 00000000000..78cec1684fc
> > --- /dev/null
> > +++ b/gcc/testsuite/c-c++-common/builtin-shufflevector-2.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +
> > +typedef long v4di __attribute__((vector_size(4 * sizeof (long))));
> > +typedef int v4si __attribute__((vector_size(4 * sizeof (int))));
> > +typedef int v8si __attribute__((vector_size(8 * sizeof (int))));
> > +
> > +v4si res, a, b;
> > +v4di resl, al, bl;
> > +v8si res8, a8, b8;
> > +void foo (void)
> > +{
> > +  res = __builtin_shufflevector (a, 0, 0, 1, 4, 5); /* { dg-error "must be
> > vectors" } */
> > +  res = __builtin_shufflevector (a, b, 0, 1, 4, 5, 6); /* { dg-error "power
> > of two" } */
> > +  res = __builtin_shufflevector (a, b, 0, 1, 4, 8); /* { dg-error "invalid"
> > } */
> > +  res = __builtin_shufflevector (a, b, 0, 1, -4, 5); /* { dg-error
> > "invalid" } */
> > +  res = __builtin_shufflevector (a, bl, 0, 1, 4, 5); /* { dg-error "same
> > element type" } */
> > +  resl = __builtin_shufflevector (a, b, 0, 1, 4, 5); /* { dg-error "" }
> > incompatible types */
> > +}
> > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-1.c
> > b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-1.c
> > new file mode 100644
> > index 00000000000..c8647364795
> > --- /dev/null
> > +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-1.c
> > @@ -0,0 +1,49 @@
> > +/* { dg-do run } */
> > +
> > +typedef long v4di __attribute__((vector_size(4 * sizeof (long))));
> > +typedef int v4si __attribute__((vector_size(4 * sizeof (int))));
> > +typedef int v8si __attribute__((vector_size(8 * sizeof (int))));
> > +
> > +v4si res[5], a, b;
> > +v4di resl, al, bl;
> > +v8si res8[3], a8, b8;
> > +void __attribute__((noipa))
> > +foo (void)
> > +{
> > +  res[0] = __builtin_shufflevector (a, b, 0, 1, 4, 5);
> > +  res[1] = __builtin_shufflevector (a, b, 0, 1, -1, 5);
> > +  res8[0] = __builtin_shufflevector (a, b, 0, 1, 2, 2 + 1, 4, 5, 6, 7);
> > +  res[2] = __builtin_shufflevector (a8, b8, 0, 8, 1, 9);
> > +  res[3] = __builtin_shufflevector (a8, b, 0, 8, 1, 9);
> > +  res[4] = __builtin_shufflevector (a, b8, 0, 4, 1, 5);
> > +  res8[1] = __builtin_shufflevector (a8, b, 0, 8, 1, 9, 10, 11, 2, 3);
> > +  res8[2] = __builtin_shufflevector (a, b8, 0, 4, 1, 5, -1, -1, -1, -1);
> > +}
> > +
> > +#define comp(a, b, n) \
> > +  for (unsigned i = 0; i < n; ++i) \
> > +    if ((a)[i] != (b)[i]) \
> > +      __builtin_abort ();
> > +
> > +int main()
> > +{
> > +  a = (v4si) { 0, 1, 2, 3 };
> > +  b = (v4si) { 4, 5, 6, 7 };
> > +  a8 = (v8si) { 0, 1, 2, 3, 4, 5, 6, 7 };
> > +  b8 = (v8si) { 8, 9, 10, 11, 12, 13, 14, 15 };
> > +  foo ();
> > +  comp (res[0], ((v4si) { 0, 1, 4, 5}), 4)
> > +  res[1][2] = 9;
> > +  comp (res[1], ((v4si) { 0, 1, 9, 5}), 4)
> > +  comp (res8[0], ((v8si) { 0, 1, 2, 3, 4, 5, 6, 7 }), 8)
> > +  comp (res[2], ((v4si) { 0, 8, 1, 9}), 4)
> > +  comp (res[3], ((v4si) { 0, 4, 1, 5}), 4)
> > +  comp (res[4], ((v4si) { 0, 8, 1, 9}), 4)
> > +  comp (res8[1], ((v8si) { 0, 4, 1, 5, 6, 7, 2, 3 }), 8)
> > +  res8[2][4] = 42;
> > +  res8[2][5] = 42;
> > +  res8[2][6] = 42;
> > +  res8[2][7] = 42;
> > +  comp (res8[2], ((v8si) { 0, 8, 1, 9, 42, 42, 42, 42 }), 8)
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/g++.dg/builtin-shufflevector-1.C
> > b/gcc/testsuite/g++.dg/builtin-shufflevector-1.C
> > new file mode 100644
> > index 00000000000..da838e8ca6c
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/builtin-shufflevector-1.C
> > @@ -0,0 +1,18 @@
> > +// { dg-do compile { target c++11 } }
> > +
> > +template <typename T1, typename T2, int ...args>
> > +struct shufflevector
> > +{
> > +  static auto shuffle (T1 a, T2 b)
> > +      -> decltype (__builtin_shufflevector (a, b, args...))
> > +  {
> > +    return __builtin_shufflevector (a, b, args...);
> > +  }
> > +};
> > +
> > +typedef int v4si __attribute__((vector_size (16)));
> > +v4si a, b, c;
> > +int main()
> > +{
> > +  c = shufflevector<v4si, v4si, 0, 1, 4, 5>::shuffle (a, b);
> > +}
> > diff --git a/gcc/testsuite/g++.dg/builtin-shufflevector-2.C
> > b/gcc/testsuite/g++.dg/builtin-shufflevector-2.C
> > new file mode 100644
> > index 00000000000..c8d096c5295
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/builtin-shufflevector-2.C
> > @@ -0,0 +1,12 @@
> > +// { dg-do compile }
> > +
> > +typedef double v2df __attribute__((vector_size(2 * sizeof (double))));
> > +
> > +template<typename T, typename U, int N, int M>
> > +struct Shuffle {
> > +  void f(T t, U u, v2df a, v2df b) {
> > +    (void)__builtin_shufflevector(t, u, N, M); // { dg-error "invalid" }
> > +  }
> > +};
> > +
> > +template struct Shuffle<v2df, v2df, 4, 3>;
> > 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to