The following patch fixes an ICE when vectorizing shifts with the
simplified SLP operand code by adjusting the type of the shift
argument in vectorizable_shift.  I also took the liberty of enabling
more SLP shift vectorization for shift amounts that were originally
not "scalar" (i.e. all the same) but are constant, since we handle
those as a fallback anyway.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-10-22  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/92166
        * tree-vect-slp.c (vect_get_and_check_slp_defs): Demote a
        vect_constant_def operand to vect_external_def if there is
        at least one vect_external_def.
        (vect_print_slp_tree): Dump scalar ops.
        * tree-vect-stmts.c (vectorizable_shift): For SLP shifts
        with constant shift amount convert the scalars to the desired
        vector component type.

        * gcc.dg/vshift-5.c: Amend.

Index: gcc/testsuite/gcc.dg/vshift-5.c
===================================================================
--- gcc/testsuite/gcc.dg/vshift-5.c     (revision 277280)
+++ gcc/testsuite/gcc.dg/vshift-5.c     (working copy)
@@ -41,6 +41,42 @@ f2 (void)
 }
 
 __attribute__((noinline, noclone)) void
+f2a (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2b (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << x;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
 f3 (int x)
 {
   long long a0, a1, a2, a3;
@@ -77,5 +113,13 @@ main ()
   if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
       || a[2] != (2LL << 9) || a[3] != (1LL << 10))
     abort ();
+  f2a (3);
+  if (a[0] != (4LL << 10) || a[1] != (3LL << 10)
+      || a[2] != (2LL << 11) || a[3] != (1LL << 12))
+    abort ();
+  f2b (3);
+  if (a[0] != (4LL << 12) || a[1] != (3LL << 12)
+      || a[2] != (2LL << 14) || a[3] != (1LL << 14))
+    abort ();
   return 0;
 }
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c (revision 277280)
+++ gcc/tree-vect-slp.c (working copy)
@@ -475,8 +475,11 @@ again:
       /* Check the types of the definitions.  */
       switch (dt)
        {
-       case vect_constant_def:
        case vect_external_def:
+         /* Make sure to demote the overall operand to external.  */
+         oprnd_info->first_dt = vect_external_def;
+         /* Fallthru.  */
+       case vect_constant_def:
          oprnd_info->def_stmts.quick_push (NULL);
          oprnd_info->ops.quick_push (oprnd);
          break;
@@ -1504,9 +1507,10 @@ static void
 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node, hash_set<slp_tree> &visited)
 {
-  int i;
+  unsigned i;
   stmt_vec_info stmt_info;
   slp_tree child;
+  tree op;
 
   if (visited.add (node))
     return;
@@ -1514,11 +1518,23 @@ vect_print_slp_tree (dump_flags_t dump_k
   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
   dump_user_location_t user_loc = loc.get_user_location ();
   dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
-                  SLP_TREE_DEF_TYPE (node) != vect_internal_def
-                  ? " (external)" : "", node,
+                  SLP_TREE_DEF_TYPE (node) == vect_external_def
+                  ? " (external)"
+                  : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
+                     ? " (constant)"
+                     : ""), node,
                   estimated_poly_value (node->max_nunits));
-  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
-    dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
+  if (SLP_TREE_SCALAR_STMTS (node).exists ())
+    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
+      dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
+  else
+    {
+      dump_printf_loc (metadata, user_loc, "\t{ ");
+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
+       dump_printf (metadata, "%T%s ", op,
+                    i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
+      dump_printf (metadata, "}\n");
+    }
   if (SLP_TREE_CHILDREN (node).is_empty ())
     return;
   dump_printf_loc (metadata, user_loc, "\tchildren");
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 277280)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -5670,8 +5670,11 @@ vectorizable_shift (stmt_vec_info stmt_i
 
       if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
-      if (op1_vectype == NULL_TREE
-         || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+      if ((op1_vectype == NULL_TREE
+          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+         && (!slp_node
+             || SLP_TREE_DEF_TYPE
+                  (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5710,7 +5713,10 @@ vectorizable_shift (stmt_vec_info stmt_i
                  so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
               if (dt[1] == vect_constant_def)
-                op1 = fold_convert (TREE_TYPE (vectype), op1);
+               {
+                 if (!slp_node)
+                   op1 = fold_convert (TREE_TYPE (vectype), op1);
+               }
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
@@ -5821,6 +5827,23 @@ vectorizable_shift (stmt_vec_info stmt_i
                     }
                 }
             }
+         else if (slp_node
+                  && !useless_type_conversion_p (TREE_TYPE (vectype),
+                                                 TREE_TYPE (op1)))
+           {
+             /* Convert the scalar constant shift amounts in-place.  */
+             slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
+             gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
+             for (unsigned i = 0;
+                  i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
+               {
+                 SLP_TREE_SCALAR_OPS (shift)[i]
+                   = fold_convert (TREE_TYPE (vectype),
+                                   SLP_TREE_SCALAR_OPS (shift)[i]);
+                 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
+                              == INTEGER_CST));
+               }
+           }
 
           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
              (a special case for certain kind of vector shifts); otherwise,

Reply via email to