The following patch fixes an ICE when vectorizing shifts with the simplified SLP operand code by adjusting the type of the shift argument in vectorizable_shift. I also took the liberty of enabling more SLP shift vectorization for shift amounts that are originally not "scalar" (i.e. all the same) but are constant, as we already do that as a fallback anyway.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2019-10-22 Richard Biener <rguent...@suse.de> PR tree-optimization/92166 * tree-vect-slp.c (vect_get_and_check_slp_defs): Demote a vect_constant_def operand to vect_external_defs if there is at least one vect_external_defs. (vect_print_slp_tree): Dump scalar ops. * tree-vect-stmts.c (vectorizable_shift): For SLP shifts with constant shift amount convert the scalars to the desired vector component type. * gcc.dg/vshift-5.c: Amend. Index: gcc/testsuite/gcc.dg/vshift-5.c =================================================================== --- gcc/testsuite/gcc.dg/vshift-5.c (revision 277280) +++ gcc/testsuite/gcc.dg/vshift-5.c (working copy) @@ -41,6 +41,42 @@ f2 (void) } __attribute__((noinline, noclone)) void +f2a (int x) +{ + long long a0, a1, a2, a3; + a0 = a[0]; + a1 = a[1]; + a2 = a[2]; + a3 = a[3]; + a0 = a0 << x; + a1 = a1 << 2; + a2 = a2 << 2; + a3 = a3 << 2; + a[0] = a0; + a[1] = a1; + a[2] = a2; + a[3] = a3; +} + +__attribute__((noinline, noclone)) void +f2b (int x) +{ + long long a0, a1, a2, a3; + a0 = a[0]; + a1 = a[1]; + a2 = a[2]; + a3 = a[3]; + a0 = a0 << 2; + a1 = a1 << 2; + a2 = a2 << x; + a3 = a3 << 2; + a[0] = a0; + a[1] = a1; + a[2] = a2; + a[3] = a3; +} + +__attribute__((noinline, noclone)) void f3 (int x) { long long a0, a1, a2, a3; @@ -77,5 +113,13 @@ main () if (a[0] != (4LL << 7) || a[1] != (3LL << 8) || a[2] != (2LL << 9) || a[3] != (1LL << 10)) abort (); + f2a (3); + if (a[0] != (4LL << 10) || a[1] != (3LL << 10) + || a[2] != (2LL << 11) || a[3] != (1LL << 12)) + abort (); + f2b (3); + if (a[0] != (4LL << 12) || a[1] != (3LL << 12) + || a[2] != (2LL << 14) || a[3] != (1LL << 14)) + abort (); return 0; } Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c (revision 277280) +++ gcc/tree-vect-slp.c (working copy) @@ -475,8 +475,11 @@ again: /* Check the types of the definitions. 
*/ switch (dt) { - case vect_constant_def: case vect_external_def: + /* Make sure to demote the overall operand to external. */ + oprnd_info->first_dt = vect_external_def; + /* Fallthru. */ + case vect_constant_def: oprnd_info->def_stmts.quick_push (NULL); oprnd_info->ops.quick_push (oprnd); break; @@ -1504,9 +1507,10 @@ static void vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, slp_tree node, hash_set<slp_tree> &visited) { - int i; + unsigned i; stmt_vec_info stmt_info; slp_tree child; + tree op; if (visited.add (node)) return; @@ -1514,11 +1518,23 @@ vect_print_slp_tree (dump_flags_t dump_k dump_metadata_t metadata (dump_kind, loc.get_impl_location ()); dump_user_location_t user_loc = loc.get_user_location (); dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n", - SLP_TREE_DEF_TYPE (node) != vect_internal_def - ? " (external)" : "", node, + SLP_TREE_DEF_TYPE (node) == vect_external_def + ? " (external)" + : (SLP_TREE_DEF_TYPE (node) == vect_constant_def + ? " (constant)" + : ""), node, estimated_poly_value (node->max_nunits)); - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) - dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt); + if (SLP_TREE_SCALAR_STMTS (node).exists ()) + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) + dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt); + else + { + dump_printf_loc (metadata, user_loc, "\t{ "); + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) + dump_printf (metadata, "%T%s ", op, + i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? 
"," : ""); + dump_printf (metadata, "}\n"); + } if (SLP_TREE_CHILDREN (node).is_empty ()) return; dump_printf_loc (metadata, user_loc, "\tchildren"); Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c (revision 277280) +++ gcc/tree-vect-stmts.c (working copy) @@ -5670,8 +5670,11 @@ vectorizable_shift (stmt_vec_info stmt_i if (!op1_vectype) op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out); - if (op1_vectype == NULL_TREE - || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) + if ((op1_vectype == NULL_TREE + || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) + && (!slp_node + || SLP_TREE_DEF_TYPE + (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5710,7 +5713,10 @@ vectorizable_shift (stmt_vec_info stmt_i so make sure the scalar is the right type if we are dealing with vectors of long long/long/short/char. */ if (dt[1] == vect_constant_def) - op1 = fold_convert (TREE_TYPE (vectype), op1); + { + if (!slp_node) + op1 = fold_convert (TREE_TYPE (vectype), op1); + } else if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op1))) { @@ -5821,6 +5827,23 @@ vectorizable_shift (stmt_vec_info stmt_i } } } + else if (slp_node + && !useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (op1))) + { + /* Convert the scalar constant shift amounts in-place. */ + slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1]; + gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def); + for (unsigned i = 0; + i < SLP_TREE_SCALAR_OPS (shift).length (); ++i) + { + SLP_TREE_SCALAR_OPS (shift)[i] + = fold_convert (TREE_TYPE (vectype), + SLP_TREE_SCALAR_OPS (shift)[i]); + gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i]) + == INTEGER_CST)); + } + } /* vec_oprnd1 is available if operand 1 should be of a scalar-type (a special case for certain kind of vector shifts); otherwise,