This removes the non-SLP paths from vectorizable_call, propagates out ncopies == 1, and removes the empty loops resulting from that.
Bootstrapped and tested on x86_64-unknown-linux-gnu, will squash and push. * tree-vect-stmts.cc (vectorizable_call): Remove non-SLP path. --- gcc/tree-vect-stmts.cc | 124 +++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 68 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 7a89bc0e5d4..0799e4fd664 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -3381,7 +3381,6 @@ vectorizable_call (vec_info *vinfo, tree vectypes[ARRAY_SIZE (dt)] = {}; slp_tree slp_op[ARRAY_SIZE (dt)] = {}; int ndts = ARRAY_SIZE (dt); - int j; auto_vec<tree, 8> vargs; enum { NARROW, NONE, WIDEN } modifier; size_t i, nargs; @@ -3510,13 +3509,14 @@ vectorizable_call (vec_info *vinfo, return false; } - if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out)) - { + if (vect_emulated_vector_p (vectype_in) + || vect_emulated_vector_p (vectype_out)) + { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "use emulated vector type for call\n"); return false; - } + } /* FORNOW */ nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); @@ -3693,32 +3693,28 @@ vectorizable_call (vec_info *vinfo, tree prev_res = NULL_TREE; vargs.safe_grow (vect_nargs, true); auto_vec<vec<tree> > vec_defs (nargs); - for (j = 0; j < 1; ++j) - { - /* Build argument list for the vectorized call. */ - if (cfn == CFN_GOMP_SIMD_LANE) - { - for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i) - { - /* ??? For multi-lane SLP we'd need to build - { 0, 0, .., 1, 1, ... }. 
*/ - tree cst = build_index_vector (vectype_out, - i * nunits_out, 1); - tree new_var - = vect_get_new_ssa_name (vectype_out, vect_simple_var, - "cst_"); - gimple *init_stmt = gimple_build_assign (new_var, cst); - vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL); - new_temp = make_ssa_name (vec_dest); - gimple *new_stmt - = gimple_build_assign (new_temp, new_var); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, - gsi); - slp_node->push_vec_def (new_stmt); - } - continue; - } + /* Build argument list for the vectorized call. */ + if (cfn == CFN_GOMP_SIMD_LANE) + { + for (i = 0; i < SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++i) + { + /* ??? For multi-lane SLP we'd need to build + { 0, 0, .., 1, 1, ... }. */ + tree cst = build_index_vector (vectype_out, + i * nunits_out, 1); + tree new_var + = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_"); + gimple *init_stmt = gimple_build_assign (new_var, cst); + vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL); + new_temp = make_ssa_name (vec_dest); + gimple *new_stmt = gimple_build_assign (new_temp, new_var); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + slp_node->push_vec_def (new_stmt); + } + } + else + { vec<tree> vec_oprnds0; vect_get_slp_defs (vinfo, slp_node, &vec_defs); vec_oprnds0 = vec_defs[0]; @@ -3730,9 +3726,8 @@ vectorizable_call (vec_info *vinfo, if (masked_loop_p && reduc_idx >= 0) { unsigned int vec_num = vec_oprnds0.length (); - vargs[varg++] = vect_get_loop_mask (loop_vinfo, - gsi, masks, vec_num, - vectype_out, i); + vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, + vec_num, vectype_out, i); } size_t k; for (k = 0; k < nargs; k++) @@ -3782,12 +3777,11 @@ vectorizable_call (vec_info *vinfo, else if (mask_opno >= 0 && masked_loop_p) { unsigned int vec_num = vec_oprnds0.length (); - tree mask = vect_get_loop_mask (loop_vinfo, - gsi, masks, vec_num, - vectype_out, i); - vargs[mask_opno] = prepare_vec_mask - (loop_vinfo, TREE_TYPE (mask), mask, - 
vargs[mask_opno], gsi); + tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, + vec_num, vectype_out, i); + vargs[mask_opno] + = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, + vargs[mask_opno], gsi); } gcall *call; @@ -3804,6 +3798,7 @@ vectorizable_call (vec_info *vinfo, slp_node->push_vec_def (new_stmt); } } + for (i = 0; i < nargs; i++) { vec<tree> vec_oprndsi = vec_defs[i]; @@ -3815,41 +3810,34 @@ vectorizable_call (vec_info *vinfo, auto_vec<vec<tree> > vec_defs (nargs); /* We don't define any narrowing conditional functions at present. */ gcc_assert (mask_opno < 0); - for (j = 0; j < 1; ++j) - { - /* Build argument list for the vectorized call. */ - if (j == 0) - vargs.create (nargs * 2); - else - vargs.truncate (0); - vec<tree> vec_oprnds0; + /* Build argument list for the vectorized call. */ + vargs.create (nargs * 2); - vect_get_slp_defs (vinfo, slp_node, &vec_defs); - vec_oprnds0 = vec_defs[0]; + vect_get_slp_defs (vinfo, slp_node, &vec_defs); + vec<tree> vec_oprnds0 = vec_defs[0]; - /* Arguments are ready. Create the new vector stmt. */ - for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2) + /* Arguments are ready. Create the new vector stmt. 
*/ + for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2) + { + size_t k; + vargs.truncate (0); + for (k = 0; k < nargs; k++) { - size_t k; - vargs.truncate (0); - for (k = 0; k < nargs; k++) - { - vec<tree> vec_oprndsk = vec_defs[k]; - vargs.quick_push (vec_oprndsk[i]); - vargs.quick_push (vec_oprndsk[i + 1]); - } - gcall *call; - if (ifn != IFN_LAST) - call = gimple_build_call_internal_vec (ifn, vargs); - else - call = gimple_build_call_vec (fndecl, vargs); - new_temp = make_ssa_name (vec_dest, call); - gimple_call_set_lhs (call, new_temp); - gimple_call_set_nothrow (call, true); - vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); - slp_node->push_vec_def (call); + vec<tree> vec_oprndsk = vec_defs[k]; + vargs.quick_push (vec_oprndsk[i]); + vargs.quick_push (vec_oprndsk[i + 1]); } + gcall *call; + if (ifn != IFN_LAST) + call = gimple_build_call_internal_vec (ifn, vargs); + else + call = gimple_build_call_vec (fndecl, vargs); + new_temp = make_ssa_name (vec_dest, call); + gimple_call_set_lhs (call, new_temp); + gimple_call_set_nothrow (call, true); + vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); + slp_node->push_vec_def (call); } for (i = 0; i < nargs; i++) -- 2.43.0