On Thu, Jul 8, 2021 at 2:43 PM Richard Sandiford via Gcc-patches
<[email protected]> wrote:
>
> vect_create_epilog_for_reduction had a variable called new_phis.
> It collected the statements that produce the exit block definitions
> of the vector reduction accumulators. Although those statements
> are indeed phis initially, they are often replaced with normal
> statements later, leading to puzzling code like:
>
> FOR_EACH_VEC_ELT (new_phis, i, new_phi)
> {
> int bit_offset;
> if (gimple_code (new_phi) == GIMPLE_PHI)
> vec_temp = PHI_RESULT (new_phi);
> else
> vec_temp = gimple_assign_lhs (new_phi);
>
> Also, although the array collects statements, in practice all users want
> the lhs instead.
>
> This patch therefore replaces new_phis with a vector of gimple values
> called “reduc_inputs”.
>
> Also, reduction chains and ncopies>1 were handled with identical code
> (and there was a comment saying so). The patch unites them into
> a single “if”.
OK.
Thanks,
Richard.
> gcc/
> * tree-vect-loop.c (vect_create_epilog_for_reduction): Replace
> the new_phis vector with a reduc_inputs vector. Combine handling
> of reduction chains and ncopies > 1.
> ---
> gcc/tree-vect-loop.c | 113 ++++++++++++++++---------------------------
> 1 file changed, 41 insertions(+), 72 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 8390ac80ca0..b7f73ca52c7 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -5005,7 +5005,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> imm_use_iterator imm_iter, phi_imm_iter;
> use_operand_p use_p, phi_use_p;
> gimple *use_stmt;
> - auto_vec<gimple *> new_phis;
> + auto_vec<tree> reduc_inputs;
> int j, i;
> auto_vec<tree> scalar_results;
> unsigned int group_size = 1, k;
> @@ -5017,7 +5017,6 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> b2 = operation (b1) */
> bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
> bool direct_slp_reduc;
> - tree new_phi_result;
> tree induction_index = NULL_TREE;
>
> if (slp_node)
> @@ -5215,7 +5214,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> if (double_reduc)
> loop = outer_loop;
> exit_bb = single_exit (loop)->dest;
> - new_phis.create (slp_node ? vec_num : ncopies);
> + reduc_inputs.create (slp_node ? vec_num : ncopies);
> for (unsigned i = 0; i < vec_num; i++)
> {
> if (slp_node)
> @@ -5223,19 +5222,14 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> else
> def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]);
> for (j = 0; j < ncopies; j++)
> - {
> + {
> tree new_def = copy_ssa_name (def);
> - phi = create_phi_node (new_def, exit_bb);
> - if (j == 0)
> - new_phis.quick_push (phi);
> - else
> - {
> - def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]);
> - new_phis.quick_push (phi);
> - }
> -
> - SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
> - }
> + phi = create_phi_node (new_def, exit_bb);
> + if (j)
> + def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]);
> + SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
> + reduc_inputs.quick_push (new_def);
> + }
> }
>
> exit_gsi = gsi_after_labels (exit_bb);
> @@ -5274,52 +5268,32 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> a2 = operation (a1)
> a3 = operation (a2),
>
> - we may end up with more than one vector result. Here we reduce them to
> - one vector. */
> - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
> + we may end up with more than one vector result. Here we reduce them
> + to one vector.
> +
> + The same is true if we couldn't use a single defuse cycle. */
> + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)
> + || direct_slp_reduc
> + || ncopies > 1)
> {
> gimple_seq stmts = NULL;
> - tree first_vect = PHI_RESULT (new_phis[0]);
> - first_vect = gimple_convert (&stmts, vectype, first_vect);
> - for (k = 1; k < new_phis.length (); k++)
> + tree first_vect = gimple_convert (&stmts, vectype, reduc_inputs[0]);
> + for (k = 1; k < reduc_inputs.length (); k++)
> {
> - gimple *next_phi = new_phis[k];
> - tree second_vect = PHI_RESULT (next_phi);
> - second_vect = gimple_convert (&stmts, vectype, second_vect);
> + tree second_vect = gimple_convert (&stmts, vectype,
> reduc_inputs[k]);
> first_vect = gimple_build (&stmts, code, vectype,
> first_vect, second_vect);
> }
> gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>
> - new_phi_result = first_vect;
> - new_phis.truncate (0);
> - new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
> + reduc_inputs.truncate (0);
> + reduc_inputs.safe_push (first_vect);
> }
> - /* Likewise if we couldn't use a single defuse cycle. */
> - else if (ncopies > 1)
> - {
> - gimple_seq stmts = NULL;
> - tree first_vect = PHI_RESULT (new_phis[0]);
> - first_vect = gimple_convert (&stmts, vectype, first_vect);
> - for (int k = 1; k < ncopies; ++k)
> - {
> - tree second_vect = PHI_RESULT (new_phis[k]);
> - second_vect = gimple_convert (&stmts, vectype, second_vect);
> - first_vect = gimple_build (&stmts, code, vectype,
> - first_vect, second_vect);
> - }
> - gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> - new_phi_result = first_vect;
> - new_phis.truncate (0);
> - new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
> - }
> - else
> - new_phi_result = PHI_RESULT (new_phis[0]);
>
> if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
> && reduc_fn != IFN_LAST)
> {
> - /* For condition reductions, we have a vector (NEW_PHI_RESULT)
> containing
> + /* For condition reductions, we have a vector (REDUC_INPUTS 0)
> containing
> various data values where the condition matched and another vector
> (INDUCTION_INDEX) containing all the indexes of those matches. We
> need to extract the last matching index (which will be the index with
> @@ -5350,7 +5324,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> tree zero_vec = build_zero_cst (vectype);
>
> gimple_seq stmts = NULL;
> - new_phi_result = gimple_convert (&stmts, vectype, new_phi_result);
> + reduc_inputs[0] = gimple_convert (&stmts, vectype, reduc_inputs[0]);
> gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>
> /* Find maximum value from the vector of found indexes. */
> @@ -5370,7 +5344,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
>
> /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
> with the vector (INDUCTION_INDEX) of found indexes, choosing values
> - from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
> + from the data vector (REDUC_INPUTS 0) for matches, 0 (ZERO_VEC)
> otherwise. Only one value should match, resulting in a vector
> (VEC_COND) with one data value and the rest zeros.
> In the case where the loop never made any matches, every index will
> @@ -5389,7 +5363,8 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> zero. */
> tree vec_cond = make_ssa_name (vectype);
> gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
> - vec_compare,
> new_phi_result,
> + vec_compare,
> + reduc_inputs[0],
> zero_vec);
> gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
>
> @@ -5437,7 +5412,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> val = data_reduc[i], idx_val = induction_index[i];
> return val; */
>
> - tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
> + tree data_eltype = TREE_TYPE (TREE_TYPE (reduc_inputs[0]));
> tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
> unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
> poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE
> (induction_index));
> @@ -5461,7 +5436,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF,
> build3 (BIT_FIELD_REF,
> data_eltype,
> - new_phi_result,
> + reduc_inputs[0],
> bitsize_int (el_size),
> bitsize_int (off)));
> gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> @@ -5513,10 +5488,10 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> "Reduce using direct vector reduction.\n");
>
> gimple_seq stmts = NULL;
> - new_phi_result = gimple_convert (&stmts, vectype, new_phi_result);
> - vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
> + reduc_inputs[0] = gimple_convert (&stmts, vectype, reduc_inputs[0]);
> + vec_elem_type = TREE_TYPE (TREE_TYPE (reduc_inputs[0]));
> new_temp = gimple_build (&stmts, as_combined_fn (reduc_fn),
> - vec_elem_type, new_phi_result);
> + vec_elem_type, reduc_inputs[0]);
> new_temp = gimple_convert (&stmts, scalar_type, new_temp);
> gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>
> @@ -5546,7 +5521,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> neutral value. We can then do a normal reduction on each vector. */
>
> /* Enforced by vectorizable_reduction. */
> - gcc_assert (new_phis.length () == 1);
> + gcc_assert (reduc_inputs.length () == 1);
> gcc_assert (pow2p_hwi (group_size));
>
> slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis;
> @@ -5602,7 +5577,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
>
> sel[j] = (index[j] == i);
>
> - which selects the elements of NEW_PHI_RESULT that should
> + which selects the elements of REDUC_INPUTS[0] that should
> be included in the result. */
> tree compare_val = build_int_cst (index_elt_type, i);
> compare_val = build_vector_from_val (index_type, compare_val);
> @@ -5611,11 +5586,11 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
>
> /* Calculate the equivalent of:
>
> - vec = seq ? new_phi_result : vector_identity;
> + vec = seq ? reduc_inputs[0] : vector_identity;
>
> VEC is now suitable for a full vector reduction. */
> tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype,
> - sel, new_phi_result, vector_identity);
> + sel, reduc_inputs[0], vector_identity);
>
> /* Do the reduction and convert it to the appropriate type. */
> tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn),
> @@ -5630,7 +5605,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> bool reduce_with_shift;
> tree vec_temp;
>
> - gcc_assert (slp_reduc || new_phis.length () == 1);
> + gcc_assert (slp_reduc || reduc_inputs.length () == 1);
>
> /* See if the target wants to do the final (shift) reduction
> in a vector mode of smaller size and first reduce upper/lower
> @@ -5640,7 +5615,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
> unsigned nunits1 = nunits;
> if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode
> - && new_phis.length () == 1)
> + && reduc_inputs.length () == 1)
> {
> nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
> /* For SLP reductions we have to make sure lanes match up, but
> @@ -5672,7 +5647,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
>
> /* First reduce the vector to the desired vector size we should
> do shift reduction on by combining upper and lower halves. */
> - new_temp = new_phi_result;
> + new_temp = reduc_inputs[0];
> while (nunits > nunits1)
> {
> nunits /= 2;
> @@ -5751,7 +5726,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> new_temp = make_ssa_name (vectype1);
> epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2);
> gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> - new_phis[0] = epilog_stmt;
> + reduc_inputs[0] = new_temp;
> }
>
> if (reduce_with_shift && !slp_reduc)
> @@ -5832,13 +5807,9 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> int element_bitsize = tree_to_uhwi (bitsize);
> tree compute_type = TREE_TYPE (vectype);
> gimple_seq stmts = NULL;
> - FOR_EACH_VEC_ELT (new_phis, i, new_phi)
> + FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp)
> {
> int bit_offset;
> - if (gimple_code (new_phi) == GIMPLE_PHI)
> - vec_temp = PHI_RESULT (new_phi);
> - else
> - vec_temp = gimple_assign_lhs (new_phi);
> new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
> vec_temp, bitsize, bitsize_zero_node);
>
> @@ -5929,11 +5900,10 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> gimple_seq stmts = NULL;
> if (double_reduc)
> {
> - new_phi = new_phis[0];
> gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def)));
> adjustment_def = gimple_convert (&stmts, vectype, adjustment_def);
> new_temp = gimple_build (&stmts, code, vectype,
> - PHI_RESULT (new_phi), adjustment_def);
> + reduc_inputs[0], adjustment_def);
> }
> else
> {
> @@ -5947,7 +5917,6 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> epilog_stmt = gimple_seq_last_stmt (stmts);
> gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> scalar_results[0] = new_temp;
> - new_phis[0] = epilog_stmt;
> }
>
> if (double_reduc)