On Thu, Jul 8, 2021 at 2:49 PM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> After previous patches, we can now easily provide the neutral op
> as an argument to get_initial_def_for_reduction. This in turn
> allows the adjustment calculation to be moved outside of
> get_initial_def_for_reduction, which is the main motivation
> of the patch.
OK. > gcc/ > * tree-vect-loop.c (get_initial_def_for_reduction): Remove > adjustment handling. Take the neutral value as an argument, > in place of the code argument. > (vect_transform_cycle_phi): Update accordingly. Handle the > initial values of cond reductions separately from code reductions. > Choose the adjustment here rather than in > get_initial_def_for_reduction. Sink the splat of vec_initial_def. > --- > gcc/tree-vect-loop.c | 177 +++++++++++++++---------------------------- > 1 file changed, 59 insertions(+), 118 deletions(-) > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 744645d8bad..fe7e73f655f 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -4614,57 +4614,26 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, > Input: > REDUC_INFO - the info_for_reduction > INIT_VAL - the initial value of the reduction variable > + NEUTRAL_OP - a value that has no effect on the reduction, as per > + neutral_op_for_reduction > > Output: > - ADJUSTMENT_DEF - a tree that holds a value to be added to the final result > - of the reduction (used for adjusting the epilog - see below). > Return a vector variable, initialized according to the operation that > STMT_VINFO performs. This vector will be used as the initial value > of the vector of partial results. > > - Option1 (adjust in epilog): Initialize the vector as follows: > - add/bit or/xor: [0,0,...,0,0] > - mult/bit and: [1,1,...,1,1] > - min/max/cond_expr: [init_val,init_val,..,init_val,init_val] > - and when necessary (e.g. add/mult case) let the caller know > - that it needs to adjust the result by init_val. > - > - Option2: Initialize the vector as follows: > - add/bit or/xor: [init_val,0,0,...,0] > - mult/bit and: [init_val,1,1,...,1] > - min/max/cond_expr: [init_val,init_val,...,init_val] > - and no adjustments are needed. 
> - > - For example, for the following code: > - > - s = init_val; > - for (i=0;i<n;i++) > - s = s + a[i]; > - > - STMT_VINFO is 's = s + a[i]', and the reduction variable is 's'. > - For a vector of 4 units, we want to return either [0,0,0,init_val], > - or [0,0,0,0] and let the caller know that it needs to adjust > - the result at the end by 'init_val'. > - > - FORNOW, we are using the 'adjust in epilog' scheme, because this way the > - initialization vector is simpler (same element in all entries), if > - ADJUSTMENT_DEF is not NULL, and Option2 otherwise. > - > - A cost model should help decide between these two schemes. */ > + The value we need is a vector in which element 0 has value INIT_VAL > + and every other element has value NEUTRAL_OP. */ > > static tree > get_initial_def_for_reduction (loop_vec_info loop_vinfo, > stmt_vec_info reduc_info, > - enum tree_code code, tree init_val, > - tree *adjustment_def) > + tree init_val, tree neutral_op) > { > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > tree scalar_type = TREE_TYPE (init_val); > tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); > - tree def_for_init; > tree init_def; > - REAL_VALUE_TYPE real_init_val = dconst0; > - int int_init_val = 0; > gimple_seq stmts = NULL; > > gcc_assert (vectype); > @@ -4675,75 +4644,34 @@ get_initial_def_for_reduction (loop_vec_info > loop_vinfo, > gcc_assert (nested_in_vect_loop_p (loop, reduc_info) > || loop == (gimple_bb (reduc_info->stmt))->loop_father); > > - /* ADJUSTMENT_DEF is NULL when called from > - vect_create_epilog_for_reduction to vectorize double reduction. 
*/ > - if (adjustment_def) > - *adjustment_def = NULL; > - > - switch (code) > + if (operand_equal_p (init_val, neutral_op)) > { > - case WIDEN_SUM_EXPR: > - case DOT_PROD_EXPR: > - case SAD_EXPR: > - case PLUS_EXPR: > - case MINUS_EXPR: > - case BIT_IOR_EXPR: > - case BIT_XOR_EXPR: > - case MULT_EXPR: > - case BIT_AND_EXPR: > - { > - if (code == MULT_EXPR) > - { > - real_init_val = dconst1; > - int_init_val = 1; > - } > - > - if (code == BIT_AND_EXPR) > - int_init_val = -1; > - > - if (SCALAR_FLOAT_TYPE_P (scalar_type)) > - def_for_init = build_real (scalar_type, real_init_val); > - else > - def_for_init = build_int_cst (scalar_type, int_init_val); > - > - if (adjustment_def || operand_equal_p (def_for_init, init_val, 0)) > - { > - /* Option1: the first element is '0' or '1' as well. */ > - if (!operand_equal_p (def_for_init, init_val, 0)) > - *adjustment_def = init_val; > - init_def = gimple_build_vector_from_val (&stmts, vectype, > - def_for_init); > - } > - else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) > - { > - /* Option2 (variable length): the first element is INIT_VAL. */ > - init_def = gimple_build_vector_from_val (&stmts, vectype, > - def_for_init); > - init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, > - vectype, init_def, init_val); > - } > - else > - { > - /* Option2: the first element is INIT_VAL. */ > - tree_vector_builder elts (vectype, 1, 2); > - elts.quick_push (init_val); > - elts.quick_push (def_for_init); > - init_def = gimple_build_vector (&stmts, &elts); > - } > - } > - break; > - > - case MIN_EXPR: > - case MAX_EXPR: > - case COND_EXPR: > - { > - init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); > - init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); > - } > - break; > - > - default: > - gcc_unreachable (); > + /* If both elements are equal then the vector described above is > + just a splat. 
*/ > + neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op); > + init_def = gimple_build_vector_from_val (&stmts, vectype, neutral_op); > + } > + else > + { > + neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op); > + init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); > + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) > + { > + /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into > + element 0. */ > + init_def = gimple_build_vector_from_val (&stmts, vectype, > + neutral_op); > + init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, > + vectype, init_def, init_val); > + } > + else > + { > + /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}. */ > + tree_vector_builder elts (vectype, 1, 2); > + elts.quick_push (init_val); > + elts.quick_push (neutral_op); > + init_def = gimple_build_vector (&stmts, &elts); > + } > } > > if (stmts) > @@ -7479,7 +7407,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, > vectype_out); > > /* Get the loop-entry arguments. */ > - tree vec_initial_def; > + tree vec_initial_def = NULL_TREE; > auto_vec<tree> vec_initial_defs; > if (slp_node) > { > @@ -7529,9 +7457,6 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, > STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; > } > vec_initial_def = build_vector_from_val (vectype_out, induc_val); > - vec_initial_defs.create (ncopies); > - for (i = 0; i < ncopies; ++i) > - vec_initial_defs.quick_push (vec_initial_def); > } > else if (nested_cycle) > { > @@ -7541,23 +7466,39 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, > ncopies, initial_def, > &vec_initial_defs); > } > + else if (STMT_VINFO_REDUC_TYPE (reduc_info) == CONST_COND_REDUCTION > + || STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) > + /* Fill the initial vector with the initial scalar value. 
*/ > + vec_initial_def > + = get_initial_def_for_reduction (loop_vinfo, reduc_stmt_info, > + initial_def, initial_def); > else > { > - tree adjustment_def = NULL_TREE; > - tree *adjustment_defp = &adjustment_def; > enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); > - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) > - adjustment_defp = NULL; > + tree neutral_op = neutral_op_for_reduction (TREE_TYPE (initial_def), > + code, initial_def); > + gcc_assert (neutral_op); > + /* Try to simplify the vector initialization by applying an > + adjustment after the reduction has been performed. */ > + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def > + && !operand_equal_p (neutral_op, initial_def)) > + { > + STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = initial_def; > + initial_def = neutral_op; > + } > vec_initial_def > - = get_initial_def_for_reduction (loop_vinfo, reduc_info, code, > - initial_def, adjustment_defp); > - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = adjustment_def; > - vec_initial_defs.create (ncopies); > - for (i = 0; i < ncopies; ++i) > - vec_initial_defs.quick_push (vec_initial_def); > + = get_initial_def_for_reduction (loop_vinfo, reduc_info, > + initial_def, neutral_op); > } > } > > + if (vec_initial_def) > + { > + vec_initial_defs.create (ncopies); > + for (i = 0; i < ncopies; ++i) > + vec_initial_defs.quick_push (vec_initial_def); > + } > + > /* Generate the reduction PHIs upfront. */ > for (i = 0; i < vec_num; i++) > {