https://gcc.gnu.org/g:670e2b4d3e05f58ce573f719b74dfa49962ed175
commit r16-3390-g670e2b4d3e05f58ce573f719b74dfa49962ed175 Author: Richard Biener <rguent...@suse.de> Date: Tue Aug 26 09:04:36 2025 +0200 Compute vect_reduc_type off SLP node instead of stmt-info The following changes the vect_reduc_type API to work on the SLP node. The API is only used from the aarch64 backend, so all changes are there. In particular I noticed aarch64_force_single_cycle is invoked even for scalar costing (where the flag tested isn't computed yet), I figured in scalar costing all reductions are a single cycle. * tree-vectorizer.h (vect_reduc_type): Get SLP node as argument. * config/aarch64/aarch64.cc (aarch64_sve_in_loop_reduction_latency): Take SLP node as argument and adjust. (aarch64_in_loop_reduction_latency): Likewise. (aarch64_detect_vector_stmt_subtype): Adjust. (aarch64_vector_costs::count_ops): Likewise. Treat reductions during scalar costing as single-cycle. Diff: --- gcc/config/aarch64/aarch64.cc | 21 ++++++++++++++------- gcc/tree-vectorizer.h | 16 ++++++++++------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fb8311b655d7..eb9e2cfaab09 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -17420,10 +17420,11 @@ aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info, instructions. */ static unsigned int aarch64_sve_in_loop_reduction_latency (vec_info *vinfo, + slp_tree node, stmt_vec_info stmt_info, const sve_vec_cost *sve_costs) { - switch (vect_reduc_type (vinfo, stmt_info)) + switch (vect_reduc_type (vinfo, node)) { case EXTRACT_LAST_REDUCTION: return sve_costs->clast_cost; @@ -17463,7 +17464,9 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo, - If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the SVE implementation. 
*/ static unsigned int -aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info, +aarch64_in_loop_reduction_latency (vec_info *vinfo, + slp_tree node, + stmt_vec_info stmt_info, unsigned int vec_flags) { const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs; @@ -17476,7 +17479,8 @@ aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info, if (sve_costs) { unsigned int latency - = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs); + = aarch64_sve_in_loop_reduction_latency (vinfo, node, + stmt_info, sve_costs); if (latency) return latency; } @@ -17575,7 +17579,8 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, && sve_costs) { unsigned int latency - = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs); + = aarch64_sve_in_loop_reduction_latency (vinfo, node, + stmt_info, sve_costs); if (latency) return latency; } @@ -17787,8 +17792,10 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, && vect_is_reduction (stmt_info)) { unsigned int base - = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags); - if (aarch64_force_single_cycle (m_vinfo, stmt_info)) + = aarch64_in_loop_reduction_latency (m_vinfo, node, + stmt_info, m_vec_flags); + if (m_costing_for_scalar + || aarch64_force_single_cycle (m_vinfo, stmt_info)) /* ??? Ideally we'd use a tree to reduce the copies down to 1 vector, and then accumulate that, but at the moment the loop-carried dependency includes all copies. */ @@ -17901,7 +17908,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, have only accounted for one. */ if (stmt_info && (kind == vector_stmt || kind == vec_to_scalar) - && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION) + && vect_reduc_type (m_vinfo, node) == COND_REDUCTION) ops->general_ops += count; /* Count the predicate operations needed by an SVE comparison. 
*/ diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 67154d3eaf52..ad7500eb6c97 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2878,14 +2878,18 @@ vect_is_reduction (stmt_vec_info stmt_info) /* If STMT_INFO describes a reduction, return the vect_reduction_type of the reduction it describes, otherwise return -1. */ inline int -vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info) +vect_reduc_type (vec_info *vinfo, slp_tree node) { if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo)) - if (STMT_VINFO_REDUC_DEF (stmt_info)) - { - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); - return int (STMT_VINFO_REDUC_TYPE (reduc_info)); - } + { + stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node); + if (STMT_VINFO_REDUC_DEF (stmt_info)) + { + stmt_vec_info reduc_info + = info_for_reduction (loop_vinfo, stmt_info); + return int (STMT_VINFO_REDUC_TYPE (reduc_info)); + } + } return -1; }