https://gcc.gnu.org/g:670e2b4d3e05f58ce573f719b74dfa49962ed175

commit r16-3390-g670e2b4d3e05f58ce573f719b74dfa49962ed175
Author: Richard Biener <rguent...@suse.de>
Date:   Tue Aug 26 09:04:36 2025 +0200

    Compute vect_reduc_type off SLP node instead of stmt-info
    
    The following changes the vect_reduc_type API to work on the SLP node.
    The API is only used from the aarch64 backend, so all changes are there.
    In particular I noticed that aarch64_force_single_cycle is invoked
    even for scalar costing (where the flag it tests isn't computed yet),
    so scalar costing now treats all reductions as single cycle.
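    
    For illustration, a caller migrates by passing the SLP node instead
    of the stmt-info; a minimal sketch using identifiers from the patch
    below, with surrounding backend code elided:
    
      /* Before: the reduction type was queried via the stmt-info.  */
      if (vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION)
        ops->general_ops += count;
    
      /* After: it is queried via the SLP node; internally the node's
         SLP_TREE_REPRESENTATIVE supplies the stmt-info.  */
      if (vect_reduc_type (m_vinfo, node) == COND_REDUCTION)
        ops->general_ops += count;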
    
            * tree-vectorizer.h (vect_reduc_type): Get SLP node as argument.
            * config/aarch64/aarch64.cc (aarch64_sve_in_loop_reduction_latency):
            Take SLP node as argument and adjust.
            (aarch64_in_loop_reduction_latency): Likewise.
            (aarch64_detect_vector_stmt_subtype): Adjust.
            (aarch64_vector_costs::count_ops): Likewise.  Treat reductions
            during scalar costing as single-cycle.

Diff:
---
 gcc/config/aarch64/aarch64.cc | 21 ++++++++++++++-------
 gcc/tree-vectorizer.h         | 16 ++++++++++------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index fb8311b655d7..eb9e2cfaab09 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -17420,10 +17420,11 @@ aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info,
    instructions.  */
 static unsigned int
 aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
+                                      slp_tree node,
                                       stmt_vec_info stmt_info,
                                       const sve_vec_cost *sve_costs)
 {
-  switch (vect_reduc_type (vinfo, stmt_info))
+  switch (vect_reduc_type (vinfo, node))
     {
     case EXTRACT_LAST_REDUCTION:
       return sve_costs->clast_cost;
@@ -17463,7 +17464,9 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
    - If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the
      SVE implementation.  */
 static unsigned int
-aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
+aarch64_in_loop_reduction_latency (vec_info *vinfo,
+                                  slp_tree node,
+                                  stmt_vec_info stmt_info,
                                   unsigned int vec_flags)
 {
   const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs;
@@ -17476,7 +17479,8 @@ aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
   if (sve_costs)
     {
       unsigned int latency
-       = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
+       = aarch64_sve_in_loop_reduction_latency (vinfo, node,
+                                                stmt_info, sve_costs);
       if (latency)
        return latency;
     }
@@ -17575,7 +17579,8 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
       && sve_costs)
     {
       unsigned int latency
-       = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
+       = aarch64_sve_in_loop_reduction_latency (vinfo, node,
+                                                stmt_info, sve_costs);
       if (latency)
        return latency;
     }
@@ -17787,8 +17792,10 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
       && vect_is_reduction (stmt_info))
     {
       unsigned int base
-       = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags);
-      if (aarch64_force_single_cycle (m_vinfo, stmt_info))
+       = aarch64_in_loop_reduction_latency (m_vinfo, node,
+                                            stmt_info, m_vec_flags);
+      if (m_costing_for_scalar
+         || aarch64_force_single_cycle (m_vinfo, stmt_info))
        /* ??? Ideally we'd use a tree to reduce the copies down to 1 vector,
           and then accumulate that, but at the moment the loop-carried
           dependency includes all copies.  */
@@ -17901,7 +17908,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
      have only accounted for one.  */
   if (stmt_info
       && (kind == vector_stmt || kind == vec_to_scalar)
-      && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION)
+      && vect_reduc_type (m_vinfo, node) == COND_REDUCTION)
     ops->general_ops += count;
 
   /* Count the predicate operations needed by an SVE comparison.  */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 67154d3eaf52..ad7500eb6c97 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2878,14 +2878,18 @@ vect_is_reduction (stmt_vec_info stmt_info)
 /* If STMT_INFO describes a reduction, return the vect_reduction_type
    of the reduction it describes, otherwise return -1.  */
 inline int
-vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info)
+vect_reduc_type (vec_info *vinfo, slp_tree node)
 {
   if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
-    if (STMT_VINFO_REDUC_DEF (stmt_info))
-      {
-       stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
-       return int (STMT_VINFO_REDUC_TYPE (reduc_info));
-      }
+    {
+      stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
+      if (STMT_VINFO_REDUC_DEF (stmt_info))
+       {
+         stmt_vec_info reduc_info
+           = info_for_reduction (loop_vinfo, stmt_info);
+         return int (STMT_VINFO_REDUC_TYPE (reduc_info));
+       }
+    }
   return -1;
 }
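
For reference, a minimal caller sketch of the new interface, assuming a
vec_info *vinfo and an SLP node for a statement that may participate in
a reduction (the guard and cost adjustment are illustrative only):

  /* vect_reduc_type returns -1 unless NODE's representative statement
     is part of a reduction in a loop vectorization context.  */
  int rtype = vect_reduc_type (vinfo, node);
  if (rtype == COND_REDUCTION || rtype == EXTRACT_LAST_REDUCTION)
    /* ... account for the extra loop-carried operations ...  */;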
