This extends optimized reduction epilog handling to cover the
trivial single-lane SLP reduction case.

        * tree-vect-loop.cc (vect_create_epilog_for_reduction): Allow
        direct opcode and shift reduction also for SLP reductions
        with a single lane.
---
 gcc/tree-vect-loop.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 83c0544b6aa..31abfe047a4 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6500,7 +6500,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
   /* 2.3 Create the reduction code, using one of the three schemes described
          above. In SLP we simply need to extract all the elements from the 
          vector (without reducing them), so we use scalar shifts.  */
-  else if (reduc_fn != IFN_LAST && !slp_reduc)
+  else if (reduc_fn != IFN_LAST && (!slp_reduc || group_size == 1))
     {
       tree tmp;
       tree vec_elem_type;
@@ -6670,7 +6670,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
       reduc_inputs[0] = new_temp;
 
-      if (reduce_with_shift && !slp_reduc)
+      if (reduce_with_shift && (!slp_reduc || group_size == 1))
        {
          int element_bitsize = tree_to_uhwi (bitsize);
          /* Enforced by vectorizable_reduction, which disallows SLP reductions
-- 
2.35.3

Reply via email to