[PATCH] tree-optimization/122365 - deal with bool SLP reductions

Richard Biener Tue, 21 Oct 2025 07:09:32 -0700

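I hadn't thought of bool SLP reductions with more than one reduction
input, but I had at least added an assert, which has now tripped.
Fix this by converting all reduction inputs instead of just the first.
There is also a latent issue with AVX512 mask types: the by-pieces
(elementwise) reduction code used the wrong element sizes.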


Bootstrap and regtest running on x86_64-unknown-linux-gnu.

        PR tree-optimization/122365
        * tree-vect-loop.cc (vect_create_epilog_for_reduction):
        Convert all inputs.  Use the proper vector element sizes
        for the elementwise reduction.

        * gcc.dg/vect/vect-reduc-bool-9.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c | 27 +++++++++++
 gcc/tree-vect-loop.cc                         | 48 ++++++++++---------
 2 files changed, 53 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c
new file mode 100644
index 00000000000..4ec141c5e69
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-bool-9.c
@@ -0,0 +1,27 @@
+/* PR122365 */
+/* { dg-do compile } */
+
+struct TDTI {
+  float V[4];
+};
+struct TDTI4D {
+  struct TDTI S[];
+};
+void bar();
+struct TDTI4D nii_readParRec_dti4D;
+int nii_readParRec_d_0_0;
+void nii_readParRec() {
+  for (int i;;) {
+    bool v1varies = false, v2varies = false, v3varies = false;
+    for (; i < nii_readParRec_d_0_0; i++) {
+      if (nii_readParRec_dti4D.S[i].V[1])
+        v1varies = true;
+      if (nii_readParRec_dti4D.S[i].V[2])
+        v2varies = true;
+      if (nii_readParRec_dti4D.S[i].V[3])
+        v3varies = true;
+    }
+    if (v1varies || v2varies || v3varies)
+      bar();
+  }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index be684a529db..15cb22023fc 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5616,13 +5616,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
       && VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info)
       && vectype != VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info))
     {
-      gcc_assert (reduc_inputs.length () == 1);
       vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
       gimple_seq stmts = NULL;
-      reduc_inputs[0] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
-                                     reduc_inputs[0],
-                                     build_one_cst (vectype),
-                                     build_zero_cst (vectype));
+      for (unsigned i = 0; i < reduc_inputs.length (); ++i)
+       reduc_inputs[i] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
+                                       reduc_inputs[i],
+                                       build_one_cst (vectype),
+                                       build_zero_cst (vectype));
       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
     }
 
@@ -5963,25 +5963,29 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
              gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
            }
        }
-      if (!slp_reduc
-         && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
+      else if (!slp_reduc
+              && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
        nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
 
-      tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
-                                                          stype, nunits1);
+      tree vectype1 = vectype;
+      if (mode1 != mode)
+       {
+         vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
+                                                         stype, nunits1);
+         /* First reduce the vector to the desired vector size we should
+            do shift reduction on by combining upper and lower halves.  */
+         gimple_seq stmts = NULL;
+         new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
+                                                code, &stmts);
+         gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+         reduc_inputs[0] = new_temp;
+       }
+
       reduce_with_shift = have_whole_vector_shift (mode1);
       if (!VECTOR_MODE_P (mode1)
          || !directly_supported_p (code, vectype1))
        reduce_with_shift = false;
 
-      /* First reduce the vector to the desired vector size we should
-        do shift reduction on by combining upper and lower halves.  */
-      gimple_seq stmts = NULL;
-      new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
-                                            code, &stmts);
-      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
-      reduc_inputs[0] = new_temp;
-
       if (reduce_with_shift && (!slp_reduc || group_size == 1))
        {
          tree bitsize = TYPE_SIZE (TREE_TYPE (vectype1));
@@ -6009,7 +6013,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
                             "Reduce using vector shifts\n");
 
          gimple_seq stmts = NULL;
-         new_temp = gimple_convert (&stmts, vectype1, new_temp);
+         new_temp = gimple_convert (&stmts, vectype1, reduc_inputs[0]);
           for (elt_offset = nelements / 2;
                elt_offset >= 1;
                elt_offset /= 2)
@@ -6053,13 +6057,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
                             "Reduce using scalar code.\n");
 
          tree compute_type = TREE_TYPE (vectype1);
-         tree bitsize = TYPE_SIZE (compute_type);
-         int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
-         int element_bitsize = tree_to_uhwi (bitsize);
+         unsigned vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
+         unsigned element_bitsize = vector_element_bits (vectype1);
+         tree bitsize = bitsize_int (element_bitsize);
          gimple_seq stmts = NULL;
          FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp)
             {
-              int bit_offset;
+             unsigned bit_offset;
              new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
                                       vec_temp, bitsize, bitsize_zero_node);
 
-- 
2.51.0

[PATCH] tree-optimization/122365 - deal with bool SLP reductions

Reply via email to