The following adds vectorizer support for reduc_mask_{and,ior,xor}_scal
in the epilogue of bool reductions.

        * config/i386/sse.md (reduc_mask_and_scal_qi): Dummy for testing.
        * tree-vectorizer.h (reduction_fn_for_scalar_code): Add
        optional vector type argument.
        * tree-vect-loop.cc (reduction_fn_for_scalar_code): When a
        mask vector type is specified, return the corresponding
        MASK functions for AND, IOR and XOR.
        (vect_create_epilog_for_reduction): Pun masks to an
        integer vector type only when we do not support direct mask
        reduction.
        (vectorizable_reduction): Prefer direct mask reduction over
        integer vector reduction.
---
 gcc/config/i386/sse.md | 10 +++++
 gcc/tree-vect-loop.cc  | 97 ++++++++++++++++++++++++++----------------
 gcc/tree-vectorizer.h  |  3 +-
 3 files changed, 72 insertions(+), 38 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b28c8edb19..3672d2ba696 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4013,6 +4013,16 @@
   DONE;
 })
 
+(define_expand "reduc_mask_and_scal_qi"
+ [(and:QI
+    (match_operand:QI 0 "register_operand")
+    (match_operand:QI 1 "register_operand"))]
+ "TARGET_AVX512F"
+{
+  emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
   [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
        (unspec:VFH_AVX512VL
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0f890fae22c..121a49169ca 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3250,8 +3250,16 @@ fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn)
    Return FALSE if CODE currently cannot be vectorized as reduction.  */
 
 bool
-reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn)
+reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn,
+                             tree vectype)
 {
+  if (vectype
+      && VECTOR_BOOLEAN_TYPE_P (vectype)
+      && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
+      && maybe_ne (GET_MODE_PRECISION (TYPE_MODE (vectype)),
+                  TYPE_VECTOR_SUBPARTS (vectype)))
+    return false;
+
   if (code.is_tree_code ())
     switch (tree_code (code))
       {
@@ -3268,15 +3276,18 @@ reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn)
        return true;
 
       case BIT_AND_EXPR:
-       *reduc_fn = IFN_REDUC_AND;
+       *reduc_fn = ((vectype && VECTOR_BOOLEAN_TYPE_P (vectype))
+                    ? IFN_MASK_REDUC_AND : IFN_REDUC_AND);
        return true;
 
       case BIT_IOR_EXPR:
-       *reduc_fn = IFN_REDUC_IOR;
+       *reduc_fn = ((vectype && VECTOR_BOOLEAN_TYPE_P (vectype))
+                    ? IFN_MASK_REDUC_IOR : IFN_REDUC_IOR);
        return true;
 
       case BIT_XOR_EXPR:
-       *reduc_fn = IFN_REDUC_XOR;
+       *reduc_fn = ((vectype && VECTOR_BOOLEAN_TYPE_P (vectype))
+                    ? IFN_MASK_REDUC_XOR : IFN_REDUC_XOR);
        return true;
 
       case MULT_EXPR:
@@ -5565,9 +5576,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
   /* Shouldn't be used beyond this point.  */
   exit_bb = nullptr;
 
-  /* For the actual reduction work on a bool data vector instead of a
-     mask vector.  */
-  if (VECTOR_BOOLEAN_TYPE_P (vectype))
+  /* If we are operating on a mask vector and do not support direct mask
+     reduction, work on a bool data vector instead of a mask vector.  */
+  if (VECTOR_BOOLEAN_TYPE_P (vectype)
+      && reduc_fn != IFN_MASK_REDUC_AND
+      && reduc_fn != IFN_MASK_REDUC_IOR
+      && reduc_fn != IFN_MASK_REDUC_XOR)
     {
       gcc_assert (reduc_inputs.length () == 1);
       vectype = get_related_vectype_for_scalar_type (loop_vinfo->vector_mode,
@@ -7301,29 +7315,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 
   VECT_REDUC_INFO_CODE (reduc_info) = orig_code;
 
-  /* For now see to implement the epilogue reduction on a bool data,
-     not the mask type.  */
-  tree orig_vectype_out = vectype_out;
-  if (VECTOR_BOOLEAN_TYPE_P (vectype_out))
-    {
-      vectype_out
-       = get_related_vectype_for_scalar_type (loop_vinfo->vector_mode,
-                                              TREE_TYPE (vectype_out),
-                                              TYPE_VECTOR_SUBPARTS
-                                                (orig_vectype_out));
-      if (!vectype_out
-         || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_out),
-                      TYPE_VECTOR_SUBPARTS (orig_vectype_out))
-         || !expand_vec_cond_expr_p (vectype_out, orig_vectype_out))
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "cannot turn mask into bool data vector for "
-                            "reduction epilogue.\n");
-         return false;
-       }
-    }
-
   reduction_type = VECT_REDUC_INFO_TYPE (reduc_info);
   if (reduction_type == TREE_CODE_REDUCTION)
     {
@@ -7389,6 +7380,29 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
       return false;
     }
 
+  /* See if we can convert a mask vector to a corresponding bool data vector
+     to perform the epilogue reduction.  */
+  tree alt_vectype_out = NULL_TREE;
+  if (VECTOR_BOOLEAN_TYPE_P (vectype_out))
+    {
+      alt_vectype_out
+       = get_related_vectype_for_scalar_type (loop_vinfo->vector_mode,
+                                              TREE_TYPE (vectype_out),
+                                              TYPE_VECTOR_SUBPARTS
+                                                (vectype_out));
+      if (!alt_vectype_out
+         || maybe_ne (TYPE_VECTOR_SUBPARTS (alt_vectype_out),
+                      TYPE_VECTOR_SUBPARTS (vectype_out))
+         || !expand_vec_cond_expr_p (alt_vectype_out, vectype_out))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "cannot turn mask into bool data vector for "
+                            "reduction epilogue.\n");
+         alt_vectype_out = NULL_TREE;
+       }
+    }
+
   internal_fn reduc_fn = IFN_LAST;
   if (reduction_type == TREE_CODE_REDUCTION
       || reduction_type == FOLD_LEFT_REDUCTION
@@ -7397,17 +7411,28 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
     {
       if (reduction_type == FOLD_LEFT_REDUCTION
          ? fold_left_reduction_fn (orig_code, &reduc_fn)
-         : reduction_fn_for_scalar_code (orig_code, &reduc_fn))
+         : reduction_fn_for_scalar_code (orig_code, &reduc_fn, vectype_out))
        {
          if (reduc_fn != IFN_LAST
              && !direct_internal_fn_supported_p (reduc_fn, vectype_out,
                                                  OPTIMIZE_FOR_SPEED))
            {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "reduc op not supported by target.\n");
+             if (reduction_type != FOLD_LEFT_REDUCTION
+                 && alt_vectype_out
+                 && reduction_fn_for_scalar_code (orig_code, &reduc_fn,
+                                                  alt_vectype_out)
+                 && reduc_fn != IFN_LAST
+                 && direct_internal_fn_supported_p (reduc_fn, alt_vectype_out,
+                                                    OPTIMIZE_FOR_SPEED))
+               ;
+             else
+               {
+                 if (dump_enabled_p ())
+                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                    "reduc op not supported by target.\n");
 
-             reduc_fn = IFN_LAST;
+                 reduc_fn = IFN_LAST;
+               }
            }
        }
       else
@@ -7444,8 +7469,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
       return false;
     }
 
-  vectype_out = orig_vectype_out;
-
   /* For SLP reductions, see if there is a neutral value we can use.  */
   tree neutral_op = NULL_TREE;
   tree initial_value = NULL_TREE;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 1e5caf45afc..11f3dc8db0a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2665,7 +2665,8 @@ extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
                                    unsigned int);
 extern gimple_seq vect_gen_len (tree, tree, tree, tree);
 extern vect_reduc_info info_for_reduction (loop_vec_info, slp_tree);
-extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
+extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *,
+                                         tree = NULL_TREE);
 
 /* Drive for loop transformation stage.  */
 extern class loop *vect_transform_loop (loop_vec_info, gimple *);
-- 
2.51.0

Reply via email to