The following adds named patterns for reducing vector masks with
AND, IOR and XOR, to be used by the vectorizer.  A slight complication
is posed by targets that use scalar integer modes as mask modes, since
for those the mode of low-precision masks is ambiguous.  For this reason
the optab follows what vec_pack_sbool_trunc does and passes an additional
CONST_INT operand indicating the number of lanes in the input mask.
Note that this is always done when the vector mask mode is an integer
mode and never otherwise.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

        * doc/md.texi (reduc_sbool_{and,ior,xor}_scal_<mode>): Document.
        * optabs.def (reduc_sbool_and_scal_optab,
        reduc_sbool_ior_scal_optab, reduc_sbool_xor_scal_optab): New.
        * internal-fn.def (REDUC_SBOOL_AND, REDUC_SBOOL_IOR,
        REDUC_SBOOL_XOR): Likewise.
        * internal-fn.cc (reduc_sbool_direct): New initializer.
        (expand_reduc_sbool_optab_fn): New expander.
        (direct_reduc_sbool_optab_supported_p): New.
---
 gcc/doc/md.texi     | 13 +++++++++++++
 gcc/internal-fn.cc  | 38 ++++++++++++++++++++++++++++++++++++++
 gcc/internal-fn.def |  7 +++++++
 gcc/optabs.def      |  3 +++
 4 files changed, 61 insertions(+)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 44e1149bea8..aa0141314cf 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5736,6 +5736,19 @@ of a vector of mode @var{m}.  Operand 1 is the vector input and operand 0
 is the scalar result.  The mode of the scalar result is the same as one
 element of @var{m}.
 
+@cindex @code{reduc_sbool_and_scal_@var{m}} instruction pattern
+@cindex @code{reduc_sbool_ior_scal_@var{m}} instruction pattern
+@cindex @code{reduc_sbool_xor_scal_@var{m}} instruction pattern
+@item @samp{reduc_sbool_and_scal_@var{m}}
+@itemx @samp{reduc_sbool_ior_scal_@var{m}}
+@itemx @samp{reduc_sbool_xor_scal_@var{m}}
+Compute the bitwise @code{AND}/@code{IOR}/@code{XOR} reduction of the elements
+of a vector boolean of mode @var{m}.  Operand 1 is the vector input and
+operand 0 is the scalar result.  The mode of the scalar result is @code{QImode}
+with its value either zero or one.  If mode @var{m} is a scalar integer mode
+then operand 2 is the number of elements in the input vector, which
+disambiguates the case where @var{m} alone is ambiguous.
+
 @cindex @code{extract_last_@var{m}} instruction pattern
 @item @code{extract_last_@var{m}}
 Find the last set bit in mask operand 1 and extract the associated element
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index fb76e64e53e..514fe98f40d 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -194,6 +194,7 @@ init_internal_fns ()
 #define mask_len_fold_left_direct { 1, 1, false }
 #define check_ptrs_direct { 0, 0, false }
 #define crc_direct { 1, -1, true }
+#define reduc_sbool_direct { 0, 0, true }
 
 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -4099,6 +4100,42 @@ expand_crc_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab)
     }
 }
 
+/* Expand STMT, a call to .REDUC_SBOOL_{AND,IOR,XOR} FN, using OPTAB.  */
+
+static void
+expand_reduc_sbool_optab_fn (internal_fn fn, gcall *stmt, direct_optab optab)
+{
+  tree_pair types = direct_internal_fn_types (fn, stmt);
+  insn_code icode = direct_optab_handler (optab, TYPE_MODE (types.first));
+
+  /* Below copied from expand_fn_using_insn.  Operand 2, the number of
+     mask lanes, is passed only for scalar integer mask modes.  */
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  expand_operand *ops = XALLOCAVEC (expand_operand, 3);
+  rtx lhs_rtx = NULL_RTX;
+  tree lhs = gimple_call_lhs (stmt);
+  if (lhs)
+    lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_call_lhs_operand (&ops[0], lhs_rtx,
+                          insn_data[icode].operand[0].mode);
+
+  tree rhs = gimple_call_arg (stmt, 0);
+  tree rhs_type = TREE_TYPE (rhs);
+  rtx rhs_rtx = expand_normal (rhs);
+  gcc_assert (VECTOR_BOOLEAN_TYPE_P (rhs_type));
+  create_input_operand (&ops[1], rhs_rtx, TYPE_MODE (rhs_type));
+  if (SCALAR_INT_MODE_P (TYPE_MODE (rhs_type)))
+    {
+      rtx nunits = GEN_INT (TYPE_VECTOR_SUBPARTS (rhs_type).to_constant ());
+      gcc_assert (insn_operand_matches (icode, 2, nunits));
+      create_input_operand (&ops[2], nunits, SImode);
+    }
+  expand_insn (icode, SCALAR_INT_MODE_P (TYPE_MODE (rhs_type)) ? 3 : 2, ops);
+  if (lhs_rtx)
+    assign_call_lhs (lhs, lhs_rtx, &ops[0]);
+}
+
 /* Expanders for optabs that can use expand_direct_optab_fn.  */
 
 #define expand_unary_optab_fn(FN, STMT, OPTAB) \
@@ -4261,6 +4298,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_check_ptrs_optab_supported_p direct_optab_supported_p
 #define direct_vec_set_optab_supported_p direct_optab_supported_p
 #define direct_vec_extract_optab_supported_p convert_optab_supported_p
+#define direct_reduc_sbool_optab_supported_p direct_optab_supported_p
 
 /* Return the optab used by internal function FN.  */
 
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 8434a805e28..7874fcfb3df 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -330,6 +330,13 @@ DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW,
                       reduc_ior_scal, unary)
 DEF_INTERNAL_OPTAB_FN (REDUC_XOR, ECF_CONST | ECF_NOTHROW,
                       reduc_xor_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_SBOOL_AND, ECF_CONST | ECF_NOTHROW,
+                      reduc_sbool_and_scal, reduc_sbool)
+DEF_INTERNAL_OPTAB_FN (REDUC_SBOOL_IOR, ECF_CONST | ECF_NOTHROW,
+                      reduc_sbool_ior_scal, reduc_sbool)
+DEF_INTERNAL_OPTAB_FN (REDUC_SBOOL_XOR, ECF_CONST | ECF_NOTHROW,
+                      reduc_sbool_xor_scal, reduc_sbool)
+
 
 /* Extract the last active element from a vector.  */
 DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 790e43f08f4..d13e0b5b848 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -401,6 +401,9 @@ OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
 OPTAB_D (reduc_and_scal_optab,  "reduc_and_scal_$a")
 OPTAB_D (reduc_ior_scal_optab,  "reduc_ior_scal_$a")
 OPTAB_D (reduc_xor_scal_optab,  "reduc_xor_scal_$a")
+OPTAB_D (reduc_sbool_and_scal_optab, "reduc_sbool_and_scal_$a")
+OPTAB_D (reduc_sbool_ior_scal_optab, "reduc_sbool_ior_scal_$a")
+OPTAB_D (reduc_sbool_xor_scal_optab, "reduc_sbool_xor_scal_$a")
 OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a")
 OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
 OPTAB_D (mask_len_fold_left_plus_optab, "mask_len_fold_left_plus_$a")
-- 
2.51.0

Reply via email to