The following amends the SLP addsub pattern to also match blends
of .FMA/.FMS and form .VEC_FMADDSUB/.VEC_FMSUBADD from them even
when -ffp-contract=off.

Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.

Richard.

        PR tree-optimization/120808
        * tree-vect-slp-patterns.cc (vect_match_expression_p):
        Take a code_helper and also match calls.
        (addsub_pattern::recognize): Handle .FMA/.FMS pairs
        in addition to PLUS/MINUS.
        (addsub_pattern::build): Adjust.

        * gcc.dg/vect/bb-slp-pr120808.c: Expect the VEC_FMSUBADD
        pattern to be matched instead of scanning for the load transform.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c |  2 +-
 gcc/tree-vect-slp-patterns.cc               | 93 +++++++++++++++------
 2 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c
index c334d6ad8d3..ffa84c298fc 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c
@@ -9,4 +9,4 @@ void f(double x[restrict], double *y, double *z)
 }
 
 /* The following should check for SLP build covering the loads.  */
-/* { dg-final { scan-tree-dump "transform load" "slp2" { target { x86_64-*-* i?86-*-* } } } } */
+/* { dg-final { scan-tree-dump "Found VEC_FMSUBADD pattern" "slp2" { target { x86_64-*-* i?86-*-* } } } } */
diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc
index 24ae203e6ff..95319bc09ff 100644
--- a/gcc/tree-vect-slp-patterns.cc
+++ b/gcc/tree-vect-slp-patterns.cc
@@ -299,18 +299,23 @@ vect_build_swap_evenodd_node (slp_tree node)
    code CODE.  */
 
 static inline bool
-vect_match_expression_p (slp_tree node, tree_code code)
+vect_match_expression_p (slp_tree node, code_helper code)
 {
   if (!node
       || !SLP_TREE_REPRESENTATIVE (node))
     return false;
 
   gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
-  if (!is_gimple_assign (expr)
-      || gimple_assign_rhs_code (expr) != code)
-    return false;
-
-  return true;
+  if (is_gimple_assign (expr)
+      && code.is_tree_code ()
+      && gimple_assign_rhs_code (expr) == (tree_code) code)
+    return true;
+  if (is_a <gcall *> (expr)
+      && !code.is_tree_code ()
+      && gimple_call_combined_fn (expr) == (combined_fn) code)
+    return true;
+
+  return false;
 }
 
 /* Check if the given lane permute in PERMUTES matches an alternating sequence
@@ -1494,15 +1499,33 @@ addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
   unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
   if (l0 == l1)
     return NULL;
+  bool fma_p = false;
   bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
                                          PLUS_EXPR);
   if (!l0add_p
       && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
-    return NULL;
+    {
+      l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], CFN_FMA);
+      if (!l0add_p
+         && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], CFN_FMS))
+       return NULL;
+      fma_p = true;
+    }
   bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
                                          PLUS_EXPR);
+  if (l1add_p && fma_p)
+    return NULL;
   if (!l1add_p
       && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
+    {
+      if (!fma_p)
+       return NULL;
+      l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], CFN_FMA);
+      if (!l1add_p
+         && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], CFN_FMS))
+       return NULL;
+    }
+  else if (!l1add_p && fma_p)
     return NULL;
 
   slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
@@ -1527,26 +1550,31 @@ addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
 
   /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
      (l0add_p), see whether we have FMA variants.  We can only form FMAs
-     if allowed via -ffp-contract=fast.  */
-  if (flag_fp_contract_mode != FP_CONTRACT_FAST
+     if allowed via -ffp-contract=fast or if they were FMA before.  */
+  if (!fma_p
+      && flag_fp_contract_mode != FP_CONTRACT_FAST
       && FLOAT_TYPE_P (SLP_TREE_VECTYPE (l0node)))
     ;
   else if (!l0add_p
-          && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
+          && (fma_p
+              || vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0],
+                                          MULT_EXPR)))
     {
       /* (c * d) -+ a */
       if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
        return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
     }
   else if (l0add_p
-          && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
+          && (fma_p
+              || vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0],
+                                          MULT_EXPR)))
     {
       /* (c * d) +- a */
       if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
        return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
     }
 
-  if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
+  if (!fma_p && !l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
     return new addsub_pattern (node_, IFN_VEC_ADDSUB);
 
   return NULL;
@@ -1610,25 +1638,42 @@ addsub_pattern::build (vec_info *vinfo)
            sub = SLP_TREE_CHILDREN (node)[l1];
            add = SLP_TREE_CHILDREN (node)[l0];
          }
-       slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
        /* Modify the blend node in-place.  */
        SLP_TREE_CHILDREN (node).safe_grow (3, true);
-       SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
-       SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
-       SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
+       gcall *call;
+       stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
+       if (vect_match_expression_p (add, CFN_FMA))
+         {
+           SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (add)[0];
+           SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (add)[1];
+           SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (add)[2];
+           /* Build IFN_VEC_FMADDSUB from the fms representative
+              operands.  */
+           call = gimple_build_call_internal (m_ifn, 3,
+                                              gimple_call_arg (srep->stmt, 0),
+                                              gimple_call_arg (srep->stmt, 1),
+                                              gimple_call_arg (srep->stmt, 2));
+         }
+       else
+         {
+           slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
+           SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
+           SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
+           SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
+           /* Build IFN_VEC_FMADDSUB from the mul/sub representative
+              operands.  */
+           stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
+           call = gimple_build_call_internal (m_ifn, 3,
+                                              gimple_assign_rhs1 (mrep->stmt),
+                                              gimple_assign_rhs2 (mrep->stmt),
+                                              gimple_assign_rhs2 (srep->stmt));
+         }
        SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
        SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
        SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
 
-       /* Build IFN_VEC_FMADDSUB from the mul/sub representative operands.  */
-       stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
-       stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
-       gcall *call = gimple_build_call_internal (m_ifn, 3,
-                                                 gimple_assign_rhs1 (mrep->stmt),
-                                                 gimple_assign_rhs2 (mrep->stmt),
-                                                 gimple_assign_rhs2 (srep->stmt));
        gimple_call_set_lhs (call, make_ssa_name
-                            (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
+                            (TREE_TYPE (gimple_get_lhs (srep->stmt))));
        gimple_call_set_nothrow (call, true);
        gimple_set_bb (call, gimple_bb (srep->stmt));
        stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
-- 
2.43.0

Reply via email to