In PR 105965 we accepted a request to form FMA instructions when the
source code is using a narrow generic vector that contains just one
element, corresponding to V1SF or V1DF mode, while the backend does not
expand fma patterns for such modes.
For this to work under -ffp-contract=on, we need either to modify the
backends or to emulate such degenerate-vector FMAs via scalar FMA in
tree-vect-generic. Do the latter.
gcc/c-family/ChangeLog:
* c-gimplify.cc (fma_supported_p): Allow forming single-element
vector FMA when scalar FMA is available.
(c_gimplify_expr): Allow vector types.
gcc/ChangeLog:
* tree-vect-generic.cc (expand_vec1_fma): New helper. Use it...
(expand_vector_operations_1): ... here to handle IFN_FMA, IFN_FMS,
IFN_FNMA and IFN_FNMS.
---
gcc/c-family/c-gimplify.cc | 10 ++++++--
gcc/tree-vect-generic.cc | 48 ++++++++++++++++++++++++++++++++++++--
2 files changed, 54 insertions(+), 4 deletions(-)
diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
index c6fb764656..1942d5019e 100644
--- a/gcc/c-family/c-gimplify.cc
+++ b/gcc/c-family/c-gimplify.cc
@@ -875,7 +875,13 @@ c_build_bind_expr (location_t loc, tree block, tree body)
static bool
fma_supported_p (enum internal_fn fn, tree type)
{
- return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
+ return (direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH)
+ /* Also accept FN on a single-element vector TYPE (see PR 105965) when
+ the backend supports FN on the element type but not on the vector.  */
+ || (VECTOR_TYPE_P (type)
+ && known_eq (TYPE_VECTOR_SUBPARTS (type), 1U)
+ && direct_internal_fn_supported_p (fn, TREE_TYPE (type),
+ OPTIMIZE_FOR_BOTH)));
}
/* Gimplification of expression trees. */
@@ -939,7 +945,7 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p
ATTRIBUTE_UNUSED,
/* For -ffp-contract=on we need to attempt FMA contraction only
during initial gimplification. Late contraction across statement
boundaries would violate language semantics. */
- if (SCALAR_FLOAT_TYPE_P (type)
+ if ((SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))
&& flag_fp_contract_mode == FP_CONTRACT_ON
&& cfun && !(cfun->curr_properties & PROP_gimple_any)
&& fma_supported_p (IFN_FMA, type))
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index 3c68361870..954b84edce 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1983,6 +1983,36 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
gsi_replace (gsi, g, false);
}
+/* Expand the IFN_FMA-family call at GSI, assuming its vector holds one scalar.
+ c_gimplify_expr can introduce it when performing FMA contraction.  */
+
+static void
+expand_vec1_fma (gimple_stmt_iterator *gsi)
+{
+ gcall *call = as_a <gcall *> (gsi_stmt (*gsi));
+ tree type = TREE_TYPE (gimple_call_arg (call, 0));
+ if (!VECTOR_TYPE_P (type))
+ return; /* Non-vector call: leave it untouched.  */
+ gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), 1U));
+
+ for (int i = 0; i < 3; i++)
+ {
+ tree arg = gimple_call_arg (call, i);
+ arg = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (type), arg);
+ gimple_call_set_arg (call, i, arg); /* Operand is now element-typed.  */
+ }
+ tree lhs = gimple_call_lhs (call);
+ if (lhs)
+ {
+ tree new_lhs = make_ssa_name (TREE_TYPE (type)); /* Element-typed result.  */
+ gimple_call_set_lhs (call, new_lhs);
+ tree ctor = build_constructor_single (type, 0, new_lhs);
+ gimple *g = gimple_build_assign (lhs, CONSTRUCTOR, ctor);
+ gsi_insert_after (gsi, g, GSI_NEW_STMT); /* Rebuild the vector LHS.  */
+ }
+ update_stmt (call);
+}
+
/* Process one statement. If we identify a vector operation, expand it. */
static void
@@ -1998,8 +2028,22 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
if (!stmt)
{
- if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
- expand_vector_conversion (gsi);
+ gcall *call = dyn_cast <gcall *> (gsi_stmt (*gsi));
+ if (!call || !gimple_call_internal_p (call))
+ return;
+ switch (gimple_call_internal_fn (call))
+ {
+ case IFN_VEC_CONVERT:
+ return expand_vector_conversion (gsi);
+ case IFN_FMA:
+ case IFN_FMS:
+ case IFN_FNMA:
+ case IFN_FNMS:
+ if (!direct_internal_fn_supported_p (call, OPTIMIZE_FOR_BOTH))
+ return expand_vec1_fma (gsi);
+ default:
+ break;
+ }
return;
}
--
2.49.0