On Tue, Jun 24, 2025 at 1:18 PM Alexander Monakov <amona...@ispras.ru> wrote: > > > > On Fri, May 23, 2025 at 2:31 PM Alexander Monakov <amona...@ispras.ru> > > > wrote: > > > > > > > > In PR 105965 we accepted a request to form FMA instructions when the > > > > source code is using a narrow generic vector that contains just one > > > > element, corresponding to V1SF or V1DF mode, while the backend does not > > > > expand fma patterns for such modes. > > > > > > > > For this to work under -ffp-contract=on, we either need to modify > > > > backends, or emulate such degenerate-vector FMA via scalar FMA in > > > > tree-vect-generic. Do the latter. > > > > > > Can you instead apply the lowering during gimplification? That is because > > > having an unsupported internal-function in the IL the user could not have > > > emitted directly is somewhat bad. I thought the vector lowering could > > > be generalized for more single-argument internal functions but then no > > > such unsupported calls should exist in the first place. > > > > Sure, like below? Not fully tested yet. > > Ping — now bootstrapped and regtested.
LGTM. Thanks, Richard. > > -- 8< -- > > > > From 4caee92434d9425912979b285725166b22f40a87 Mon Sep 17 00:00:00 2001 > > From: Alexander Monakov <amona...@ispras.ru> > > Date: Wed, 21 May 2025 18:35:45 +0300 > > Subject: [PATCH v2] allow contraction to synthetic single-element vector FMA > > > > In PR 105965 we accepted a request to form FMA instructions when the > > source code is using a narrow generic vector that contains just one > > element, corresponding to V1SF or V1DF mode, while the backend does not > > expand fma patterns for such modes. > > > > For this to work under -ffp-contract=on, we either need to modify > > backends, or emulate such degenerate-vector FMA via scalar FMA. > > Do the latter, in gimplification hook together with contraction. > > > > gcc/c-family/ChangeLog: > > > > * c-gimplify.cc (fma_supported_p): Allow forming single-element > > vector FMA when scalar FMA is available. > > (c_gimplify_expr): Allow vector types. > > --- > > gcc/c-family/c-gimplify.cc | 50 ++++++++++++++++++++++++++++++-------- > > 1 file changed, 40 insertions(+), 10 deletions(-) > > > > diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc > > index c6fb764656..6c313287e6 100644 > > --- a/gcc/c-family/c-gimplify.cc > > +++ b/gcc/c-family/c-gimplify.cc > > @@ -870,12 +870,28 @@ c_build_bind_expr (location_t loc, tree block, tree > > body) > > return bind; > > } > > > > +enum fma_expansion > > +{ > > + FMA_NONE, > > + FMA_DIRECT, > > + FMA_VEC1_SYNTHETIC > > +}; > > + > > /* Helper for c_gimplify_expr: test if target supports fma-like FN. */ > > > > -static bool > > +static fma_expansion > > fma_supported_p (enum internal_fn fn, tree type) > > { > > - return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH); > > + if (direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH)) > > + return FMA_DIRECT; > > + /* Accept single-element vector FMA (see PR 105965) when the > > + backend handles the scalar but not the vector mode. 
*/ > > + if (VECTOR_TYPE_P (type) > > + && known_eq (TYPE_VECTOR_SUBPARTS (type), 1U) > > + && direct_internal_fn_supported_p (fn, TREE_TYPE (type), > > + OPTIMIZE_FOR_BOTH)) > > + return FMA_VEC1_SYNTHETIC; > > + return FMA_NONE; > > } > > > > /* Gimplification of expression trees. */ > > @@ -936,13 +952,14 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p > > ATTRIBUTE_UNUSED, > > case MINUS_EXPR: > > { > > tree type = TREE_TYPE (*expr_p); > > + enum fma_expansion how; > > /* For -ffp-contract=on we need to attempt FMA contraction only > > during initial gimplification. Late contraction across statement > > boundaries would violate language semantics. */ > > - if (SCALAR_FLOAT_TYPE_P (type) > > + if ((SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)) > > && flag_fp_contract_mode == FP_CONTRACT_ON > > && cfun && !(cfun->curr_properties & PROP_gimple_any) > > - && fma_supported_p (IFN_FMA, type)) > > + && (how = fma_supported_p (IFN_FMA, type)) != FMA_NONE) > > { > > bool neg_mul = false, neg_add = code == MINUS_EXPR; > > > > @@ -973,7 +990,7 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p > > ATTRIBUTE_UNUSED, > > enum internal_fn ifn = IFN_FMA; > > if (neg_mul) > > { > > - if (fma_supported_p (IFN_FNMA, type)) > > + if (fma_supported_p (IFN_FNMA, type) != FMA_NONE) > > ifn = IFN_FNMA; > > else > > ops[0] = build1 (NEGATE_EXPR, type, ops[0]); > > @@ -981,21 +998,34 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p > > ATTRIBUTE_UNUSED, > > if (neg_add) > > { > > enum internal_fn ifn2 = ifn == IFN_FMA ? IFN_FMS : IFN_FNMS; > > - if (fma_supported_p (ifn2, type)) > > + if (fma_supported_p (ifn2, type) != FMA_NONE) > > ifn = ifn2; > > else > > ops[2] = build1 (NEGATE_EXPR, type, ops[2]); > > } > > /* Avoid gimplify_arg: it emits all side effects into *PRE_P. 
*/ > > for (auto &&op : ops) > > - if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue) > > - == GS_ERROR) > > - return GS_ERROR; > > + { > > + if (how == FMA_VEC1_SYNTHETIC) > > + op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (type), op); > > + if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, > > fb_rvalue) > > + == GS_ERROR) > > + return GS_ERROR; > > + } > > > > gcall *call = gimple_build_call_internal_vec (ifn, ops); > > gimple_seq_add_stmt_without_update (pre_p, call); > > *expr_p = create_tmp_var (type); > > - gimple_call_set_lhs (call, *expr_p); > > + if (how == FMA_DIRECT) > > + gimple_call_set_lhs (call, *expr_p); > > + else > > + { > > + tree lhs = create_tmp_var (TREE_TYPE (type)); > > + gimple_call_set_lhs (call, lhs); > > + tree ctor = build_constructor_single (type, 0, lhs); > > + gimple *g = gimple_build_assign (*expr_p, CONSTRUCTOR, ctor); > > + gimple_seq_add_stmt_without_update (pre_p, g); > > + } > > return GS_ALL_DONE; > > } > > break; > >