On Tue, Jun 24, 2025 at 1:18 PM Alexander Monakov <amona...@ispras.ru> wrote:
>
> > > On Fri, May 23, 2025 at 2:31 PM Alexander Monakov <amona...@ispras.ru> wrote:
> > > >
> > > > In PR 105965 we accepted a request to form FMA instructions when the
> > > > source code is using a narrow generic vector that contains just one
> > > > element, corresponding to V1SF or V1DF mode, while the backend does not
> > > > expand fma patterns for such modes.
> > > >
> > > > For this to work under -ffp-contract=on, we either need to modify
> > > > backends, or emulate such degenerate-vector FMA via scalar FMA in
> > > > tree-vect-generic.  Do the latter.
> > >
> > > Can you instead apply the lowering during gimplification?  That is because
> > > having an unsupported internal-function in the IL the user could not have
> > > emitted directly is somewhat bad.  I thought the vector lowering could
> > > be generalized for more single-argument internal functions but then no
> > > such unsupported calls should exist in the first place.
> >
> > Sure, like below?  Not fully tested yet.
>
> Ping — now bootstrapped and regtested.

LGTM.

Thanks,
Richard.

> > -- 8< --
> >
> > From 4caee92434d9425912979b285725166b22f40a87 Mon Sep 17 00:00:00 2001
> > From: Alexander Monakov <amona...@ispras.ru>
> > Date: Wed, 21 May 2025 18:35:45 +0300
> > Subject: [PATCH v2] allow contraction to synthetic single-element vector FMA
> >
> > In PR 105965 we accepted a request to form FMA instructions when the
> > source code is using a narrow generic vector that contains just one
> > element, corresponding to V1SF or V1DF mode, while the backend does not
> > expand fma patterns for such modes.
> >
> > For this to work under -ffp-contract=on, we either need to modify
> > backends, or emulate such degenerate-vector FMA via scalar FMA.
> > Do the latter, in gimplification hook together with contraction.
> >
> > gcc/c-family/ChangeLog:
> >
> >       * c-gimplify.cc (fma_supported_p): Allow forming single-element
> >       vector FMA when scalar FMA is available.
> >       (c_gimplify_expr): Allow vector types.
> > ---
> >  gcc/c-family/c-gimplify.cc | 50 ++++++++++++++++++++++++++++++--------
> >  1 file changed, 40 insertions(+), 10 deletions(-)
> >
> > diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
> > index c6fb764656..6c313287e6 100644
> > --- a/gcc/c-family/c-gimplify.cc
> > +++ b/gcc/c-family/c-gimplify.cc
> > @@ -870,12 +870,28 @@ c_build_bind_expr (location_t loc, tree block, tree body)
> >    return bind;
> >  }
> >
> > +enum fma_expansion
> > +{
> > +  FMA_NONE,
> > +  FMA_DIRECT,
> > +  FMA_VEC1_SYNTHETIC
> > +};
> > +
> >  /* Helper for c_gimplify_expr: test if target supports fma-like FN.  */
> >
> > -static bool
> > +static fma_expansion
> >  fma_supported_p (enum internal_fn fn, tree type)
> >  {
> > -  return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
> > +  if (direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH))
> > +    return FMA_DIRECT;
> > +  /* Accept single-element vector FMA (see PR 105965) when the
> > +     backend handles the scalar but not the vector mode.  */
> > +  if (VECTOR_TYPE_P (type)
> > +      && known_eq (TYPE_VECTOR_SUBPARTS (type),  1U)
> > +      && direct_internal_fn_supported_p (fn, TREE_TYPE (type),
> > +                                      OPTIMIZE_FOR_BOTH))
> > +    return FMA_VEC1_SYNTHETIC;
> > +  return FMA_NONE;
> >  }
> >
> >  /* Gimplification of expression trees.  */
> > @@ -936,13 +952,14 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
> >      case MINUS_EXPR:
> >        {
> >       tree type = TREE_TYPE (*expr_p);
> > +     enum fma_expansion how;
> >       /* For -ffp-contract=on we need to attempt FMA contraction only
> >          during initial gimplification.  Late contraction across statement
> >          boundaries would violate language semantics.  */
> > -     if (SCALAR_FLOAT_TYPE_P (type)
> > +     if ((SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))
> >           && flag_fp_contract_mode == FP_CONTRACT_ON
> >           && cfun && !(cfun->curr_properties & PROP_gimple_any)
> > -         && fma_supported_p (IFN_FMA, type))
> > +         && (how = fma_supported_p (IFN_FMA, type)) != FMA_NONE)
> >         {
> >           bool neg_mul = false, neg_add = code == MINUS_EXPR;
> >
> > @@ -973,7 +990,7 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
> >           enum internal_fn ifn = IFN_FMA;
> >           if (neg_mul)
> >             {
> > -             if (fma_supported_p (IFN_FNMA, type))
> > +             if ((how = fma_supported_p (IFN_FNMA, type)) != FMA_NONE)
> >                 ifn = IFN_FNMA;
> >               else
> >                 ops[0] = build1 (NEGATE_EXPR, type, ops[0]);
> > @@ -981,21 +998,34 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
> >           if (neg_add)
> >             {
> >               enum internal_fn ifn2 = ifn == IFN_FMA ? IFN_FMS : IFN_FNMS;
> > -             if (fma_supported_p (ifn2, type))
> > +             if ((how = fma_supported_p (ifn2, type)) != FMA_NONE)
> >                 ifn = ifn2;
> >               else
> >                 ops[2] = build1 (NEGATE_EXPR, type, ops[2]);
> >             }
> >           /* Avoid gimplify_arg: it emits all side effects into *PRE_P.  */
> >           for (auto &&op : ops)
> > -           if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
> > -               == GS_ERROR)
> > -             return GS_ERROR;
> > +           {
> > +             if (how == FMA_VEC1_SYNTHETIC)
> > +               op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (type), op);
> > +             if (gimplify_expr (&op, pre_p, post_p, is_gimple_val, fb_rvalue)
> > +                 == GS_ERROR)
> > +               return GS_ERROR;
> > +           }
> >
> >           gcall *call = gimple_build_call_internal_vec (ifn, ops);
> >           gimple_seq_add_stmt_without_update (pre_p, call);
> >           *expr_p = create_tmp_var (type);
> > -         gimple_call_set_lhs (call, *expr_p);
> > +         if (how == FMA_DIRECT)
> > +           gimple_call_set_lhs (call, *expr_p);
> > +         else
> > +           {
> > +             tree lhs = create_tmp_var (TREE_TYPE (type));
> > +             gimple_call_set_lhs (call, lhs);
> > +             tree ctor = build_constructor_single (type, 0, lhs);
> > +             gimple *g = gimple_build_assign (*expr_p, CONSTRUCTOR, ctor);
> > +             gimple_seq_add_stmt_without_update (pre_p, g);
> > +           }
> >           return GS_ALL_DONE;
> >         }
> >       break;
> >

Reply via email to