The target-independent and aarch64 bits mostly look good to me, but
a few comments/questions:

Alfie Richards <alfie.richa...@arm.com> writes:
> diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
> index a604511db71..4eb37f5818f 100644
> --- a/gcc/cp/typeck.cc
> +++ b/gcc/cp/typeck.cc
> @@ -4489,6 +4489,16 @@ cp_build_function_call_vec (tree function, vec<tree, 
> va_gc> **params,
>       return error_mark_node;
>        fndecl = function;
>  
> +      /* For target_version semantics, the funciton set cannot be called

Typo: function.

> +      if there is no default version in scope.  */
> +      if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE
> +        && !is_function_default_version (fndecl))
> +      {
> +        if (complain & tf_error)
> +         error ("no default version in scope");
> +        return error_mark_node;
> +      }
> +
>        /* Convert anything with function type to a pointer-to-function.  */
>        if (DECL_MAIN_P (function))
>       {
> @@ -90,13 +97,48 @@ create_dispatcher_calls (struct cgraph_node *node)
>  
>    cgraph_node *inode = cgraph_node::get (idecl);
>    gcc_assert (inode);
> -  tree resolver_decl = targetm.generate_version_dispatcher_body (inode);
> -
> -  /* Update aliases.  */
> -  inode->alias = true;
> -  inode->alias_target = resolver_decl;
> -  if (!inode->analyzed)
> -    inode->resolve_alias (cgraph_node::get (resolver_decl));
> +  cgraph_function_version_info *inode_info = inode->function_version ();
> +  gcc_assert (inode_info);
> +
> +  tree resolver_decl = NULL;
> +
> +  /* For target_version semantics, if there is a lone default declaration
> +     it needs to be mangled, with an alias from the dispatched symbol to the
> +     default version.  */
> +  if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE
> +      && TREE_STATIC (node->decl)
> +      && inode_info->next
> +      && !inode_info->next->next)
> +    {
> +      inode->alias = true;
> +      inode->alias_target = inode_info->next->this_node->decl;
> +      inode->externally_visible = true;
> +      if (!inode->analyzed)
> +     inode->resolve_alias
> +       (cgraph_node::get (inode_info->next->this_node->decl));

I realise you're just inheriting this construct from the existing code,
but do you know why it's written the way that it is?  It seems strange on
the face of it to have some parts of the alias set-up be unconditional
and other parts be conditional on !inode->analyzed.  When is inode->analyzed
true vs. false in this context?

Also, AIUI, in:

     inode->alias_target = ...;
     if (!inode->analyzed)
       inode->resolve_alias (...);

the non-error path in resolve_alias will set alias_target back to null.

> +
> +      DECL_ATTRIBUTES (idecl)
> +     = make_attribute ("alias",
> +                       IDENTIFIER_POINTER
> +                         (DECL_ASSEMBLER_NAME
> +                            (inode_info->next->this_node->decl)),
> +                       DECL_ATTRIBUTES (node->decl));
> +      TREE_USED (idecl) = true;
> +      DECL_EXTERNAL (idecl) = false;
> +      TREE_STATIC (idecl) = true;
> +      return;
> +    }
> +  /* In target_version semantics, only create the resolver if the
> +     default node is implemented.  */
> +  else if (TARGET_HAS_FMV_TARGET_ATTRIBUTE || TREE_STATIC (node->decl))
> +    {
> +      resolver_decl = targetm.generate_version_dispatcher_body (inode);
> +      /* Update aliases.  */
> +      inode->alias = true;
> +      inode->alias_target = resolver_decl;
> +      if (!inode->analyzed)
> +     inode->resolve_alias (cgraph_node::get (resolver_decl));
> +    }
>  
>    auto_vec<cgraph_edge *> edges_to_redirect;
>    /* We need to capture the references by value rather than just pointers to 
> them
> @@ -333,8 +375,7 @@ expand_target_clones (struct cgraph_node *node, bool 
> definition)
>       continue;
>  
>        /* Create new target clone.  */
> -      tree attributes = make_attribute (new_attr_name, attr,
> -                                     DECL_ATTRIBUTES (node->decl));
> +      tree attributes = make_attribute (new_attr_name, attr, attrs);
>  
>        cgraph_node *new_node
>       = create_target_clone (node, definition, NULL, attributes);

Just checking: is there a specific reason for making this change here
rather than in patch 2?  I wasn't sure why the link for "default" was
changed there (to drop the original target_clones attribute) and the
link for non-default versions was changed here.

In case it sounds otherwise, I'm not trying to be pedantic about what
goes where.  I just wasn't sure if this was tied to the other semantic
changes in a way that I didn't understand.

> @@ -424,15 +465,76 @@ redirect_to_specific_clone (cgraph_node *node)
>      }
>  }
>  
> +static bool is_simple_target_clones_case (cgraph_node *node)

Formatting nit: the function name should start a new line.
There should be a function comment too (in particular defining
what "simple" means in this context).

> +{
> +  /* target attribute semantics doesnt support the complex case,
> +     so this is always true.  */
> +  if (TARGET_HAS_FMV_TARGET_ATTRIBUTE)
> +    return true;
> +
> +  int num_defaults = 0;
> +  auto versions = get_clone_versions (node->decl, &num_defaults);
> +  if (versions.is_empty () || num_defaults != 1)
> +    return false;
> +
> +  cgraph_function_version_info *fv = node->function_version ();
> +
> +  if (fv && (fv->next || fv->prev))
> +    return false;
> +
> +  return true;
> +}
> +
>  static unsigned int
> -ipa_target_clone (void)
> +ipa_target_clone (bool early)
>  {
>    struct cgraph_node *node;
>    auto_vec<cgraph_node *> to_dispatch;
>  
> +  /* Don't need to do anything early for target attribute semantics.  */
> +  if (early && TARGET_HAS_FMV_TARGET_ATTRIBUTE)
> +    return 0;
> +
> +  /* For target attribute semantics, this pass skips the early phase, and in
> +     the later stage is only responsible for expanding and dispatching
> +     target_clone declarations, as target annotated functions are dispatched
> +     in the front end.
> +
> +     The expanding and dispatching can be done at the late stage as the
> +     target_clone functions aren't allowed to be part of a larger FMV set, so
> +     all versions will all have the same body, so early optimisations are 
> safe
> +     to treat a call to a target_clones set as a call to one function.
> +
> +     For target_version semantics, this pass is responsible for expanding
> +     target_clones and dispatching all FMV function sets, including ones only
> +     made up of target_version declarations.
> +
> +     Cases where there is more than one declaration must be expanded and
> +     dispatched at the early stage, as the declarations may have different
> +     bodies, and so the early optimisation passes would not be valid.
> +
> +     The late stage is only used for the expansion and dispatching of the 
> simple
> +     case where the FMV set is defined by a single target_clone attribute.  
> */

Nice comment, thanks!

> +
>    FOR_EACH_FUNCTION (node)
> -    if (expand_target_clones (node, node->definition))
> -      to_dispatch.safe_push (node);
> +    {
> +      /* In the early stage, we need to expand any target clone that is not
> +      the simple case.  */
> +      if ((!early || !is_simple_target_clones_case (node)))
> +     if (expand_target_clones (node, node->definition)
> +         && TARGET_HAS_FMV_TARGET_ATTRIBUTE)
> +       /* In non target_version semantics, dispatch all target clones.  */
> +       to_dispatch.safe_push (node);

It looks like both the early and late passes would try to expand
the non-simple case, is that right?  Is that necessary, or could
we instead use:

        if (early == !is_simple_target_clones_case (node))

?  Maybe that just seems clearer to me and nobody else though.

> +    }
> +
> +  /* In target_version semantics dispatch all FMV function sets with a 
> default
> +     implementation.  */
> +  if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE)
> +    FOR_EACH_FUNCTION (node)
> +      if (is_function_default_version (node->decl)
> +       && !lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl))
> +       && DECL_FUNCTION_VERSIONED (node->decl))
> +     to_dispatch.safe_push (node);

Similarly here, I wasn't sure why we did this for both passes.

Thanks,
Richard

Reply via email to