Hi!

Ping.

On Sat, 21 May 2016 17:59:17 +0200, I wrote:
> As discussed in <https://gcc.gnu.org/PR70945> "Offloading: compatibility
> of target and offloading toolchains", there are situations where we have
> to do more work to ensure compatibility between target and offloading
> toolchains.
> 
> The first thing I'm working on is math functions usage in offloaded
> regions.
> 
> Here is a first patch, addressing glibc's finite math optimizations: if
> -ffinite-math-only (as implied by -ffast-math, or -Ofast) is in effect,
> glibc's <math.h> is known to include <bits/math-finite.h> for "special
> entry points to use when the compiler got told to only expect finite
> results".  This divertes the math functions' assembler names from
> "[function]" to "__[function]_finite".  This, obviously, is incompatible
> with offloading targets that don't use glibc, and thus don't provide
> these "__[function]_finite" entry points.
> 
> In response to Alexander's
> <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70945#c3>, I argue this
> does belong into the generic offloading data handling, instead of a
> nvptx-specific pass, for the reason that it is not a nvptx-specific
> transformation but addresses a general target vs. offloading target
> configuration mismatch.
> 
> If I understand him correctly, Joseph in
> <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70945#c4> confirms my idea
> about how this may use some extension (attributes), allowing us to get
> rid of the "__[function]_finite" name matching heuristic.  That's indeed
> something to work on, but as it will take a rather long time until glibc
> changes make their way into distributions that end users are using, I'd
> like to start with the heuristic as implemented, and adjust this later
> on.
> 
> OK for trunk?  I'm working on a test case, too.
> 
> commit 0f65dbe65e883d2294c055631eccb07869bc5375
> Author: Thomas Schwinge <tho...@codesourcery.com>
> Date:   Fri May 13 17:02:30 2016 +0200
> 
>     [PR other/70945] Handle function_glibc_finite_math in offloading
>     
>       gcc/
>       PR other/70945
>       * targhooks.c (default_libc_has_function): Update comment.
>       * target.def (libc_has_function): Likewise.
>       * doc/tm.texi: Regenerate.
>       * coretypes.h (enum function_class): Add
>       function_glibc_finite_math.
>       * config/darwin.c (darwin_libc_has_function): Handle it.
>       * lto-streamer.h (enum lto_section_type): Rename
>       LTO_section_offload_table to LTO_section_offload_data.  Adjust all
>       users.
>       * lto-cgraph.c (void output_offload_data): New function, split out
>       of output_offload_tables.  Adjust all users.  Stream the target's
>       function_glibc_finite_math property.
>       (input_offload_data): New function, split out of
>       input_offload_tables.  Adjust all users.  Handle mismatch between
>       the target's and the offloading target's
>       function_glibc_finite_math property.
> ---
>  gcc/config/darwin.c    |   2 +
>  gcc/coretypes.h        |  11 ++-
>  gcc/doc/tm.texi        |   2 +-
>  gcc/lto-cgraph.c       | 181 
> ++++++++++++++++++++++++++++++++++++-------------
>  gcc/lto-streamer-out.c |   2 +-
>  gcc/lto-streamer.h     |   6 +-
>  gcc/lto/lto.c          |   2 +-
>  gcc/target.def         |   2 +-
>  gcc/targhooks.c        |   2 +-
>  9 files changed, 152 insertions(+), 58 deletions(-)
> 
> diff --git gcc/config/darwin.c gcc/config/darwin.c
> index 0055d80..92fe3e5 100644
> --- gcc/config/darwin.c
> +++ gcc/config/darwin.c
> @@ -3401,6 +3401,8 @@ darwin_libc_has_function (enum function_class fn_class)
>        || fn_class == function_c99_misc)
>      return (TARGET_64BIT
>           || strverscmp (darwin_macosx_version_min, "10.3") >= 0);
> +  if (fn_class == function_glibc_finite_math)
> +    return false;
>  
>    return true;
>  }
> diff --git gcc/coretypes.h gcc/coretypes.h
> index b3a91a6..aa48b5a 100644
> --- gcc/coretypes.h
> +++ gcc/coretypes.h
> @@ -305,14 +305,21 @@ union _dont_use_tree_here_;
>  
>  #endif
>  
> -/* Classes of functions that compiler needs to check
> +/* Properties, such as classes of functions that the compiler can check
>     whether they are present at the runtime or not.  */
>  enum function_class {
>    function_c94,
>    function_c99_misc,
>    function_c99_math_complex,
>    function_sincos,
> -  function_c11_misc
> +  function_c11_misc,
> +  /* If -ffinite-math-only (as implied by -ffast-math, or -Ofast) is in 
> effect,
> +     glibc's <math.h> is known to include <bits/math-finite.h> for "special
> +     entry points to use when the compiler got told to only expect finite
> +     results".  This divertes the math functions' assembler names from
> +     "[function]" to "__[function]_finite".  This property indicates whether
> +     such diversion may occur, not whether it actually has.  */
> +  function_glibc_finite_math
>  };
>  
>  /* Enumerate visibility settings.  This is deliberately ordered from most
> diff --git gcc/doc/tm.texi gcc/doc/tm.texi
> index 8c7f2a1..4ce3a43 100644
> --- gcc/doc/tm.texi
> +++ gcc/doc/tm.texi
> @@ -5308,7 +5308,7 @@ macro, a reasonable default is used.
>  @end defmac
>  
>  @deftypefn {Target Hook} bool TARGET_LIBC_HAS_FUNCTION (enum function_class 
> @var{fn_class})
> -This hook determines whether a function from a class of functions
> +This hook determines properties, such as whether a class of functions
>  @var{fn_class} is present at the runtime.
>  @end deftypefn
>  
> diff --git gcc/lto-cgraph.c gcc/lto-cgraph.c
> index cffa399..4e7aa17 100644
> --- gcc/lto-cgraph.c
> +++ gcc/lto-cgraph.c
> @@ -38,6 +38,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "ipa-utils.h"
>  #include "omp-offload.h"
>  #include "ipa-chkp.h"
> +#include "target.h"
> +#include "output.h"
> +#include "builtins.h"
>  
>  /* True when asm nodes has been output.  */
>  bool asm_nodes_output = false;
> @@ -1094,21 +1097,37 @@ read_string (struct lto_input_block *ib)
>    return str;
>  }
>  
> +/* Output offload data.  */
> +
> +static void output_offload_tables (struct lto_simple_output_block *);
> +
> +void output_offload_data (void)
> +{
> +  /* Return early if there is no offload data.  */
> +  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
> +    return;
> +
> +  struct lto_simple_output_block *ob
> +    = lto_create_simple_output_block (LTO_section_offload_data);
> +
> +  /* Stream the target's function_glibc_finite_math property.  */
> +  bool g_f_m = targetm.libc_has_function (function_glibc_finite_math);
> +  streamer_write_hwi_stream (ob->main_stream, g_f_m);
> +
> +  output_offload_tables (ob);
> +
> +  lto_destroy_simple_output_block (ob);
> +}
> +
>  /* Output function/variable tables that will allow libgomp to look up offload
>     target code.
>     OFFLOAD_FUNCS is filled in expand_omp_target, OFFLOAD_VARS is filled in
>     varpool_node::get_create.  In WHOPR (partitioned) mode during the WPA 
> stage
>     both OFFLOAD_FUNCS and OFFLOAD_VARS are filled by input_offload_tables.  
> */
>  
> -void
> -output_offload_tables (void)
> +static void
> +output_offload_tables (struct lto_simple_output_block *ob)
>  {
> -  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
> -    return;
> -
> -  struct lto_simple_output_block *ob
> -    = lto_create_simple_output_block (LTO_section_offload_table);
> -
>    for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
>      {
>        streamer_write_enum (ob->main_stream, LTO_symtab_tags,
> @@ -1126,7 +1145,6 @@ output_offload_tables (void)
>      }
>  
>    streamer_write_uhwi_stream (ob->main_stream, 0);
> -  lto_destroy_simple_output_block (ob);
>  
>    /* In WHOPR mode during the WPA stage the joint offload tables need to be
>       streamed to one partition only.  That's why we free offload_funcs and
> @@ -1884,65 +1902,132 @@ input_symtab (void)
>      }
>  }
>  
> -/* Input function/variable tables that will allow libgomp to look up offload
> -   target code, and store them into OFFLOAD_FUNCS and OFFLOAD_VARS.  */
> +/* Input offload data.  */
> +
> +static void input_offload_tables (struct lto_input_block *,
> +                               struct lto_file_decl_data *, bool);
>  
>  void
> -input_offload_tables (bool do_force_output)
> +input_offload_data (bool do_force_output)
>  {
>    struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
>    struct lto_file_decl_data *file_data;
>    unsigned int j = 0;
> +  bool g_f_m_target = false;
>  
>    while ((file_data = file_data_vec[j++]))
>      {
>        const char *data;
>        size_t len;
>        struct lto_input_block *ib
> -     = lto_create_simple_input_block (file_data, LTO_section_offload_table,
> +     = lto_create_simple_input_block (file_data, LTO_section_offload_data,
>                                        &data, &len);
>        if (!ib)
>       continue;
>  
> -      enum LTO_symtab_tags tag
> -     = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
> -      while (tag)
> -     {
> -       if (tag == LTO_symtab_unavail_node)
> -         {
> -           int decl_index = streamer_read_uhwi (ib);
> -           tree fn_decl
> -             = lto_file_decl_data_get_fn_decl (file_data, decl_index);
> -           vec_safe_push (offload_funcs, fn_decl);
> +      /* Merge the target's function_glibc_finite_math property.  */
> +      g_f_m_target |= streamer_read_hwi (ib);
>  
> -           /* Prevent IPA from removing fn_decl as unreachable, since there
> -              may be no refs from the parent function to child_fn in offload
> -              LTO mode.  */
> -           if (do_force_output)
> -             cgraph_node::get (fn_decl)->mark_force_output ();
> -         }
> -       else if (tag == LTO_symtab_variable)
> -         {
> -           int decl_index = streamer_read_uhwi (ib);
> -           tree var_decl
> -             = lto_file_decl_data_get_var_decl (file_data, decl_index);
> -           vec_safe_push (offload_vars, var_decl);
> +      input_offload_tables (ib, file_data, do_force_output);
>  
> -           /* Prevent IPA from removing var_decl as unused, since there
> -              may be no refs to var_decl in offload LTO mode.  */
> -           if (do_force_output)
> -             varpool_node::get (var_decl)->force_output = 1;
> -         }
> -       else
> -         fatal_error (input_location,
> -                      "invalid offload table in %s", file_data->file_name);
> -
> -       tag = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
> -     }
> -
> -      lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
> +      lto_destroy_simple_input_block (file_data, LTO_section_offload_data,
>                                     ib, data, len);
>      }
> +
> +  /* Take action if the target has the function_glibc_finite_math property 
> set,
> +     and that doesn't match the current (that is, offloading target's).  */
> +  bool g_f_m = targetm.libc_has_function (function_glibc_finite_math);
> +  if (g_f_m_target && !g_f_m)
> +    {
> +      struct cgraph_node *node;
> +      FOR_EACH_FUNCTION (node)
> +      {
> +     /* This only applies to references to external math functions.  */
> +     if (!DECL_EXTERNAL (node->decl))
> +       continue;
> +     /* All the relevant math functions are registered as GCC builtins.  */
> +     if (!DECL_BUILT_IN (node->decl)
> +         || (mathfn_built_in (TREE_TYPE (TREE_TYPE (node->decl)),
> +                              DECL_FUNCTION_CODE (node->decl))
> +             == NULL_TREE))
> +       continue;
> +     /* Check whether the assembler name for "[function]" has been set to
> +        "__[function]_finite".  */
> +     if (!DECL_ASSEMBLER_NAME_SET_P (node->decl))
> +       continue;
> +     const char *asm_name
> +       = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl));
> +     if (*asm_name++ != '*')
> +       continue;
> +     size_t ulp_len = strlen (user_label_prefix);
> +     if (ulp_len == 0)
> +       ;
> +     else if (strncmp (asm_name, user_label_prefix, ulp_len) == 0)
> +       asm_name += ulp_len;
> +     else
> +       continue;
> +     if (*asm_name++ != '_')
> +       continue;
> +     if (*asm_name++ != '_')
> +       continue;
> +     const char *name = IDENTIFIER_POINTER (DECL_NAME (node->decl));
> +     size_t name_len = strlen (name);
> +     if (strncmp (asm_name, name, name_len) == 0)
> +       asm_name += name_len;
> +     else
> +       continue;
> +     if (strcmp (asm_name, "_finite") != 0)
> +       continue;
> +     /* ..., and if yes, reset it.  */
> +     symtab->change_decl_assembler_name (node->decl,
> +                                         DECL_NAME (node->decl));
> +      }
> +    }
> +}
> +
> +/* Input function/variable tables that will allow libgomp to look up offload
> +   target code, and store them into OFFLOAD_FUNCS and OFFLOAD_VARS.  */
> +
> +static void
> +input_offload_tables (struct lto_input_block *ib,
> +                   struct lto_file_decl_data *file_data,
> +                   bool do_force_output)
> +{
> +  enum LTO_symtab_tags tag
> +    = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
> +  while (tag)
> +    {
> +      if (tag == LTO_symtab_unavail_node)
> +     {
> +       int decl_index = streamer_read_uhwi (ib);
> +       tree fn_decl
> +         = lto_file_decl_data_get_fn_decl (file_data, decl_index);
> +       vec_safe_push (offload_funcs, fn_decl);
> +
> +       /* Prevent IPA from removing fn_decl as unreachable, since there
> +          may be no refs from the parent function to child_fn in offload
> +          LTO mode.  */
> +       if (do_force_output)
> +         cgraph_node::get (fn_decl)->mark_force_output ();
> +     }
> +      else if (tag == LTO_symtab_variable)
> +     {
> +       int decl_index = streamer_read_uhwi (ib);
> +       tree var_decl
> +         = lto_file_decl_data_get_var_decl (file_data, decl_index);
> +       vec_safe_push (offload_vars, var_decl);
> +
> +       /* Prevent IPA from removing var_decl as unused, since there
> +          may be no refs to var_decl in offload LTO mode.  */
> +       if (do_force_output)
> +         varpool_node::get (var_decl)->force_output = 1;
> +     }
> +      else
> +     fatal_error (input_location,
> +                  "invalid offload table in %s", file_data->file_name);
> +
> +      tag = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
> +    }
>  }
>  
>  /* True when we need optimization summary for NODE.  */
> diff --git gcc/lto-streamer-out.c gcc/lto-streamer-out.c
> index 35e58fd..616266b 100644
> --- gcc/lto-streamer-out.c
> +++ gcc/lto-streamer-out.c
> @@ -2385,7 +2385,7 @@ lto_output (void)
>       statements using the statement UIDs.  */
>    output_symtab ();
>  
> -  output_offload_tables ();
> +  output_offload_data ();
>  
>  #if CHECKING_P
>    lto_bitmap_free (output);
> diff --git gcc/lto-streamer.h gcc/lto-streamer.h
> index 92efdb8..df42cbd 100644
> --- gcc/lto-streamer.h
> +++ gcc/lto-streamer.h
> @@ -242,7 +242,7 @@ enum lto_section_type
>    LTO_section_inline_summary,
>    LTO_section_ipcp_transform,
>    LTO_section_ipa_icf,
> -  LTO_section_offload_table,
> +  LTO_section_offload_data,
>    LTO_section_mode_table,
>    LTO_section_ipa_hsa,
>    LTO_N_SECTION_TYPES                /* Must be last.  */
> @@ -914,8 +914,8 @@ bool lto_symtab_encoder_encode_initializer_p 
> (lto_symtab_encoder_t,
>                                             varpool_node *);
>  void output_symtab (void);
>  void input_symtab (void);
> -void output_offload_tables (void);
> -void input_offload_tables (bool);
> +void output_offload_data (void);
> +void input_offload_data (bool);
>  bool referenced_from_other_partition_p (struct ipa_ref_list *,
>                                       lto_symtab_encoder_t);
>  bool reachable_from_other_partition_p (struct cgraph_node *,
> diff --git gcc/lto/lto.c gcc/lto/lto.c
> index af735cb..c46bfa7 100644
> --- gcc/lto/lto.c
> +++ gcc/lto/lto.c
> @@ -2855,7 +2855,7 @@ read_cgraph_and_symbols (unsigned nfiles, const char 
> **fnames)
>    /* Read the symtab.  */
>    input_symtab ();
>  
> -  input_offload_tables (!flag_ltrans);
> +  input_offload_data (!flag_ltrans);
>  
>    /* Store resolutions into the symbol table.  */
>  
> diff --git gcc/target.def gcc/target.def
> index 6392e73..2ce827d 100644
> --- gcc/target.def
> +++ gcc/target.def
> @@ -2533,7 +2533,7 @@ set via @code{__attribute__}.",
>  
>  DEFHOOK
>  (libc_has_function,
> - "This hook determines whether a function from a class of functions\n\
> + "This hook determines properties, such as whether a class of functions\n\
>  @var{fn_class} is present at the runtime.",
>   bool, (enum function_class fn_class),
>   default_libc_has_function)
> diff --git gcc/targhooks.c gcc/targhooks.c
> index 6b4601b..c1f3c15 100644
> --- gcc/targhooks.c
> +++ gcc/targhooks.c
> @@ -1389,7 +1389,7 @@ default_have_conditional_execution (void)
>  }
>  
>  /* By default we assume that c99 functions are present at the runtime,
> -   but sincos is not.  */
> +   but others are not.  */
>  bool
>  default_libc_has_function (enum function_class fn_class)
>  {


Grüße
 Thomas

Attachment: signature.asc
Description: PGP signature

Reply via email to