> Difference in v2:
> Previously I was checking for ".local $symbol". Most symbols are not
> marked this way so now I pessimistically assume that any symbol defined
> in assembly may be local.
> 
> ---
> 
> This new pass heuristically detects symbols referenced by toplevel
> assembly to prevent their optimization.
> 
> Heuristics is done by comparing identifiers in assembly to known
> symbols.
> 
> The pass is split into 2 passes, in LGEN and in WPA.
> There must be one pass for WPA to be able to reference any symbol.
> However in WPA there may be multiple symbols with the same name,
> so we handle those local symbols in LGEN.
> 
> gcc/ChangeLog:
> 
>       * asm-toplevel.cc (mark_fragile_ref_by_asm):
>       Add marked_local to handle symbol as local.
>       (ipa_asm_heuristics): New.
>       (class pass_ipa_asm): New.
>       (make_pass_ipa_asm_lgen): New.
>       (make_pass_ipa_asm_wpa): New.
>       * common.opt: New flto-toplevel-asm-heuristics.
>       * passes.def: New asm passes.
>       * timevar.def (TV_IPA_LTO_ASM): New.
>       * tree-pass.h (make_pass_ipa_asm_lgen): New.
>       (make_pass_ipa_asm_wpa): New.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.dg/lto/toplevel-simple-asm-1_0.c: New test.
>       * gcc.dg/lto/toplevel-simple-asm-1_1.c: New test.
>       * gcc.dg/lto/toplevel-simple-asm-2_0.c: New test.
>       * gcc.dg/lto/toplevel-simple-asm-2_1.c: New test.
> ---
>  gcc/asm-toplevel.cc                           | 142 +++++++++++++++++-
>  gcc/common.opt                                |   4 +
>  gcc/passes.def                                |   2 +
>  .../gcc.dg/lto/toplevel-simple-asm-1_0.c      |  19 +++
>  .../gcc.dg/lto/toplevel-simple-asm-1_1.c      |  12 ++
>  .../gcc.dg/lto/toplevel-simple-asm-2_0.c      |  10 ++
>  .../gcc.dg/lto/toplevel-simple-asm-2_1.c      |  12 ++
>  gcc/timevar.def                               |   1 +
>  gcc/tree-pass.h                               |   2 +
>  9 files changed, 202 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
>  create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
>  create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
>  create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
> 
> diff --git a/gcc/asm-toplevel.cc b/gcc/asm-toplevel.cc
> index 3f1f0f0aad5..d01ec2eb0eb 100644
> --- a/gcc/asm-toplevel.cc
> +++ b/gcc/asm-toplevel.cc
> @@ -29,11 +29,11 @@ along with GCC; see the file COPYING3.  If not see
>  /* This symbol must be available and cannot be renamed.
>     Marks the symbol and symbols that reference it.  */
>  static void
> -mark_fragile_ref_by_asm (symtab_node* node)
> +mark_fragile_ref_by_asm (symtab_node* node, bool maybe_local = false)
>  {
>    node->ref_by_asm = true;
>    /* Local symbols must remain in the same partition with their callers.  */
> -  if (!TREE_PUBLIC (node->decl))
> +  if (!TREE_PUBLIC (node->decl) || maybe_local)
>      {
>        unsigned j;
>        ipa_ref *ref;
> @@ -112,3 +112,141 @@ analyze_toplevel_extended_asm ()
>       walk_tree (&l, walk_through_constraints, (void*) &data, NULL);
>      }
>  }
> +
> +
> +/* Checks all toplevel assembly contents and compares them with known 
> symbols.
> +   Marks those symbols with relevant flags.
> +   Heuristics: Detects anything in assembly that looks like an identifer.
> +
> +   This pass must be in WPA, otherwise we will not see all possibly 
> referenced
> +   symbols - if a symbol is only declared, it will not be in the callgraph if
> +   it is only referenced from toplevel assembly.
> +   However in WPA there may be multiple symbols with the same identifier.
> +   The chosen solution is to handle local symbols in LGEN pass first.  */
> +
> +void
> +ipa_asm_heuristics ()
> +{
> +  hash_map<nofree_string_hash, symtab_node *> map;
> +  asm_node *anode = symtab->first_asm_symbol ();
> +  if (!anode)
> +    return;
> +
> +  symtab_node* snode;
> +  if (flag_wpa)
> +    {
> +      FOR_EACH_SYMBOL (snode)
> +     if (TREE_PUBLIC (snode->decl))
> +         map.put (snode->asm_name (), snode);
> +    }
> +  else
> +    {
> +      FOR_EACH_SYMBOL (snode)
> +     map.put (snode->asm_name (), snode);
> +    }
> +
> +  size_t ident_buffer_size = 128;
> +  char* ident = (char*) xmalloc (ident_buffer_size);
> +
> +  for (; anode; anode = anode->next)
> +    {
> +      if (TREE_CODE (anode->asm_str) != STRING_CST)
> +     continue;
> +
> +      const char *asm_str = TREE_STRING_POINTER (anode->asm_str);
> +      int asm_len = TREE_STRING_LENGTH (anode->asm_str);
> +      unsigned l = 0;
> +
> +      for (int i = 0; i < asm_len + 1; ++i)
> +     {
> +       char c = 0;
> +       if (i < asm_len)
> +         c = asm_str[i];
> +
> +       if ('0' <= c && c <= '9')
> +         {
> +           if (l)
> +             ident[l++] = c;
> +         }
> +       else if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_')
> +         ident[l++] = c;
> +       else if (l)
> +         {
> +           ident[l] = 0;
> +           symtab_node **n_ = map.get (ident);
> +           if (n_)
> +             mark_fragile_ref_by_asm (*n_, c == ':');

I would add a dump file which shows what asm statements made symbol to
go global for easier debugging later (I suppose eventually this may get
us surprises)
> +
> +           l = 0;
> +         }
> +
> +       /* Prevent overflow.  */
> +       if (l >= ident_buffer_size)
> +         {
> +           ident_buffer_size *= 2;
> +           ident = (char*) xrealloc (ident, ident_buffer_size);

Perhas auto_vec<char> and safe_push would be more readable (and less
effecient) way to do this in this millenia?
Honza
> +         }
> +     }
> +    }
> +  free (ident);
> +}
> +
> +namespace {
> +
> +const pass_data pass_data_ipa_asm =
> +{
> +  IPA_PASS, /* type */
> +  "ipa-asm", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_IPA_LTO_ASM, /* tv_id */
> +  0, /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  0, /* todo_flags_finish */
> +};
> +
> +class pass_ipa_asm : public ipa_opt_pass_d
> +{
> +public:
> +  pass_ipa_asm (gcc::context *ctxt)
> +    : ipa_opt_pass_d (pass_data_ipa_asm, ctxt,
> +                   NULL, /* generate_summary */
> +                   NULL, /* write_summary */
> +                   NULL, /* read_summary */
> +                   NULL, /* write_optimization_summary */
> +                   NULL, /* read_optimization_summary */
> +                   NULL, /* stmt_fixup */
> +                   0, /* function_transform_todo_flags_start */
> +                   NULL, /* function_transform */
> +                   NULL) /* variable_transform */
> +  {}
> +
> +  /* opt_pass methods: */
> +
> +  bool gate (function *) final override
> +    {
> +      return (flag_lto || flag_wpa) && flag_toplevel_asm_heuristics;
> +    }
> +
> +  unsigned int execute (function *) final override
> +    {
> +      ipa_asm_heuristics ();
> +      return 0;
> +    }
> +
> +};
> +
> +} // anon namespace
> +
> +ipa_opt_pass_d *
> +make_pass_ipa_asm_lgen (gcc::context *ctxt)
> +{
> +  return new pass_ipa_asm (ctxt);
> +}
> +
> +ipa_opt_pass_d *
> +make_pass_ipa_asm_wpa (gcc::context *ctxt)
> +{
> +  return new pass_ipa_asm (ctxt);
> +}
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 0a3d65d4d4a..d0063e0dd14 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2352,6 +2352,10 @@ flto-partition=
>  Common Joined RejectNegative Enum(lto_partition_model) 
> Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED)
>  Specify the algorithm to partition symbols and vars at linktime.
>  
> +flto-toplevel-asm-heuristics
> +Common Var(flag_toplevel_asm_heuristics) Init(0)
> +Enable heuristics to recognize symbol identifiers in toplevel (not extended) 
> assembly and prevent their mangling/deletion.
> +
>  ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h.
>  flto-compression-level=
>  Common Joined RejectNegative UInteger Var(flag_lto_compression_level) 
> Init(-1) IntegerRange(0, 19)
> diff --git a/gcc/passes.def b/gcc/passes.def
> index fac04cd86c7..c97eb069bfc 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -157,10 +157,12 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_ipa_increase_alignment);
>    NEXT_PASS (pass_ipa_tm);
>    NEXT_PASS (pass_ipa_lower_emutls);
> +  NEXT_PASS (pass_ipa_asm_lgen);
>    TERMINATE_PASS_LIST (all_small_ipa_passes)
>  
>    INSERT_PASSES_AFTER (all_regular_ipa_passes)
>    NEXT_PASS (pass_analyzer);
> +  NEXT_PASS (pass_ipa_asm_wpa);
>    NEXT_PASS (pass_ipa_odr);
>    NEXT_PASS (pass_ipa_whole_program_visibility);
>    NEXT_PASS (pass_ipa_profile);
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c 
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
> new file mode 100644
> index 00000000000..9d653d4c7a3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
> @@ -0,0 +1,19 @@
> +/* { dg-lto-do link } */
> +/* { dg-lto-options { {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=1to1} {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=max} {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=cache}} } */
> +
> +void asm_fn();
> +void asm_fn_used();
> +
> +asm(".global asm_fn\nasm_fn:");
> +asm(".global asm_fn_used\nasm_fn_used:");
> +
> +
> +__attribute__((noinline))
> +int privatized_fn(int v) { return v + v;}
> +
> +extern void call_privatized_fn();
> +
> +int main() {
> +  privatized_fn (0);
> +  call_privatized_fn ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c 
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
> new file mode 100644
> index 00000000000..9544f69f4e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
> @@ -0,0 +1,12 @@
> +extern void asm_fn_used();
> +
> +__attribute__((used))
> +void local_caller() {
> +  asm_fn_used();
> +}
> +
> +__attribute__((noipa))
> +static void privatized_fn() { asm volatile ("");}
> +asm(".long privatized_fn");
> +
> +void call_privatized_fn() { privatized_fn();}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c 
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
> new file mode 100644
> index 00000000000..315149d7800
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
> @@ -0,0 +1,10 @@
> +/* { dg-lto-do link } */
> +/* { dg-lto-options { {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=1to1} {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=max} {-O2 -flto -flto-toplevel-asm-heuristics 
> -flto-partition=cache}} } */
> +
> +extern int use_statics ();
> +
> +extern int asm_var;
> +
> +int main() {
> +  return asm_var + use_statics ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c 
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
> new file mode 100644
> index 00000000000..1d7c3798963
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
> @@ -0,0 +1,12 @@
> +extern void static_asm_fn ();
> +extern int static_asm_var;
> +asm("static_asm_fn:");
> +asm("static_asm_var:");
> +
> +extern int asm_var;
> +asm(".global asm_var\nasm_var:");
> +
> +int use_statics () {
> +  static_asm_fn ();
> +  return static_asm_var;
> +}
> diff --git a/gcc/timevar.def b/gcc/timevar.def
> index 4f60f04baa1..f0f4521894a 100644
> --- a/gcc/timevar.def
> +++ b/gcc/timevar.def
> @@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_INLINING          , "ipa inlining 
> heuristics")
>  DEFTIMEVAR (TV_IPA_FNSPLIT           , "ipa function splitting")
>  DEFTIMEVAR (TV_IPA_COMDATS        , "ipa comdats")
>  DEFTIMEVAR (TV_IPA_OPT                    , "ipa various optimizations")
> +DEFTIMEVAR (TV_IPA_LTO_ASM        , "ipa lto asm heuristics")
>  DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS    , "lto stream decompression")
>  DEFTIMEVAR (TV_IPA_LTO_COMPRESS      , "lto stream compression")
>  DEFTIMEVAR (TV_IPA_LTO_OUTPUT        , "lto stream output")
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index 410341d4711..b0f6ab4cca8 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -527,6 +527,8 @@ extern simple_ipa_opt_pass 
> *make_pass_local_optimization_passes (gcc::context *c
>  extern simple_ipa_opt_pass *make_pass_ipa_remove_symbols (gcc::context 
> *ctxt);
>  
>  extern ipa_opt_pass_d *make_pass_analyzer (gcc::context *ctxt);
> +extern ipa_opt_pass_d *make_pass_ipa_asm_lgen (gcc::context *ctxt);
> +extern ipa_opt_pass_d *make_pass_ipa_asm_wpa (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_whole_program_visibility (gcc::context
>                                                              *ctxt);
>  extern simple_ipa_opt_pass *make_pass_ipa_increase_alignment (gcc::context
> -- 
> 2.51.1
> 

Reply via email to