> Difference in v2:
> Previously I was checking for ".local $symbol". Most symbols are not
> marked this way so now I pessimistically assume that any symbol defined
> in assembly may be local.
>
> ---
>
> This new pass heuristically detects symbols referenced by toplevel
> assembly to prevent their optimization.
>
> The heuristic works by comparing identifiers found in assembly against
> known symbols.
>
> The pass is split into 2 passes, in LGEN and in WPA.
> There must be one pass for WPA to be able to reference any symbol.
> However in WPA there may be multiple symbols with the same name,
> so we handle those local symbols in LGEN.
>
> gcc/ChangeLog:
>
> * asm-toplevel.cc (mark_fragile_ref_by_asm):
> Add marked_local to handle symbol as local.
> (ipa_asm_heuristics): New.
> (class pass_ipa_asm): New.
> (make_pass_ipa_asm_lgen): New.
> (make_pass_ipa_asm_wpa): New.
> * common.opt: New flto-toplevel-asm-heuristics.
> * passes.def: New asm passes.
> * timevar.def (TV_IPA_LTO_ASM): New.
> * tree-pass.h (make_pass_ipa_asm_lgen): New.
> (make_pass_ipa_asm_wpa): New.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/lto/toplevel-simple-asm-1_0.c: New test.
> * gcc.dg/lto/toplevel-simple-asm-1_1.c: New test.
> * gcc.dg/lto/toplevel-simple-asm-2_0.c: New test.
> * gcc.dg/lto/toplevel-simple-asm-2_1.c: New test.
> ---
> gcc/asm-toplevel.cc | 142 +++++++++++++++++-
> gcc/common.opt | 4 +
> gcc/passes.def | 2 +
> .../gcc.dg/lto/toplevel-simple-asm-1_0.c | 19 +++
> .../gcc.dg/lto/toplevel-simple-asm-1_1.c | 12 ++
> .../gcc.dg/lto/toplevel-simple-asm-2_0.c | 10 ++
> .../gcc.dg/lto/toplevel-simple-asm-2_1.c | 12 ++
> gcc/timevar.def | 1 +
> gcc/tree-pass.h | 2 +
> 9 files changed, 202 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
> create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
> create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
> create mode 100644 gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
>
> diff --git a/gcc/asm-toplevel.cc b/gcc/asm-toplevel.cc
> index 3f1f0f0aad5..d01ec2eb0eb 100644
> --- a/gcc/asm-toplevel.cc
> +++ b/gcc/asm-toplevel.cc
> @@ -29,11 +29,11 @@ along with GCC; see the file COPYING3. If not see
> /* This symbol must be available and cannot be renamed.
> Marks the symbol and symbols that reference it. */
> static void
> -mark_fragile_ref_by_asm (symtab_node* node)
> +mark_fragile_ref_by_asm (symtab_node* node, bool maybe_local = false)
> {
> node->ref_by_asm = true;
> /* Local symbols must remain in the same partition with their callers. */
> - if (!TREE_PUBLIC (node->decl))
> + if (!TREE_PUBLIC (node->decl) || maybe_local)
> {
> unsigned j;
> ipa_ref *ref;
> @@ -112,3 +112,141 @@ analyze_toplevel_extended_asm ()
> walk_tree (&l, walk_through_constraints, (void*) &data, NULL);
> }
> }
> +
> +
> +/* Checks all toplevel assembly contents and compares them with known
> symbols.
> + Marks those symbols with relevant flags.
> + Heuristics: Detects anything in assembly that looks like an identifier.
> +
> + This pass must be in WPA, otherwise we will not see all possibly
> referenced
> + symbols - if a symbol is only declared, it will not be in the callgraph if
> + it is only referenced from toplevel assembly.
> + However in WPA there may be multiple symbols with the same identifier.
> + The chosen solution is to handle local symbols in LGEN pass first. */
> +
> +void
> +ipa_asm_heuristics ()
> +{
> + hash_map<nofree_string_hash, symtab_node *> map;
> + asm_node *anode = symtab->first_asm_symbol ();
> + if (!anode)
> + return;
> +
> + symtab_node* snode;
> + if (flag_wpa)
> + {
> + FOR_EACH_SYMBOL (snode)
> + if (TREE_PUBLIC (snode->decl))
> + map.put (snode->asm_name (), snode);
> + }
> + else
> + {
> + FOR_EACH_SYMBOL (snode)
> + map.put (snode->asm_name (), snode);
> + }
> +
> + size_t ident_buffer_size = 128;
> + char* ident = (char*) xmalloc (ident_buffer_size);
> +
> + for (; anode; anode = anode->next)
> + {
> + if (TREE_CODE (anode->asm_str) != STRING_CST)
> + continue;
> +
> + const char *asm_str = TREE_STRING_POINTER (anode->asm_str);
> + int asm_len = TREE_STRING_LENGTH (anode->asm_str);
> + unsigned l = 0;
> +
> + for (int i = 0; i < asm_len + 1; ++i)
> + {
> + char c = 0;
> + if (i < asm_len)
> + c = asm_str[i];
> +
> + if ('0' <= c && c <= '9')
> + {
> + if (l)
> + ident[l++] = c;
> + }
> + else if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_')
> + ident[l++] = c;
> + else if (l)
> + {
> + ident[l] = 0;
> + symtab_node **n_ = map.get (ident);
> + if (n_)
> + mark_fragile_ref_by_asm (*n_, c == ':');
I would add dump-file output showing which asm statements made a symbol
go global, for easier debugging later (I suppose this may eventually
surprise us).
> +
> + l = 0;
> + }
> +
> + /* Prevent overflow. */
> + if (l >= ident_buffer_size)
> + {
> + ident_buffer_size *= 2;
> + ident = (char*) xrealloc (ident, ident_buffer_size);
Perhaps auto_vec<char> and safe_push would be a more readable (and less
efficient) way to do this in this millennium?
Honza
> + }
> + }
> + }
> + free (ident);
> +}
> +
> +namespace {
> +
> +const pass_data pass_data_ipa_asm =
> +{
> + IPA_PASS, /* type */
> + "ipa-asm", /* name */
> + OPTGROUP_NONE, /* optinfo_flags */
> + TV_IPA_LTO_ASM, /* tv_id */
> + 0, /* properties_required */
> + 0, /* properties_provided */
> + 0, /* properties_destroyed */
> + 0, /* todo_flags_start */
> + 0, /* todo_flags_finish */
> +};
> +
> +class pass_ipa_asm : public ipa_opt_pass_d
> +{
> +public:
> + pass_ipa_asm (gcc::context *ctxt)
> + : ipa_opt_pass_d (pass_data_ipa_asm, ctxt,
> + NULL, /* generate_summary */
> + NULL, /* write_summary */
> + NULL, /* read_summary */
> + NULL, /* write_optimization_summary */
> + NULL, /* read_optimization_summary */
> + NULL, /* stmt_fixup */
> + 0, /* function_transform_todo_flags_start */
> + NULL, /* function_transform */
> + NULL) /* variable_transform */
> + {}
> +
> + /* opt_pass methods: */
> +
> + bool gate (function *) final override
> + {
> + return (flag_lto || flag_wpa) && flag_toplevel_asm_heuristics;
> + }
> +
> + unsigned int execute (function *) final override
> + {
> + ipa_asm_heuristics ();
> + return 0;
> + }
> +
> +};
> +
> +} // anon namespace
> +
> +ipa_opt_pass_d *
> +make_pass_ipa_asm_lgen (gcc::context *ctxt)
> +{
> + return new pass_ipa_asm (ctxt);
> +}
> +
> +ipa_opt_pass_d *
> +make_pass_ipa_asm_wpa (gcc::context *ctxt)
> +{
> + return new pass_ipa_asm (ctxt);
> +}
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 0a3d65d4d4a..d0063e0dd14 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2352,6 +2352,10 @@ flto-partition=
> Common Joined RejectNegative Enum(lto_partition_model)
> Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED)
> Specify the algorithm to partition symbols and vars at linktime.
>
> +flto-toplevel-asm-heuristics
> +Common Var(flag_toplevel_asm_heuristics) Init(0)
> +Enable heuristics to recognize symbol identifiers in toplevel (not extended)
> assembly and prevent their mangling/deletion.
> +
> ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h.
> flto-compression-level=
> Common Joined RejectNegative UInteger Var(flag_lto_compression_level)
> Init(-1) IntegerRange(0, 19)
> diff --git a/gcc/passes.def b/gcc/passes.def
> index fac04cd86c7..c97eb069bfc 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -157,10 +157,12 @@ along with GCC; see the file COPYING3. If not see
> NEXT_PASS (pass_ipa_increase_alignment);
> NEXT_PASS (pass_ipa_tm);
> NEXT_PASS (pass_ipa_lower_emutls);
> + NEXT_PASS (pass_ipa_asm_lgen);
> TERMINATE_PASS_LIST (all_small_ipa_passes)
>
> INSERT_PASSES_AFTER (all_regular_ipa_passes)
> NEXT_PASS (pass_analyzer);
> + NEXT_PASS (pass_ipa_asm_wpa);
> NEXT_PASS (pass_ipa_odr);
> NEXT_PASS (pass_ipa_whole_program_visibility);
> NEXT_PASS (pass_ipa_profile);
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
> new file mode 100644
> index 00000000000..9d653d4c7a3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_0.c
> @@ -0,0 +1,19 @@
> +/* { dg-lto-do link } */
> +/* { dg-lto-options { {-O2 -flto -flto-toplevel-asm-heuristics
> -flto-partition=1to1} {-O2 -flto -flto-toplevel-asm-heuristics
> -flto-partition=max} {-O2 -flto -flto-toplevel-asm-heuristics
> -flto-partition=cache}} } */
> +
> +void asm_fn();
> +void asm_fn_used();
> +
> +asm(".global asm_fn\nasm_fn:");
> +asm(".global asm_fn_used\nasm_fn_used:");
> +
> +
> +__attribute__((noinline))
> +int privatized_fn(int v) { return v + v;}
> +
> +extern void call_privatized_fn();
> +
> +int main() {
> + privatized_fn (0);
> + call_privatized_fn ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
> new file mode 100644
> index 00000000000..9544f69f4e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-1_1.c
> @@ -0,0 +1,12 @@
> +extern void asm_fn_used();
> +
> +__attribute__((used))
> +void local_caller() {
> + asm_fn_used();
> +}
> +
> +__attribute__((noipa))
> +static void privatized_fn() { asm volatile ("");}
> +asm(".long privatized_fn");
> +
> +void call_privatized_fn() { privatized_fn();}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
> new file mode 100644
> index 00000000000..315149d7800
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_0.c
> @@ -0,0 +1,10 @@
> +/* { dg-lto-do link } */
> +/* { dg-lto-options { {-O2 -flto -flto-toplevel-asm-heuristics
> -flto-partition=1to1} {-O2 -flto -flto-toplevel-asm-heuristics
> -flto-partition=max} {-O2 -flto -flto-toplevel-asm-heuristics
> -flto-partition=cache}} } */
> +
> +extern int use_statics ();
> +
> +extern int asm_var;
> +
> +int main() {
> + return asm_var + use_statics ();
> +}
> diff --git a/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
> b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
> new file mode 100644
> index 00000000000..1d7c3798963
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/lto/toplevel-simple-asm-2_1.c
> @@ -0,0 +1,12 @@
> +extern void static_asm_fn ();
> +extern int static_asm_var;
> +asm("static_asm_fn:");
> +asm("static_asm_var:");
> +
> +extern int asm_var;
> +asm(".global asm_var\nasm_var:");
> +
> +int use_statics () {
> + static_asm_fn ();
> + return static_asm_var;
> +}
> diff --git a/gcc/timevar.def b/gcc/timevar.def
> index 4f60f04baa1..f0f4521894a 100644
> --- a/gcc/timevar.def
> +++ b/gcc/timevar.def
> @@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining
> heuristics")
> DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
> DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
> DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations")
> +DEFTIMEVAR (TV_IPA_LTO_ASM , "ipa lto asm heuristics")
> DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression")
> DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression")
> DEFTIMEVAR (TV_IPA_LTO_OUTPUT , "lto stream output")
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index 410341d4711..b0f6ab4cca8 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -527,6 +527,8 @@ extern simple_ipa_opt_pass
> *make_pass_local_optimization_passes (gcc::context *c
> extern simple_ipa_opt_pass *make_pass_ipa_remove_symbols (gcc::context
> *ctxt);
>
> extern ipa_opt_pass_d *make_pass_analyzer (gcc::context *ctxt);
> +extern ipa_opt_pass_d *make_pass_ipa_asm_lgen (gcc::context *ctxt);
> +extern ipa_opt_pass_d *make_pass_ipa_asm_wpa (gcc::context *ctxt);
> extern ipa_opt_pass_d *make_pass_ipa_whole_program_visibility (gcc::context
> *ctxt);
> extern simple_ipa_opt_pass *make_pass_ipa_increase_alignment (gcc::context
> --
> 2.51.1
>