On Thu, 13 Nov 2014, Jan Hubicka wrote:
> Hi,
> this patch adds infrastructure for proper streaming and merging of
> TREE_TARGET_OPTION. The catch is that TREE_TARGET_OPTION is autogenerated
> structure. For x86_64 it looks as follows:
>
> /* Structure to save/restore selected target specific options. */
> struct GTY(()) cl_target_option
> {
> HOST_WIDE_INT x_ix86_isa_flags_explicit;
> HOST_WIDE_INT x_ix86_target_flags_explicit;
> const char *x_ix86_arch_string;
> const char *x_ix86_recip_name;
> const char *x_ix86_tune_ctrl_string;
> const char *x_ix86_tune_memcpy_strategy;
> const char *x_ix86_tune_memset_strategy;
> const char *x_ix86_tune_string;
> HOST_WIDE_INT x_ix86_isa_flags;
> enum fpmath_unit x_ix86_fpmath;
> enum asm_dialect x_ix86_asm_dialect;
> ...
> enum stack_protector_guard x_ix86_stack_protector_guard;
> enum stringop_alg x_ix86_stringop_alg;
> enum tls_dialect x_ix86_tls_dialect;
> int x_ix86_branch_cost;
> int x_ix86_dump_tunes;
> ....
> int x_recip_mask;
> int x_target_flags;
> unsigned char arch;
> unsigned char arch_specified;
> unsigned char branch_cost;
> unsigned char schedule;
> unsigned char tune;
> unsigned char tune_defaulted;
> };
>
> The problem is that the strings prevents us from streaming the structure as a
> binary
> blob like we do for optimization nodes (and we should not do that to make LTO
> streaming
> stable across attribute tweaks in minor releases).
>
> This patch extends the AWK generators to produce four functions:
>
> /* Compare two target options */
> bool
> cl_target_option_eq (struct cl_target_option const *ptr1,
> struct cl_target_option const *ptr2)
> {
> if (ptr1->x_ix86_arch_string != ptr2->x_ix86_arch_string
> && (!ptr1->x_ix86_arch_string || !ptr2->x_ix86_arch_string
> || strcmp (ptr1->x_ix86_arch_string, ptr2->x_ix86_arch_string)))
> return false;
> ....
> if (ptr1->x_ix86_isa_flags != ptr2->x_ix86_isa_flags)
> return false;
> if (ptr1->x_ix86_fpmath != ptr2->x_ix86_fpmath)
> return false;
> return true;
> }
>
> /* Hash target options */
> hashval_t
> cl_target_option_hash (struct cl_target_option const *ptr)
> {
> inchash::hash hstate;
> if (ptr->x_ix86_arch_string)
> hstate.add (ptr->x_ix86_arch_string, strlen (ptr->x_ix86_arch_string));
> else
> hstate.add_int (0);
> ...
> hstate.add_wide_int (ptr->x_ix86_fpmath);
> return hstate.end ();
> }
>
> /* Stream out target options */
> void
> cl_target_option_stream_out (struct output_block *ob, struct cl_target_option
> *ptr)
> {
> streamer_write_string (ob, ob->main_stream, ptr->x_ix86_arch_string, true);
> ...
> streamer_write_uhwi (ob, ptr->x_ix86_isa_flags);
> streamer_write_uhwi (ob, ptr->x_ix86_fpmath);
> }
>
> /* Stream in target options */
> void
> cl_target_option_stream_in (struct lto_input_block *ib, struct data_in
> *data_in, struct cl_target_option *ptr)
> {
> ptr->x_ix86_arch_string = strdup (streamer_read_string (data_in, ib));
> ...
> ptr->x_ix86_fpmath = (enum fpmath_unit ) streamer_read_uhwi (ib);
> }
>
> These allows us to properly stream, compare and hash the values.
>
> There is some implementation lazyness - for example I stream all scalars as
> unsigned HOST_WIDE_INT that seems to be the largest type used for variables
> across the targets.
>
> If other types are used, the type matching (I stole from other part of the awk
> scripts) will need to be extended; for now it knows that const char * is a
> string and needs special care but nothing else.
>
> I have tested this with firefox and additional change to tree.c to always
> initialize TARGET_OPTION_NODE for defined functions (so target units are
> peorperly presered across units). This seems to work as intended - i.e. the
> functions gets link-time optimized according to -march settings passed at
> compile time.
>
> I noticed that memory use is now 3.8GB instead of previous 2.8GB - this may be
> tree merging problem caused by mismatching target settings but most probably
> it
> is an unrelated stuff. I will look into it tomorrow. I also plan to test this
> on PPC (that currently fails to bootstrap).
>
> Incrementally I would like to handle optimizations nodes same way and change
> streaming format to be a set of assignments field_name=value.
> This way we could behave sanely when some field is introduced or removed by
> either defaulting it or ignoring it.
I'm not sure that idea is good ;) Adding a version / checksum to
detect a mismatch early would be nice though. Maybe just compute
a hash for the fields in the AWK script?
> Bootstrapped/regtested x86_64-linux, seems sane?
Sounds sane - I'd like to have a AWK capable person have a look
for non-portable stuff (did you check AIX? ;))
Some more comments below
> * optc-save-gen.awk: Output cl_target_option_eq,
> cl_target_option_hash, cl_target_option_stream_out,
> cl_target_option_stream_in functions.
> * opth-gen.awk: Output prototypes for
> cl_target_option_eq and cl_target_option_hash.
> * lto-streamer.h (cl_target_option_stream_out,
> cl_target_option_stream_in): Declare.
> * tree.c (cl_option_hash_hash): Use cl_target_option_hash.
> (cl_option_hash_eq): Use cl_target_option_eq.
> * tree-streamer-in.c (unpack_value_fields): Stream in
> TREE_TARGET_OPTION.
> * lto-streamer-out.c (DFS::DFS_write_tree_body): Follow
> DECL_FUNCTION_SPECIFIC_TARGET.
> (hash_tree): Hash TREE_TARGET_OPTION; visit
> DECL_FUNCTION_SPECIFIC_TARGET.
> * tree-streamer-out.c (streamer_pack_tree_bitfields): Skip
> TS_TARGET_OPTION.
> (streamer_write_tree_body): Output TS_TARGET_OPTION.
>
> * lto.c (compare_tree_sccs_1): Compare cl_target_option_eq.
>
> Index: optc-save-gen.awk
> ===================================================================
> --- optc-save-gen.awk (revision 217448)
> +++ optc-save-gen.awk (working copy)
> @@ -39,6 +39,16 @@ print "#include " quote "intl.h" quote
> print ""
> print "#include " quote "flags.h" quote
> print "#include " quote "target.h" quote
> +print "#include " quote "inchash.h" quote
> +print "#include " quote "tree.h" quote
> +print "#include " quote "tree-ssa-alias.h" quote
> +print "#include " quote "is-a.h" quote
> +print "#include " quote "predict.h" quote
> +print "#include " quote "function.h" quote
> +print "#include " quote "basic-block.h" quote
> +print "#include " quote "gimple-expr.h" quote
> +print "#include " quote "gimple.h" quote
> +print "#include " quote "data-streamer.h" quote
> print ""
>
> if (n_extra_c_includes > 0) {
> @@ -417,4 +427,120 @@ print " targetm.target_option.print (
>
> print "}";
>
> +print "";
> +print "/* Compare two target options */";
> +print "bool";
> +print "cl_target_option_eq (struct cl_target_option const *ptr1,";
> +print " struct cl_target_option const *ptr2)";
> +print "{";
> +n_target_val = 0;
> +n_target_str = 0;
> +
> +for (i = 0; i < n_target_save; i++) {
> + var = target_save_decl[i];
> + sub (" *=.*", "", var);
> + name = var;
> + type = var;
> + sub("^.*[ *]", "", name)
> + sub(" *" name "$", "", type)
> + if (target_save_decl[i] ~ "^const char \\*+[_" alnum "]+$")
> + var_target_str[n_target_str++] = name;
> + else {
> + var_target_val_type[n_target_val] = type;
> + var_target_val[n_target_val++] = name;
> + }
> +}
> +if (have_save) {
> + for (i = 0; i < n_opts; i++) {
> + if (flag_set_p("Save", flags[i])) {
> + name = var_name(flags[i])
> + if(name == "")
> + name = "target_flags";
> +
> + if(name in var_list_seen)
> + continue;
> +
> + var_list_seen[name]++;
> + otype = var_type_struct(flags[i])
> + if (otype ~ "^const char \\**$")
> + var_target_str[n_target_str++] = "x_" name;
> + else {
> + var_target_val_type[n_target_val] = otype;
> + var_target_val[n_target_val++] = "x_" name;
> + }
> + }
> + }
> +} else {
> + var_target_val_type[n_target_val] = "int";
> + var_target_val[n_target_val++] = "x_target_flags";
> +}
> +
> +for (i = 0; i < n_target_str; i++) {
> + name = var_target_str[i]
> + print " if (ptr1->" name" != ptr2->" name;
> + print " && (!ptr1->" name" || !ptr2->" name
> + print " || strcmp (ptr1->" name", ptr2->" name ")))";
> + print " return false;";
> +}
> +for (i = 0; i < n_target_val; i++) {
> + name = var_target_val[i]
> + print " if (ptr1->" name" != ptr2->" name ")";
> + print " return false;";
> +}
> +
> +print " return true;";
> +
> +print "}";
> +
> +print "";
> +print "/* Hash target options */";
> +print "hashval_t";
> +print "cl_target_option_hash (struct cl_target_option const *ptr)";
> +print "{";
> +print " inchash::hash hstate;";
> +for (i = 0; i < n_target_str; i++) {
> + name = var_target_str[i]
> + print " if (ptr->" name")";
> + print " hstate.add (ptr->" name", strlen (ptr->" name"));";
> + print " else";
> + print " hstate.add_int (0);";
> +}
> +for (i = 0; i < n_target_val; i++) {
> + name = var_target_val[i]
> + print " hstate.add_wide_int (ptr->" name");";
> +}
> +print " return hstate.end ();";
> +print "}";
> +
> +print "";
> +print "/* Stream out target options */";
> +print "void";
> +print "cl_target_option_stream_out (struct output_block *ob, struct
> cl_target_option *ptr)";
> +print "{";
> +for (i = 0; i < n_target_str; i++) {
> + name = var_target_str[i]
> + print " streamer_write_string (ob, ob->main_stream, ptr->" name",
> true);";
> +}
> +for (i = 0; i < n_target_val; i++) {
> + name = var_target_val[i]
> + print " streamer_write_uhwi (ob, ptr->" name");";
> +}
> +print "}";
> +
> +print "";
> +print "/* Stream in target options */";
> +print "void";
> +print "cl_target_option_stream_in (struct lto_input_block *ib, struct
> data_in *data_in, struct cl_target_option *ptr)";
> +print "{";
> +for (i = 0; i < n_target_str; i++) {
> + name = var_target_str[i]
> + print " ptr->" name" = strdup (streamer_read_string (data_in, ib));";
> +}
> +for (i = 0; i < n_target_val; i++) {
> + name = var_target_val[i]
> + print " ptr->" name" = (" var_target_val_type[i] ") streamer_read_uhwi
> (ib);";
> +}
> +
> +print "}";
> +
> }
> Index: tree-streamer-in.c
> ===================================================================
> --- tree-streamer-in.c (revision 217448)
> +++ tree-streamer-in.c (working copy)
> @@ -506,9 +506,6 @@ unpack_value_fields (struct data_in *dat
> if (CODE_CONTAINS_STRUCT (code, TS_TRANSLATION_UNIT_DECL))
> unpack_ts_translation_unit_decl_value_fields (data_in, bp, expr);
>
> - if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION))
> - gcc_unreachable ();
> -
> if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION))
> unpack_ts_optimization (bp, expr);
>
> @@ -1081,6 +1078,9 @@ streamer_read_tree_body (struct lto_inpu
> if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR))
> lto_input_ts_constructor_tree_pointers (ib, data_in, expr);
>
> + if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION))
> + cl_target_option_stream_in (ib, data_in, TREE_TARGET_OPTION (expr));
> +
> if (code == OMP_CLAUSE)
> lto_input_ts_omp_clause_tree_pointers (ib, data_in, expr);
Any reason you don't use bitpacks and stream it from
[un]pack_value_fields? We can happily pack strings there as well.
The 'body' streaming is for tree pointers only.
> }
> Index: lto-streamer.h
> ===================================================================
> --- lto-streamer.h (revision 217448)
> +++ lto-streamer.h (working copy)
> @@ -832,6 +832,14 @@ bool reachable_from_this_partition_p (st
> lto_symtab_encoder_t);
> lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder);
>
> +/* In options-save.c. */
> +void cl_target_option_stream_out (struct output_block *,
> + struct cl_target_option *);
> +
> +void cl_target_option_stream_in (struct lto_input_block *,
> + struct data_in *,
> + struct cl_target_option *);
> +
>
> /* In lto-symtab.c. */
> extern void lto_symtab_merge_decls (void);
> Index: tree.c
> ===================================================================
> --- tree.c (revision 217448)
> +++ tree.c (working copy)
> @@ -11484,10 +11495,7 @@ cl_option_hash_hash (const void *x)
> }
>
> else if (TREE_CODE (t) == TARGET_OPTION_NODE)
> - {
> - p = (const char *)TREE_TARGET_OPTION (t);
> - len = sizeof (struct cl_target_option);
> - }
> + return cl_target_option_hash (TREE_TARGET_OPTION (t));
>
> else
> gcc_unreachable ();
> @@ -11526,9 +11534,8 @@ cl_option_hash_eq (const void *x, const
>
> else if (TREE_CODE (xt) == TARGET_OPTION_NODE)
> {
> - xp = (const char *)TREE_TARGET_OPTION (xt);
> - yp = (const char *)TREE_TARGET_OPTION (yt);
> - len = sizeof (struct cl_target_option);
> + return cl_target_option_eq (TREE_TARGET_OPTION (xt),
> + TREE_TARGET_OPTION (yt));
> }
>
> else
> Index: opth-gen.awk
> ===================================================================
> --- opth-gen.awk (revision 217448)
> +++ opth-gen.awk (working copy)
> @@ -293,6 +293,12 @@ print "";
> print "/* Print target option variables from a structure. */";
> print "extern void cl_target_option_print (FILE *, int, struct
> cl_target_option *);";
> print "";
> +print "/* Compare two target option variables from a structure. */";
> +print "extern bool cl_target_option_eq (const struct cl_target_option *,
> const struct cl_target_option *);";
> +print "";
> +print "/* Hash option variables from a structure. */";
> +print "extern hashval_t cl_target_option_hash (const struct cl_target_option
> *);";
> +print "";
> print "/* Anything that includes tm.h, does not necessarily need this. */"
> print "#if !defined(GCC_TM_H)"
> print "#include \"input.h\" /* for location_t */"
> Index: lto-streamer-out.c
> ===================================================================
> --- lto-streamer-out.c (revision 217448)
> +++ lto-streamer-out.c (working copy)
> @@ -594,7 +594,7 @@ DFS::DFS_write_tree_body (struct output_
> {
> DFS_follow_tree_edge (DECL_VINDEX (expr));
> DFS_follow_tree_edge (DECL_FUNCTION_PERSONALITY (expr));
> - /* Do not DECL_FUNCTION_SPECIFIC_TARGET. They will be regenerated. */
> + DFS_follow_tree_edge (DECL_FUNCTION_SPECIFIC_TARGET (expr));
> DFS_follow_tree_edge (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (expr));
> }
>
> @@ -945,7 +945,7 @@ hash_tree (struct streamer_tree_cache_d
> strlen (TRANSLATION_UNIT_LANGUAGE (t)));
>
> if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION))
> - gcc_unreachable ();
> + hstate.add_wide_int (cl_target_option_hash (TREE_TARGET_OPTION (t)));
>
> if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION))
> hstate.add (t, sizeof (struct cl_optimization));
> @@ -1028,7 +1028,7 @@ hash_tree (struct streamer_tree_cache_d
> {
> visit (DECL_VINDEX (t));
> visit (DECL_FUNCTION_PERSONALITY (t));
> - /* Do not follow DECL_FUNCTION_SPECIFIC_TARGET. */
> + visit (DECL_FUNCTION_SPECIFIC_TARGET (t));
> visit (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t));
> }
>
> Index: lto/lto.c
> ===================================================================
> --- lto/lto.c (revision 217448)
> +++ lto/lto.c (working copy)
> @@ -1377,7 +1377,8 @@ compare_tree_sccs_1 (tree t1, tree t2, t
> return false;
>
> if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION))
> - gcc_unreachable ();
> + if (cl_target_option_eq (TREE_TARGET_OPTION (t1), TREE_TARGET_OPTION
> (t2)))
> + return false;
I suppose that's the reason for the memory use - should be
!cl_target_option_eq I think ;)
>
> if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION))
> if (memcmp (TREE_OPTIMIZATION (t1), TREE_OPTIMIZATION (t2),
> Index: tree-streamer-out.c
> ===================================================================
> --- tree-streamer-out.c (revision 217448)
> +++ tree-streamer-out.c (working copy)
> @@ -472,9 +472,6 @@ streamer_pack_tree_bitfields (struct out
> if (CODE_CONTAINS_STRUCT (code, TS_TRANSLATION_UNIT_DECL))
> pack_ts_translation_unit_decl_value_fields (ob, bp, expr);
>
> - if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION))
> - gcc_unreachable ();
> -
> if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION))
> pack_ts_optimization (bp, expr);
>
> @@ -963,6 +960,9 @@ streamer_write_tree_body (struct output_
> if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR))
> write_ts_constructor_tree_pointers (ob, expr, ref_p);
>
> + if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION))
> + cl_target_option_stream_out (ob, TREE_TARGET_OPTION (expr));
> +
> if (code == OMP_CLAUSE)
> write_ts_omp_clause_tree_pointers (ob, expr, ref_p);
> }
Otherwise looks good.
Thanks,
Richard.