On Fri, 6 Dec 2013, Jan Hubicka wrote:
> > On Thu, 21 Nov 2013, Jan Hubicka wrote:
> >
> > > >
> > > > Why do you need an additional -fparallelism? Wouldn't
> > > > -fwpa=... be a better match, matching -flto=...? As we already
> > > > pass down a -fwpa option to WPA this would make things easier, no?
> > >
> > > My plan was to possibly use same option later for parallelizing more
> > > parts of
> > > compiler, not only WPA streaming. Streaming in may have some chance if we
> > > get
> > > into thread safety of GGC or move sufficient amount of stuff out of GGC.
> > > Also
> > > we can parallelize inliner heuristic or IPA-PTA if it will ever work. So
> > > it
> > > would make sense with -flto-partition=none and perhaps with local
> > > optimization,
> > > too.
> >
> > I'd like to drop -flto-partition=none eventually. It's just one more
> > path through the compiler to support ...
> >
> > > But I can definitely update the patch to use -fwpa=N and we can deal with
> > > this
> > > once this becomes real. (i.e. I have no clue how to parallelize inliner
> > > without
> > > making its decisions dependent on the parallelism and declining with
> > > parallelism
> > > increased nor I have real plans for stream in procedure)
> >
> > Please.
> >
>
> Hi,
> here is updated patch. Sorry for taking time, I should have more time for
> hacking again
> now...
Ok.
Thanks,
Richard.
> Honza
>
> * lto-cgraph.c (asm_nodes_output): Make global.
> * lto-wrapper.c (run_gcc): Pass down parallelism to WPA.
> * lto.c (lto_parallelism): New static var.
> (do_stream_out, wait_for_child, stream_out): New static functions.
> (lto_wpa_write_files): Add support for parallel streaming.
> (do_whole_program_analysis): Set parallelism.
> * lang.opt (fwpa): Add parameter.
> * lto-lang.c (lto_handle_option): Handle flag_wpa.
> (lto_init): Update use of flag_wpa.
> * lto-streamer.h (asm_nodes_output): Declare.
> Index: lto-cgraph.c
> ===================================================================
> *** lto-cgraph.c (revision 205646)
> --- lto-cgraph.c (working copy)
> *************** along with GCC; see the file COPYING3.
> *** 53,58 ****
> --- 53,61 ----
> #include "pass_manager.h"
> #include "ipa-utils.h"
>
> + /* True when asm nodes have been output. */
> + bool asm_nodes_output = false;
> +
> static void output_cgraph_opt_summary (void);
> static void input_cgraph_opt_summary (vec<symtab_node *> nodes);
>
> *************** output_symtab (void)
> *** 889,895 ****
> lto_symtab_encoder_iterator lsei;
> int i, n_nodes;
> lto_symtab_encoder_t encoder;
> - static bool asm_nodes_output = false;
>
> if (flag_wpa)
> output_cgraph_opt_summary ();
> --- 892,897 ----
> Index: lto-wrapper.c
> ===================================================================
> *** lto-wrapper.c (revision 205646)
> --- lto-wrapper.c (working copy)
> *************** run_gcc (unsigned argc, char *argv[])
> *** 745,751 ****
> tmp += list_option_len;
> strcpy (tmp, ltrans_output_file);
>
> ! obstack_ptr_grow (&argv_obstack, "-fwpa");
> }
>
> /* Append the input objects and possible preceding arguments. */
> --- 746,761 ----
> tmp += list_option_len;
> strcpy (tmp, ltrans_output_file);
>
> ! if (jobserver)
> ! obstack_ptr_grow (&argv_obstack, xstrdup ("-fwpa=jobserver"));
> ! else if (parallel > 1)
> ! {
> ! char buf[256];
> ! sprintf (buf, "-fwpa=%i", parallel);
> ! obstack_ptr_grow (&argv_obstack, xstrdup (buf));
> ! }
> ! else
> ! obstack_ptr_grow (&argv_obstack, "-fwpa");
> }
>
> /* Append the input objects and possible preceding arguments. */
> Index: lto/lto.c
> ===================================================================
> *** lto/lto.c (revision 205646)
> --- lto/lto.c (working copy)
> *************** along with GCC; see the file COPYING3.
> *** 53,58 ****
> --- 53,61 ----
> /* Vector to keep track of external variables we've seen so far. */
> vec<tree, va_gc> *lto_global_var_decls;
>
> + /* Number of parallel tasks to run, -1 if we want to use GNU Make jobserver. */
> + static int lto_parallelism;
> +
> static GTY(()) tree first_personality_decl;
>
> /* Returns a hash code for P. */
> *************** cmp_partitions_order (const void *a, con
> *** 2454,2459 ****
> --- 2457,2554 ----
> return orderb - ordera;
> }
>
> + /* Actually stream out ENCODER into TEMP_FILENAME. */
> +
> + static void
> + do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder)
> + {
> + lto_file *file = lto_obj_file_open (temp_filename, true);
> + if (!file)
> + fatal_error ("lto_obj_file_open() failed");
> + lto_set_current_out_file (file);
> +
> + ipa_write_optimization_summaries (encoder);
> +
> + lto_set_current_out_file (NULL);
> + lto_obj_file_close (file);
> + free (file);
> + }
> +
> + /* Wait for forked process and signal errors. */
> + #ifdef HAVE_WORKING_FORK
> + static void
> + wait_for_child ()
> + {
> + int status;
> + do
> + {
> + int w = waitpid(0, &status, WUNTRACED | WCONTINUED);
> + if (w == -1)
> + fatal_error ("waitpid failed");
> +
> + if (WIFEXITED (status) && WEXITSTATUS (status))
> + fatal_error ("streaming subprocess failed");
> + else if (WIFSIGNALED (status))
> + fatal_error ("streaming subprocess was killed by signal");
> + }
> + while (!WIFEXITED(status) && !WIFSIGNALED(status));
> + }
> + #endif
> +
> + /* Stream out ENCODER into TEMP_FILENAME.
> + Fork if that seems to help. */
> +
> + static void
> + stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last)
> + {
> + #ifdef HAVE_WORKING_FORK
> + static int nruns;
> +
> + if (!lto_parallelism || lto_parallelism == 1)
> + {
> + do_stream_out (temp_filename, encoder);
> + return;
> + }
> +
> + /* Do not run more than LTO_PARALLELISM streamings
> + FIXME: we ignore limits on jobserver. */
> + if (lto_parallelism > 0 && nruns >= lto_parallelism)
> + {
> + wait_for_child ();
> + nruns --;
> + }
> + /* If this is not the last parallel partition, execute new
> + streaming process. */
> + if (!last)
> + {
> + pid_t cpid = fork ();
> +
> + if (!cpid)
> + {
> + setproctitle ("lto1-wpa-streaming");
> + do_stream_out (temp_filename, encoder);
> + exit (0);
> + }
> + /* Fork failed; let's do the job ourselves. */
> + else if (cpid == -1)
> + do_stream_out (temp_filename, encoder);
> + else
> + nruns++;
> + }
> + /* Last partition; stream it and wait for all children to die. */
> + else
> + {
> + int i;
> + do_stream_out (temp_filename, encoder);
> + for (i = 0; i < nruns; i++)
> + wait_for_child ();
> + }
> + asm_nodes_output = true;
> + #else
> + do_stream_out (temp_filename, encoder);
> + #endif
> + }
> +
> /* Write all output files in WPA mode and the file with the list of
> LTRANS units. */
>
> *************** static void
> *** 2461,2478 ****
> lto_wpa_write_files (void)
> {
> unsigned i, n_sets;
> - lto_file *file;
> ltrans_partition part;
> FILE *ltrans_output_list_stream;
> char *temp_filename;
> size_t blen;
>
> /* Open the LTRANS output list. */
> if (!ltrans_output_list)
> fatal_error ("no LTRANS output list filename provided");
> - ltrans_output_list_stream = fopen (ltrans_output_list, "w");
> - if (ltrans_output_list_stream == NULL)
> - fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list);
>
> timevar_push (TV_WHOPR_WPA);
>
> --- 2556,2570 ----
> lto_wpa_write_files (void)
> {
> unsigned i, n_sets;
> ltrans_partition part;
> FILE *ltrans_output_list_stream;
> char *temp_filename;
> + vec <char *>temp_filenames = vNULL;
> size_t blen;
>
> /* Open the LTRANS output list. */
> if (!ltrans_output_list)
> fatal_error ("no LTRANS output list filename provided");
>
> timevar_push (TV_WHOPR_WPA);
>
> *************** lto_wpa_write_files (void)
> *** 2508,2521 ****
> : cmp_partitions_order);
> for (i = 0; i < n_sets; i++)
> {
> - size_t len;
> ltrans_partition part = ltrans_partitions[i];
>
> /* Write all the nodes in SET. */
> sprintf (temp_filename + blen, "%u.o", i);
> - file = lto_obj_file_open (temp_filename, true);
> - if (!file)
> - fatal_error ("lto_obj_file_open() failed");
>
> if (!quiet_flag)
> fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name,
> part->insns);
> --- 2600,2609 ----
> *************** lto_wpa_write_files (void)
> *** 2557,2577 ****
> }
> gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
>
> ! lto_set_current_out_file (file);
> !
> ! ipa_write_optimization_summaries (part->encoder);
>
> - lto_set_current_out_file (NULL);
> - lto_obj_file_close (file);
> - free (file);
> part->encoder = NULL;
>
> ! len = strlen (temp_filename);
> ! if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len
> || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
> fatal_error ("writing to LTRANS output list %s: %m",
> ltrans_output_list);
> }
>
> lto_stats.num_output_files += n_sets;
>
> --- 2645,2669 ----
> }
> gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
>
> ! stream_out (temp_filename, part->encoder, i == n_sets - 1);
>
> part->encoder = NULL;
>
> ! temp_filenames.safe_push (xstrdup (temp_filename));
> ! }
> ! ltrans_output_list_stream = fopen (ltrans_output_list, "w");
> ! if (ltrans_output_list_stream == NULL)
> ! fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list);
> ! for (i = 0; i < n_sets; i++)
> ! {
> ! unsigned int len = strlen (temp_filenames[i]);
> ! if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < len
> || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
> fatal_error ("writing to LTRANS output list %s: %m",
> ltrans_output_list);
> + free (temp_filenames[i]);
> }
> + temp_filenames.release();
>
> lto_stats.num_output_files += n_sets;
>
> *************** do_whole_program_analysis (void)
> *** 3126,3131 ****
> --- 3218,3235 ----
> {
> symtab_node *node;
>
> + lto_parallelism = 1;
> +
> + /* TODO: jobserver communication is not supported yet. */
> + if (!strcmp (flag_wpa, "jobserver"))
> + lto_parallelism = -1;
> + else
> + {
> + lto_parallelism = atoi (flag_wpa);
> + if (lto_parallelism <= 0)
> + lto_parallelism = 0;
> + }
> +
> timevar_start (TV_PHASE_OPT_GEN);
>
> /* Note that since we are in WPA mode, materialize_cgraph will not
> Index: lto/lang.opt
> ===================================================================
> *** lto/lang.opt (revision 205646)
> --- lto/lang.opt (working copy)
> *************** LTO Joined Var(ltrans_output_list)
> *** 33,41 ****
> Specify a file to which a list of files output by LTRANS is written.
>
> fwpa
> ! LTO Driver Report Var(flag_wpa)
> Run the link-time optimizer in whole program analysis (WPA) mode.
>
> fresolution=
> LTO Joined
> The resolution file
> --- 33,45 ----
> Specify a file to which a list of files output by LTRANS is written.
>
> fwpa
> ! LTO Driver Report
> Run the link-time optimizer in whole program analysis (WPA) mode.
>
> + fwpa=
> + LTO Driver RejectNegative Joined Var(flag_wpa)
> + Whole program analysis (WPA) mode with number of parallel jobs specified.
> +
> fresolution=
> LTO Joined
> The resolution file
> Index: lto/lto-lang.c
> ===================================================================
> *** lto/lto-lang.c (revision 205646)
> --- lto/lto-lang.c (working copy)
> *************** lto_handle_option (size_t scode, const c
> *** 749,754 ****
> --- 749,758 ----
> warn_psabi = value;
> break;
>
> + case OPT_fwpa:
> + flag_wpa = value ? "" : NULL;
> + break;
> +
> default:
> break;
> }
> *************** static bool
> *** 1148,1154 ****
> lto_init (void)
> {
> /* We need to generate LTO if running in WPA mode. */
> ! flag_generate_lto = flag_wpa;
>
> /* Create the basic integer types. */
> build_common_tree_nodes (flag_signed_char, /*short_double=*/false);
> --- 1152,1158 ----
> lto_init (void)
> {
> /* We need to generate LTO if running in WPA mode. */
> ! flag_generate_lto = (flag_wpa != NULL);
>
> /* Create the basic integer types. */
> build_common_tree_nodes (flag_signed_char, /*short_double=*/false);
> Index: lto-streamer.h
> ===================================================================
> *** lto-streamer.h (revision 205646)
> --- lto-streamer.h (working copy)
> *************** void lto_output_location (struct output_
> *** 873,878 ****
> --- 873,879 ----
>
>
> /* In lto-cgraph.c */
> + extern bool asm_nodes_output;
> lto_symtab_encoder_t lto_symtab_encoder_new (bool);
> int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node *);
> void lto_symtab_encoder_delete (lto_symtab_encoder_t);
>
>
--
Richard Biener <[email protected]>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer