On Fri, 22 Sep 2017, Richard Biener wrote: > > This simplifies canonicalize_loop_closed_ssa and does other minimal > TLC. It also adds a testcase I reduced from a stupid mistake I made > when reworking canonicalize_loop_closed_ssa. > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. > > SPEC CPU 2006 is happy with it, current statistics on x86_64 with > -Ofast -march=haswell -floop-nest-optimize are > > 61 loop nests "optimized" > 45 loop nest transforms cancelled because of code generation issues > 21 loop nest optimizations timed out the 350000 ISL "operations" we allow
Overall compile time (with -j6) is 695 sec. w/o -floop-nest-optimize and 709 sec. with (this was with release checking). A single-run has 416.gamess (580s -> 618s), 436.cactusADM (206s -> 182s), 437.leslie3d (228s ->218s), 450.soplex (229s -> 226s), 465.tonto (428s -> 425s), 401.bzip2 (383s -> 379s), 462.libquantum (352s -> 343s), ignoring +-2s changes. Will do a 3-run for those to confirm (it would be only a single regression for 416.gamess). Sofar I'm positively surprised given the limitations (and inefficiencies) I know. I'll add some more opt-info stuff to assess the number of SCOPs we detect but discard during further analysis and the number of transforms we cancel because they turn out as a no-op. Richard. > { > - if (dump_file && dump_flags) > - fprintf (dump_file, "isl timed out --param max-isl-operations=%d\n", > - max_operations); > + location_t loc = find_loop_location > + (scop->scop_info->region.entry->dest->loop_father); > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc, > + "loop nest not optimized, optimization timed out " > + "after %d operations [--param max-isl-operations]\n", > + max_operations); > return false; > } > > Index: gcc/graphite.c > =================================================================== > --- gcc/graphite.c (revision 253091) > +++ gcc/graphite.c (working copy) > @@ -293,86 +293,6 @@ free_scops (vec<scop_p> scops) > scops.release (); > } > > -/* Returns true when P1 and P2 are close phis with the same > - argument. */ > - > -static inline bool > -same_close_phi_node (gphi *p1, gphi *p2) > -{ > - return (types_compatible_p (TREE_TYPE (gimple_phi_result (p1)), > - TREE_TYPE (gimple_phi_result (p2))) > - && operand_equal_p (gimple_phi_arg_def (p1, 0), > - gimple_phi_arg_def (p2, 0), 0)); > -} > - > -static void make_close_phi_nodes_unique (basic_block bb); > - > -/* Remove the close phi node at GSI and replace its rhs with the rhs > - of PHI. */ > - > -static void > -remove_duplicate_close_phi (gphi *phi, gphi_iterator *gsi) > -{ > - gimple *use_stmt; > - use_operand_p use_p; > - imm_use_iterator imm_iter; > - tree res = gimple_phi_result (phi); > - tree def = gimple_phi_result (gsi->phi ()); > - > - gcc_assert (same_close_phi_node (phi, gsi->phi ())); > - > - FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) > - { > - FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) > - SET_USE (use_p, res); > - > - update_stmt (use_stmt); > - > - /* It is possible that we just created a duplicate close-phi > - for an already-processed containing loop. Check for this > - case and clean it up. */ > - if (gimple_code (use_stmt) == GIMPLE_PHI > - && gimple_phi_num_args (use_stmt) == 1) > - make_close_phi_nodes_unique (gimple_bb (use_stmt)); > - } > - > - remove_phi_node (gsi, true); > -} > - > -/* Removes all the close phi duplicates from BB. */ > - > -static void > -make_close_phi_nodes_unique (basic_block bb) > -{ > - gphi_iterator psi; > - > - for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi)) > - { > - gphi_iterator gsi = psi; > - gphi *phi = psi.phi (); > - > - /* At this point, PHI should be a close phi in normal form. */ > - gcc_assert (gimple_phi_num_args (phi) == 1); > - > - /* Iterate over the next phis and remove duplicates. */ > - gsi_next (&gsi); > - while (!gsi_end_p (gsi)) > - if (same_close_phi_node (phi, gsi.phi ())) > - remove_duplicate_close_phi (phi, &gsi); > - else > - gsi_next (&gsi); > - } > -} > - > -/* Return true when NAME is defined in LOOP. */ > - > -static bool > -defined_in_loop_p (tree name, loop_p loop) > -{ > - gcc_assert (TREE_CODE (name) == SSA_NAME); > - return loop == loop_containing_stmt (SSA_NAME_DEF_STMT (name)); > -} > - > /* Transforms LOOP to the canonical loop closed SSA form. */ > > static void > @@ -380,20 +300,22 @@ canonicalize_loop_closed_ssa (loop_p loo > { > edge e = single_exit (loop); > basic_block bb; > + gphi_iterator psi; > > if (!e || (e->flags & EDGE_COMPLEX)) > return; > > bb = e->dest; > > + /* Make the loop-close PHI node BB contain only PHIs and have a > + single predecessor. */ > if (single_pred_p (bb)) > { > e = split_block_after_labels (bb); > - make_close_phi_nodes_unique (e->src); > + bb = e->src; > } > else > { > - gphi_iterator psi; > basic_block close = split_edge (e); > e = single_succ_edge (close); > for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi)) > @@ -404,7 +326,7 @@ canonicalize_loop_closed_ssa (loop_p loo > > /* Only add close phi nodes for SSA_NAMEs defined in LOOP. */ > if (TREE_CODE (arg) != SSA_NAME > - || !defined_in_loop_p (arg, loop)) > + || loop_containing_stmt (SSA_NAME_DEF_STMT (arg)) != loop) > continue; > > tree res = copy_ssa_name (arg); > @@ -413,8 +335,30 @@ canonicalize_loop_closed_ssa (loop_p loo > UNKNOWN_LOCATION); > SET_USE (use_p, res); > } > + bb = close; > + } > > - make_close_phi_nodes_unique (close); > + /* Eliminate duplicates. This relies on processing loops from > + innermost to outer. */ > + for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi)) > + { > + gphi_iterator gsi = psi; > + gphi *phi = psi.phi (); > + > + /* At this point, PHI should be a close phi in normal form. */ > + gcc_assert (gimple_phi_num_args (phi) == 1); > + > + /* Iterate over the next phis and remove duplicates. */ > + gsi_next (&gsi); > + while (!gsi_end_p (gsi)) > + if (gimple_phi_arg_def (phi, 0) == gimple_phi_arg_def (gsi.phi (), 0)) > + { > + replace_uses_by (gimple_phi_result (gsi.phi ()), > + gimple_phi_result (phi)); > + remove_phi_node (&gsi, true); > + } > + else > + gsi_next (&gsi); > } > } > > @@ -443,7 +387,7 @@ static void > canonicalize_loop_closed_ssa_form (void) > { > loop_p loop; > - FOR_EACH_LOOP (loop, 0) > + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) > canonicalize_loop_closed_ssa (loop); > > checking_verify_loop_closed_ssa (true); > @@ -509,6 +453,19 @@ graphite_transform_loops (void) > "loop nest optimized\n"); > } > > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + loop_p loop; > + int num_no_dependency = 0; > + > + FOR_EACH_LOOP (loop, 0) > + if (loop->can_be_parallel) > + num_no_dependency++; > + > + fprintf (dump_file, "%d loops carried no dependency.\n", > + num_no_dependency); > + } > + > free_scops (scops); > graphite_finalize (need_cfg_cleanup_p); > the_isl_ctx = NULL; > Index: gcc/testsuite/gcc.dg/graphite/scop-24.c > =================================================================== > --- gcc/testsuite/gcc.dg/graphite/scop-24.c (nonexistent) > +++ gcc/testsuite/gcc.dg/graphite/scop-24.c (working copy) > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -floop-nest-optimize" } */ > + > +typedef struct _IO_FILE FILE; > +extern struct _IO_FILE *stderr; > +typedef float real; > +typedef real rvec[3]; > +int rgbset (int); > +void ps_box (int, int); > +void plot_phi(char *fn,rvec box,int natoms,rvec x[],real phi[]) > +{ > + real phi_max,rr,gg,bb,fac,dx,x0,y0; > + int i; > + for(i=0; (i<natoms); i++) > + phi_max=((phi_max > __builtin_fabs(phi[i])) > + ? phi_max : __builtin_fabs(phi[i])); > + if (__builtin_fabs(phi_max)<1.2e-38) > + __builtin_fprintf(stderr, "X"); > + ps_box((real)(fac*box[0]-1),(real)(fac*box[1]-1)); > + for(i=0; (i<natoms); i++) > + { > + rr=gg=bb=1.0; > + if (phi[i] < 0) > + gg=bb=(1.0+(phi[i]/phi_max)); > + else > + rr=gg=(1.0-(phi[i]/phi_max)); > + rr=rgbset(rr); > + } > +} > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)