On Fri, 22 Sep 2017, Richard Biener wrote:

> 
> This simplifies canonicalize_loop_closed_ssa and does other minimal
> TLC.  It also adds a testcase I reduced from a stupid mistake I made
> when reworking canonicalize_loop_closed_ssa.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> 
> SPEC CPU 2006 is happy with it, current statistics on x86_64 with
> -Ofast -march=haswell -floop-nest-optimize are
> 
>  61 loop nests "optimized"
>  45 loop nest transforms cancelled because of code generation issues
>  21 loop nest optimizations timed out the 350000 ISL "operations" we allow

Overall compile time (with -j6) is 695 sec. w/o -floop-nest-optimize
and 709 sec. with (this was with release checking).

A single-run has 416.gamess (580s -> 618s),
436.cactusADM (206s -> 182s), 437.leslie3d (228s ->218s),
450.soplex (229s -> 226s), 465.tonto (428s -> 425s), 401.bzip2 (383s -> 
379s), 462.libquantum (352s -> 343s), ignoring +-2s changes.  Will
do a 3-run for those to confirm (it would be only a single regression
for 416.gamess).

Sofar I'm positively surprised given the limitations (and inefficiencies)
I know.

I'll add some more opt-info stuff to assess the number of SCOPs we
detect but discard during further analysis and the number of transforms
we cancel because they turn out as a no-op.

Richard.

>      {
> -      if (dump_file && dump_flags)
> -     fprintf (dump_file, "isl timed out --param max-isl-operations=%d\n",
> -              max_operations);
> +      location_t loc = find_loop_location
> +     (scop->scop_info->region.entry->dest->loop_father);
> +      dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
> +                    "loop nest not optimized, optimization timed out "
> +                    "after %d operations [--param max-isl-operations]\n",
> +                    max_operations);
>        return false;
>      }
>  
> Index: gcc/graphite.c
> ===================================================================
> --- gcc/graphite.c    (revision 253091)
> +++ gcc/graphite.c    (working copy)
> @@ -293,86 +293,6 @@ free_scops (vec<scop_p> scops)
>    scops.release ();
>  }
>  
> -/* Returns true when P1 and P2 are close phis with the same
> -   argument.  */
> -
> -static inline bool
> -same_close_phi_node (gphi *p1, gphi *p2)
> -{
> -  return (types_compatible_p (TREE_TYPE (gimple_phi_result (p1)),
> -                           TREE_TYPE (gimple_phi_result (p2)))
> -       && operand_equal_p (gimple_phi_arg_def (p1, 0),
> -                           gimple_phi_arg_def (p2, 0), 0));
> -}
> -
> -static void make_close_phi_nodes_unique (basic_block bb);
> -
> -/* Remove the close phi node at GSI and replace its rhs with the rhs
> -   of PHI.  */
> -
> -static void
> -remove_duplicate_close_phi (gphi *phi, gphi_iterator *gsi)
> -{
> -  gimple *use_stmt;
> -  use_operand_p use_p;
> -  imm_use_iterator imm_iter;
> -  tree res = gimple_phi_result (phi);
> -  tree def = gimple_phi_result (gsi->phi ());
> -
> -  gcc_assert (same_close_phi_node (phi, gsi->phi ()));
> -
> -  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
> -    {
> -      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
> -     SET_USE (use_p, res);
> -
> -      update_stmt (use_stmt);
> -
> -      /* It is possible that we just created a duplicate close-phi
> -      for an already-processed containing loop.  Check for this
> -      case and clean it up.  */
> -      if (gimple_code (use_stmt) == GIMPLE_PHI
> -       && gimple_phi_num_args (use_stmt) == 1)
> -     make_close_phi_nodes_unique (gimple_bb (use_stmt));
> -    }
> -
> -  remove_phi_node (gsi, true);
> -}
> -
> -/* Removes all the close phi duplicates from BB.  */
> -
> -static void
> -make_close_phi_nodes_unique (basic_block bb)
> -{
> -  gphi_iterator psi;
> -
> -  for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> -    {
> -      gphi_iterator gsi = psi;
> -      gphi *phi = psi.phi ();
> -
> -      /* At this point, PHI should be a close phi in normal form.  */
> -      gcc_assert (gimple_phi_num_args (phi) == 1);
> -
> -      /* Iterate over the next phis and remove duplicates.  */
> -      gsi_next (&gsi);
> -      while (!gsi_end_p (gsi))
> -     if (same_close_phi_node (phi, gsi.phi ()))
> -       remove_duplicate_close_phi (phi, &gsi);
> -     else
> -       gsi_next (&gsi);
> -    }
> -}
> -
> -/* Return true when NAME is defined in LOOP.  */
> -
> -static bool
> -defined_in_loop_p (tree name, loop_p loop)
> -{
> -  gcc_assert (TREE_CODE (name) == SSA_NAME);
> -  return loop == loop_containing_stmt (SSA_NAME_DEF_STMT (name));
> -}
> -
>  /* Transforms LOOP to the canonical loop closed SSA form.  */
>  
>  static void
> @@ -380,20 +300,22 @@ canonicalize_loop_closed_ssa (loop_p loo
>  {
>    edge e = single_exit (loop);
>    basic_block bb;
> +  gphi_iterator psi;
>  
>    if (!e || (e->flags & EDGE_COMPLEX))
>      return;
>  
>    bb = e->dest;
>  
> +  /* Make the loop-close PHI node BB contain only PHIs and have a
> +     single predecessor.  */
>    if (single_pred_p (bb))
>      {
>        e = split_block_after_labels (bb);
> -      make_close_phi_nodes_unique (e->src);
> +      bb = e->src;
>      }
>    else
>      {
> -      gphi_iterator psi;
>        basic_block close = split_edge (e);
>        e = single_succ_edge (close);
>        for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> @@ -404,7 +326,7 @@ canonicalize_loop_closed_ssa (loop_p loo
>  
>         /* Only add close phi nodes for SSA_NAMEs defined in LOOP.  */
>         if (TREE_CODE (arg) != SSA_NAME
> -           || !defined_in_loop_p (arg, loop))
> +           || loop_containing_stmt (SSA_NAME_DEF_STMT (arg)) != loop)
>           continue;
>  
>         tree res = copy_ssa_name (arg);
> @@ -413,8 +335,30 @@ canonicalize_loop_closed_ssa (loop_p loo
>                      UNKNOWN_LOCATION);
>         SET_USE (use_p, res);
>       }
> +      bb = close;
> +    }
>  
> -      make_close_phi_nodes_unique (close);
> +  /* Eliminate duplicates.  This relies on processing loops from
> +     innermost to outer.  */
> +  for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> +    {
> +      gphi_iterator gsi = psi;
> +      gphi *phi = psi.phi ();
> +
> +      /* At this point, PHI should be a close phi in normal form.  */
> +      gcc_assert (gimple_phi_num_args (phi) == 1);
> +
> +      /* Iterate over the next phis and remove duplicates.  */
> +      gsi_next (&gsi);
> +      while (!gsi_end_p (gsi))
> +     if (gimple_phi_arg_def (phi, 0) == gimple_phi_arg_def (gsi.phi (), 0))
> +       {
> +         replace_uses_by (gimple_phi_result (gsi.phi ()),
> +                          gimple_phi_result (phi));
> +         remove_phi_node (&gsi, true);
> +       }
> +     else
> +       gsi_next (&gsi);
>      }
>  }
>  
> @@ -443,7 +387,7 @@ static void
>  canonicalize_loop_closed_ssa_form (void)
>  {
>    loop_p loop;
> -  FOR_EACH_LOOP (loop, 0)
> +  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
>      canonicalize_loop_closed_ssa (loop);
>  
>    checking_verify_loop_closed_ssa (true);
> @@ -509,6 +453,19 @@ graphite_transform_loops (void)
>                          "loop nest optimized\n");
>        }
>  
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      loop_p loop;
> +      int num_no_dependency = 0;
> +
> +      FOR_EACH_LOOP (loop, 0)
> +     if (loop->can_be_parallel)
> +       num_no_dependency++;
> +
> +      fprintf (dump_file, "%d loops carried no dependency.\n",
> +            num_no_dependency);
> +    }
> +
>    free_scops (scops);
>    graphite_finalize (need_cfg_cleanup_p);
>    the_isl_ctx = NULL;
> Index: gcc/testsuite/gcc.dg/graphite/scop-24.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/graphite/scop-24.c   (nonexistent)
> +++ gcc/testsuite/gcc.dg/graphite/scop-24.c   (working copy)
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -floop-nest-optimize" } */
> +
> +typedef struct _IO_FILE FILE;
> +extern struct _IO_FILE *stderr;
> +typedef float real;
> +typedef real rvec[3];
> +int rgbset (int);
> +void ps_box (int, int);
> +void plot_phi(char *fn,rvec box,int natoms,rvec x[],real phi[])
> +{
> +  real phi_max,rr,gg,bb,fac,dx,x0,y0;
> +  int i;
> +  for(i=0; (i<natoms); i++) 
> +    phi_max=((phi_max > __builtin_fabs(phi[i]))
> +          ? phi_max : __builtin_fabs(phi[i]));
> +  if (__builtin_fabs(phi_max)<1.2e-38)
> +      __builtin_fprintf(stderr, "X");
> +  ps_box((real)(fac*box[0]-1),(real)(fac*box[1]-1));
> +  for(i=0; (i<natoms); i++)
> +    {
> +      rr=gg=bb=1.0;
> +      if (phi[i] < 0)
> +     gg=bb=(1.0+(phi[i]/phi_max));
> +      else
> +     rr=gg=(1.0-(phi[i]/phi_max));
> +      rr=rgbset(rr);
> +    }
> +}
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)

Reply via email to