The following adds a simple ad-hoc (matching other code) way to
perform sinking and merging of common stores to a CFG merger
to the SSA code sinking pass.

On cc1-files (from the GCC 4.7 branch head) this performs
930 such merges out of which 366 are stores with the same value
(and thus require no PHI).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

The motivating testcase is that of PR33315 which we now manage to
optimize to three straight-line stores from a previously twisted
CFG maze.

There are a lot of enhancement possibilities but as for all the
rest of the sinking code in this pass a proper dataflow driven
approach is missing (like SSU-PRE ontop of which sinking can
be implemented similar to hoisting ontop of GVN-PRE).

Comments?

I'll give it overnight SPEC testing (but don't expect any surprises).

Thanks,
Richard.

2016-07-13  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/33315
        * tree-ssa-sink.c: Include tree-eh.h.
        (sink_code_in_bb): Return TODO_cleanup_cfg if we commonized
        and sunk stores.  Implement store commoning by sinking to
        the successor.

        * gcc.dg/tree-ssa/ssa-sink-13.c: New testcase.
        * gcc.dg/tree-ssa/ssa-sink-14.c: Likewise.

Index: gcc/tree-ssa-sink.c
===================================================================
*** gcc/tree-ssa-sink.c (revision 238287)
--- gcc/tree-ssa-sink.c (working copy)
*************** along with GCC; see the file COPYING3.
*** 35,40 ****
--- 35,41 ----
  #include "tree-cfg.h"
  #include "cfgloop.h"
  #include "params.h"
+ #include "tree-eh.h"
  
  /* TODO:
     1. Sinking store only using scalar promotion (IE without moving the RHS):
*************** statement_sink_location (gimple *stmt, b
*** 460,466 ****
  
  /* Perform code sinking on BB */
  
! static void
  sink_code_in_bb (basic_block bb)
  {
    basic_block son;
--- 461,467 ----
  
  /* Perform code sinking on BB */
  
! static unsigned
  sink_code_in_bb (basic_block bb)
  {
    basic_block son;
*************** sink_code_in_bb (basic_block bb)
*** 468,473 ****
--- 469,628 ----
    edge_iterator ei;
    edge e;
    bool last = true;
+   unsigned todo = 0;
+ 
+   /* Very simplistic code to sink common stores from the predecessor through
+      our virtual PHI.  We do this before sinking stmts from BB as it might
+      expose sinking opportunities of the merged stores.
+      Once we have partial dead code elimination through sth like SSU-PRE this
+      should be moved there.  */
+   gphi *phi;
+   if (EDGE_COUNT (bb->preds) > 1
+       && (phi = get_virtual_phi (bb)))
+     {
+       /* Repeat until no more common stores are found.  */
+       while (1)
+       {
+         gimple *first_store = NULL;
+         auto_vec <tree, 5> vdefs;
+ 
+         /* Search for common stores defined by all virtual PHI args.
+            ???  Common stores not present in all predecessors could
+            be handled by inserting a forwarder to sink to.  Generally
+            this involves deciding which stores to do this for if
+            multiple common stores are present for different sets of
+            predecessors.  See PR11832 for an interesting case.  */
+         for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+           {
+             tree arg = gimple_phi_arg_def (phi, i);
+             gimple *def = SSA_NAME_DEF_STMT (arg);
+             if (! is_gimple_assign (def)
+                 || stmt_can_throw_internal (def))
+               {
+                 /* ???  We could handle some cascading with the def being
+                    another PHI.  We'd have to insert multiple PHIs for
+                    the rhs then though (if they are not all equal).  */
+                 first_store = NULL;
+                 break;
+               }
+             /* ???  Do not try to do anything fancy with aliasing, thus
+                do not sink across non-aliased loads (or even stores,
+                so different store order will make the sinking fail).  */
+             bool all_uses_on_phi = true;
+             imm_use_iterator iter;
+             use_operand_p use_p;
+             FOR_EACH_IMM_USE_FAST (use_p, iter, arg)
+               if (USE_STMT (use_p) != phi)
+                 all_uses_on_phi = false;
+             if (! all_uses_on_phi)
+               {
+                 first_store = NULL;
+                 break;
+               }
+             if (! first_store)
+               first_store = def;
+             /* ??? We could handle differing SSA uses in the LHS by inserting
+                PHIs for them.  */
+             else if (! operand_equal_p (gimple_assign_lhs (first_store),
+                                         gimple_assign_lhs (def), 0))
+               {
+                 first_store = NULL;
+                 break;
+               }
+             TREE_VISITED (arg) = 1;
+             vdefs.safe_push (arg);
+           }
+         if (! first_store)
+           break;
+ 
+         /* Check if we need a PHI node to merge the stored values.  */
+         bool allsame = true;
+         for (unsigned i = 1; i < vdefs.length (); ++i)
+           {
+             gimple *def = SSA_NAME_DEF_STMT (vdefs[i]);
+             if (! operand_equal_p (gimple_assign_rhs1 (first_store),
+                                    gimple_assign_rhs1 (def), 0))
+               {
+                 allsame = false;
+                 break;
+               }
+           }
+ 
+         /* We cannot handle aggregate values if we need to merge them.  */
+         tree type = TREE_TYPE (gimple_assign_lhs (first_store));
+         if (! allsame
+             && ! is_gimple_reg_type (type))
+           break;
+ 
+         if (dump_enabled_p ())
+           {
+             dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
+                              gimple_location (first_store),
+                              "sinking common stores %sto ",
+                              allsame ? "with same value " : "");
+             dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM,
+                                gimple_assign_lhs (first_store));
+             dump_printf (MSG_OPTIMIZED_LOCATIONS, "\n");
+           }
+ 
+         /* Insert a PHI to merge differing stored values if necessary.
+            Note that in general inserting PHIs isn't a very good idea as
+            it makes the job of coalescing and register allocation harder.
+            Even common SSA uses on the rhs/lhs might extend their lifetime
+            across multiple edges by this code motion which makes
+            register allocation harder.  */
+         tree from;
+         if (! allsame)
+           {
+             from = make_ssa_name (type);
+             gphi *newphi = create_phi_node (from, bb);
+             for (unsigned i = 0; i < vdefs.length (); ++i)
+               {
+                 gimple *def = SSA_NAME_DEF_STMT (vdefs[i]);
+                 add_phi_arg (newphi, gimple_assign_rhs1 (def),
+                              EDGE_PRED (bb, i), UNKNOWN_LOCATION);
+               }
+           }
+         else
+           from = gimple_assign_rhs1 (first_store);
+ 
+         /* Remove all stores.  */
+         for (unsigned i = 0; i < vdefs.length (); ++i)
+           {
+             /* If we have more than one use of a VDEF on the PHI make sure
+                we remove the defining stmt only once.  */
+             if (TREE_VISITED (vdefs[i]))
+               {
+                 TREE_VISITED (vdefs[i]) = 0;
+                 gimple *def = SSA_NAME_DEF_STMT (vdefs[i]);
+                 gsi = gsi_for_stmt (def);
+                 unlink_stmt_vdef (def);
+                 gsi_remove (&gsi, false);
+                 release_defs (def);
+               }
+           }
+ 
+         /* Insert the first store at the beginning of the merge BB.  */
+         gimple_set_vdef (first_store, gimple_phi_result (phi));
+         SSA_NAME_DEF_STMT (gimple_vdef (first_store)) = first_store;
+         gimple_phi_set_result (phi, make_ssa_name (gimple_vop (cfun)));
+         gimple_set_vuse (first_store, gimple_phi_result (phi));
+         gimple_assign_set_rhs1 (first_store, from);
+         /* ???  Should we reset first_stores location?  */
+         gsi = gsi_after_labels (bb);
+         gsi_insert_before (&gsi, first_store, GSI_SAME_STMT);
+ 
+         todo |= TODO_cleanup_cfg;
+       }
+ 
+       /* We could now have empty predecessors that we could remove,
+        forming a proper CFG for further sinking.  Note that even
+        CFG cleanup doesn't do this fully at the moment and it
+        doesn't preserve post-dominators in the process either.
+        The mergephi pass might do it though.  gcc.dg/tree-ssa/ssa-sink-13.c
+        shows this nicely if you disable tail merging or (same effect)
+        make the stored values unequal.  */
+     }
  
    /* If this block doesn't dominate anything, there can't be any place to sink
       the statements to.  */
*************** sink_code_in_bb (basic_block bb)
*** 543,550 ****
         son;
         son = next_dom_son (CDI_POST_DOMINATORS, son))
      {
!       sink_code_in_bb (son);
      }
  }
  
  /* Perform code sinking.
--- 698,707 ----
         son;
         son = next_dom_son (CDI_POST_DOMINATORS, son))
      {
!       todo |= sink_code_in_bb (son);
      }
+ 
+   return todo;
  }
  
  /* Perform code sinking.
*************** pass_sink_code::execute (function *fun)
*** 622,634 ****
    memset (&sink_stats, 0, sizeof (sink_stats));
    calculate_dominance_info (CDI_DOMINATORS);
    calculate_dominance_info (CDI_POST_DOMINATORS);
!   sink_code_in_bb (EXIT_BLOCK_PTR_FOR_FN (fun));
    statistics_counter_event (fun, "Sunk statements", sink_stats.sunk);
    free_dominance_info (CDI_POST_DOMINATORS);
    remove_fake_exit_edges ();
    loop_optimizer_finalize ();
  
!   return 0;
  }
  
  } // anon namespace
--- 779,791 ----
    memset (&sink_stats, 0, sizeof (sink_stats));
    calculate_dominance_info (CDI_DOMINATORS);
    calculate_dominance_info (CDI_POST_DOMINATORS);
!   unsigned todo = sink_code_in_bb (EXIT_BLOCK_PTR_FOR_FN (fun));
    statistics_counter_event (fun, "Sunk statements", sink_stats.sunk);
    free_dominance_info (CDI_POST_DOMINATORS);
    remove_fake_exit_edges ();
    loop_optimizer_finalize ();
  
!   return todo;
  }
  
  } // anon namespace
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c (working copy)
***************
*** 0 ****
--- 1,25 ----
+ /* PR33315 */
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-sink" } */
+ 
+ int num;
+ int a[20];
+ 
+ void test ()
+ {
+   int i;
+   int *ptr;
+   ptr = & a[0];
+   i = num;
+   if ( i == 1) *(ptr+0) = 0;
+   if ( i != 1) *(ptr+0) = 0;
+   if ( i == 2) *(ptr+1) = 0;
+   if ( i != 2) *(ptr+1) = 0;
+   if ( i == 3) *(ptr+2) = 0;
+   if ( i != 3) *(ptr+2) = 0;
+ }
+ 
+ /* We should sink/merge all stores and end up with a single BB.  */
+ 
+ /* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink" } } */
+ /* { dg-final { scan-tree-dump-times "<bb " 1 "sink" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c (revision 0)
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c (working copy)
***************
*** 0 ****
--- 1,17 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-sink" } */
+ 
+ int x;
+ void foo (int b)
+ {
+   if (b)
+     x = b;
+   else
+     x = 2;
+ }
+ 
+ /* We should have sunk the store and inserted a PHI to merge the
+    stored values.  */
+ 
+ /* { dg-final { scan-tree-dump-times " = PHI" 1 "sink" } } */
+ /* { dg-final { scan-tree-dump-times "x = " 1 "sink" } } */

Reply via email to