From 5280ddaddf71ed2f15340b7e6be12682294134ea Mon Sep 17 00:00:00 2001
From: Ajit Kumar Agarwal <ajitkum@xilinx.com>
Date: Fri, 4 Sep 2015 18:50:48 +0200
Subject: [PATCH] [Patch,tree-optimization]: Add new path Splitting pass on
 tree ssa representation.

Add a new path splitting pass on the tree SSA representation.  When the
two execution paths of an IF-THEN-ELSE merge at the latch node of a loop,
the pass duplicates the merge node into the two paths, preserving the SSA
semantics.

ChangeLog:
2015-09-05  Ajit Agarwal  <ajitkum@xilinx.com>

	* Makefile.in (OBJS): Add tree-ssa-path-split.o.
	* common.opt (ftree-path-split): Add the new flag.
	* opts.c (default_options_table): Add an entry for
	path splitting optimization at -O2 and above.
	* passes.def (path_split): Add new path splitting pass.
	* timevar.def (TV_TREE_PATH_SPLIT): New.
	* tree-pass.h (make_pass_path_split): New declaration.
	* tree-ssa-path-split.c: New file.
	* tracer.c (transform_duplicate): New function.
	(ignore_bb_p): Make global.
	(tail_duplicate): Use transform_duplicate.
	* tracer.h: New header file.
	* doc/invoke.texi (ftree-path-split): Document.
	(fdump-tree-path_split): Document.
	* testsuite/gcc.dg/path-split-1.c: New.

Signed-off-by: Ajit Agarwal <ajitkum@xilinx.com>
---
 gcc/Makefile.in                     |   1 +
 gcc/common.opt                      |   4 +
 gcc/doc/invoke.texi                 |  16 ++-
 gcc/opts.c                          |   1 +
 gcc/passes.def                      |   1 +
 gcc/testsuite/gcc.dg/path-split-1.c |  69 ++++++++++
 gcc/timevar.def                     |   1 +
 gcc/tracer.c                        |  33 +++--
 gcc/tracer.h                        |  26 ++++
 gcc/tree-pass.h                     |   1 +
 gcc/tree-ssa-path-split.c           | 255 ++++++++++++++++++++++++++++++++++++
 11 files changed, 394 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/path-split-1.c
 create mode 100644 gcc/tracer.h
 create mode 100644 gcc/tree-ssa-path-split.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 3d1c1e5..7d3abf6 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1462,6 +1462,7 @@ OBJS = \
 	tree-ssa-loop.o \
 	tree-ssa-math-opts.o \
 	tree-ssa-operands.o \
+	tree-ssa-path-split.o \
 	tree-ssa-phiopt.o \
 	tree-ssa-phiprop.o \
 	tree-ssa-pre.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 94d1d88..da76d74 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2378,6 +2378,10 @@ ftree-vrp
 Common Report Var(flag_tree_vrp) Init(0) Optimization
 Perform Value Range Propagation on trees
 
+ftree-path-split
+Common Report Var(flag_tree_path_split) Init(0) Optimization
+Perform Path Splitting on trees for loop backedges
+
 funit-at-a-time
 Common Report Var(flag_unit_at_a_time) Init(1)
 Compile whole compilation unit at a time
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d7dc64e..c5e06e2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -349,6 +349,7 @@ Objective-C and Objective-C++ Dialects}.
 -fdump-tree-fre@r{[}-@var{n}@r{]} @gol
 -fdump-tree-vtable-verify @gol
 -fdump-tree-vrp@r{[}-@var{n}@r{]} @gol
+-fdump-tree-path_split@r{[}-@var{n}@r{]} @gol
 -fdump-tree-storeccp@r{[}-@var{n}@r{]} @gol
 -fdump-final-insns=@var{file} @gol
 -fcompare-debug@r{[}=@var{opts}@r{]}  -fcompare-debug-second @gol
@@ -456,7 +457,7 @@ Objective-C and Objective-C++ Dialects}.
 -ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
 -ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
 -ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
--ftree-vectorize -ftree-vrp @gol
+-ftree-vectorize -ftree-vrp -ftree-path-split @gol
 -funit-at-a-time -funroll-all-loops -funroll-loops @gol
 -funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
 -fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt @gol
@@ -7154,6 +7155,11 @@ is made by appending @file{.slp} to the source file name.
 Dump each function after Value Range Propagation (VRP).  The file name
 is made by appending @file{.vrp} to the source file name.
 
+@item path_split
+@opindex fdump-tree-path_split
+Dump each function after path splitting.  The file name is made by
+appending @file{.path_split} to the source file name.
+
 @item all
 @opindex fdump-tree-all
 Enable all the available tree dumps with the flags provided in this option.
@@ -7656,6 +7662,7 @@ also turns on the following optimization flags:
 -ftree-switch-conversion -ftree-tail-merge @gol
 -ftree-pre @gol
 -ftree-vrp @gol
+-ftree-path-split @gol
 -fipa-ra}
 
 Please note the warning under @option{-fgcse} about
@@ -8957,6 +8964,13 @@ enabled by default at @option{-O2} and higher.  Null pointer check
 elimination is only done if @option{-fdelete-null-pointer-checks} is
 enabled.
 
+@item -ftree-path-split
+@opindex ftree-path-split
+Perform path splitting on trees.  When the two execution paths of an
+if-then-else merge at the loop latch node, try to duplicate the
+merge node into the two paths.  This is enabled by default at
+@option{-O2} and above.
+
 @item -fsplit-ivs-in-unroller
 @opindex fsplit-ivs-in-unroller
 Enables expression of values of induction variables in later iterations
diff --git a/gcc/opts.c b/gcc/opts.c
index f1a9acd..b75becc 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -506,6 +506,7 @@ static const struct default_options default_options_table[] =
     { OPT_LEVELS_2_PLUS, OPT_fisolate_erroneous_paths_dereference, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_fipa_ra, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_flra_remat, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_ftree_path_split, NULL, 1 },
 
     /* -O3 optimizations.  */
     { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
diff --git a/gcc/passes.def b/gcc/passes.def
index 64fc4d9..536ef32 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_ccp);
 	  /* After CCP we rewrite no longer addressed locals into SSA
 	     form if possible.  */
+          NEXT_PASS (pass_path_split);
 	  NEXT_PASS (pass_forwprop);
 	  NEXT_PASS (pass_sra_early);
 	  /* pass_build_ealias is a dummy pass that ensures that we
diff --git a/gcc/testsuite/gcc.dg/path-split-1.c b/gcc/testsuite/gcc.dg/path-split-1.c
new file mode 100644
index 0000000..af01e49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/path-split-1.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-path_split " } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define RGBMAX 255
+
+/* Turn 100 RGB triples (3 bytes in) into xc/xm/xy/xk quads (4 bytes out).
+   Return the sum of xk modulo 256: 13200 % 256 == 144 for this input.  */
+int
+test()
+{
+  int i;
+  unsigned char sum = 0;
+  unsigned char xr, xg, xb, xc, xm, xy, xk;
+  unsigned char *ReadBuf = (unsigned char *) malloc (300);
+  unsigned char *WriteBuf = (unsigned char *) malloc (400);
+  unsigned char *ReadPtr = ReadBuf, *WritePtr = WriteBuf;
+
+  for (i = 0; i < 300; i++)
+    ReadBuf[i] = (unsigned char) (100 - i);
+
+  for (i = 0; i < 100; i++)
+     {
+       xr = *ReadPtr++;
+       xg = *ReadPtr++;
+       xb = *ReadPtr++;
+
+       xc = (unsigned char) (RGBMAX - xr);
+       xm = (unsigned char) (RGBMAX - xg);
+       xy = (unsigned char) (RGBMAX - xb);
+
+       if (xc < xm)
+         {
+           xk = (unsigned char) (xc < xy ? xc : xy);
+         }
+       else
+        {
+          xk = (unsigned char) (xm < xy ? xm : xy);
+        }
+
+       xc = (unsigned char) (xc - xk);
+       xm = (unsigned char) (xm - xk);
+       xy = (unsigned char) (xy - xk);
+
+       *WritePtr++ = xc;
+       *WritePtr++ = xm;
+       *WritePtr++ = xy;
+       *WritePtr++ = xk;
+       sum += xk;
+    }
+  free (ReadBuf);
+  free (WriteBuf);
+  return sum;
+}
+
+int
+main()
+{
+  if (test() != 144)
+    abort();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "xc_\[0-9\]\[0-9\]* -> { xc_\[0-9\]\[0-9\]* }" "path_split"} } */
+/* { dg-final { scan-tree-dump "xm_\[0-9\]\[0-9\]* -> { xm_\[0-9\]\[0-9\]* }" "path_split"} } */
+/* { dg-final { scan-tree-dump "xy_\[0-9\]\[0-9\]* -> { xy_\[0-9\]\[0-9\]* }" "path_split"} } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index ac41075..e57e2ab 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -298,3 +298,4 @@ DEFTIMEVAR (TV_LINK		     , "link JIT code")
 DEFTIMEVAR (TV_LOAD		     , "load JIT result")
 DEFTIMEVAR (TV_JIT_ACQUIRING_MUTEX   , "acquiring JIT mutex")
 DEFTIMEVAR (TV_JIT_CLIENT_CODE   , "JIT client code")
+DEFTIMEVAR (TV_TREE_PATH_SPLIT  , "tree path split")
diff --git a/gcc/tracer.c b/gcc/tracer.c
index cad7ab1..fb618d6 100644
--- a/gcc/tracer.c
+++ b/gcc/tracer.c
@@ -56,9 +56,9 @@
 #include "tree-inline.h"
 #include "cfgloop.h"
 #include "fibonacci_heap.h"
+#include "tracer.h"
 
 static int count_insns (basic_block);
-static bool ignore_bb_p (const_basic_block);
 static bool better_p (const_edge, const_edge);
 static edge find_best_successor (basic_block);
 static edge find_best_predecessor (basic_block);
@@ -90,7 +90,7 @@ bb_seen_p (basic_block bb)
 }
 
 /* Return true if we should ignore the basic block for purposes of tracing.  */
-static bool
+bool
 ignore_bb_p (const_basic_block bb)
 {
   gimple g;
@@ -224,6 +224,22 @@ find_trace (basic_block bb, basic_block *trace)
   return i;
 }
 
+/* Duplicate BB2 along the edge BB->BB2, placing the copy after BB, and
+   return the copy.  BB must be a predecessor of BB2.  */
+basic_block
+transform_duplicate (basic_block bb, basic_block bb2)
+{
+  edge e = find_edge (bb, bb2);
+
+  /* Catch callers that pass blocks without a connecting edge.  */
+  gcc_assert (e);
+
+  basic_block copy = duplicate_block (bb2, e, bb);
+  flush_pending_stmts (e);
+  add_phi_args_after_copy (&copy, 1, NULL);
+  return copy;
+}
+
 /* Look for basic blocks in frequency order, construct traces and tail duplicate
    if profitable.  */
 
@@ -319,17 +335,8 @@ tail_duplicate (void)
 		 entries or at least rotate the loop.  */
 	      && bb2->loop_father->header != bb2)
 	    {
-	      edge e;
-	      basic_block copy;
-
-	      nduplicated += counts [bb2->index];
-
-	      e = find_edge (bb, bb2);
-
-	      copy = duplicate_block (bb2, e, bb);
-	      flush_pending_stmts (e);
-
-	      add_phi_args_after_copy (&copy, 1, NULL);
+              nduplicated += counts [bb2->index];
+              basic_block copy = transform_duplicate (bb, bb2);
 
 	      /* Reconsider the original copy of block we've duplicated.
 	         Removing the most common predecessor may make it to be
diff --git a/gcc/tracer.h b/gcc/tracer.h
new file mode 100644
index 0000000..454d3b7
--- /dev/null
+++ b/gcc/tracer.h
@@ -0,0 +1,26 @@
+/* Header file for Tracer.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_TRACER_H
+#define GCC_TRACER_H
+/* Entry points of tracer.c that are shared with other passes.  */
+extern basic_block transform_duplicate (basic_block bb, basic_block bb2);
+extern bool ignore_bb_p (const_basic_block bb);
+
+#endif /* GCC_TRACER_H */
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 7b66a1c..6af7f0d 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -383,6 +383,7 @@ extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_path_split (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_build_alias (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-path-split.c b/gcc/tree-ssa-path-split.c
new file mode 100644
index 0000000..f8fd098
--- /dev/null
+++ b/gcc/tree-ssa-path-split.c
@@ -0,0 +1,255 @@
+/* Support routines for Path Splitting.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "cfghooks.h"
+#include "tree.h"
+#include "gimple.h"
+#include "rtl.h"
+#include "ssa.h"
+#include "flags.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "cfganal.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "tree-eh.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "tree-cfg.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "tree-into-ssa.h"
+#include "tree-ssa.h"
+#include "tree-pass.h"
+#include "tree-dump.h"
+#include "cfgloop.h"
+#include "tree-scalar-evolution.h"
+#include "tree-ssa-propagate.h"
+#include "tree-chrec.h"
+#include "tracer.h"
+
+/* Find the blocks to split for LOOP.  If the latch of LOOP has exactly
+   two predecessors and its immediate dominator ends in a conditional
+   branch, record the candidate pair in TRACE: TRACE[0] is the first
+   latch predecessor, provided it falls through to its single successor,
+   and TRACE[1] is the latch.  TRACE is left untouched when LOOP does not
+   have this shape.  */
+
+static void
+find_trace_loop_latch_same_as_join_blk (loop_p loop, basic_block *trace)
+{
+  basic_block latch = loop->latch;
+
+  if (EDGE_COUNT (latch->preds) != 2)
+    return;
+
+  basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
+  gimple last = gsi_stmt (gsi_last_bb (bb));
+
+  /* An empty dominator has no controlling statement, so reject it just
+     like a block that ends in anything but a GIMPLE_COND.  */
+  if (!last || gimple_code (last) != GIMPLE_COND)
+    return;
+
+  /* Only the first predecessor edge is a candidate; do not fall back to
+     the second one when it does not qualify.  */
+  basic_block pred = EDGE_PRED (latch, 0)->src;
+
+  if (!single_succ_p (pred)
+      || !(single_succ_edge (pred)->flags & EDGE_FALLTHRU))
+    return;
+
+  /* Publish both entries together so that the caller never sees a latch
+     paired with a block recorded for some other loop.  */
+  trace[0] = pred;
+  trace[1] = latch;
+}
+
+/* Return true if BB is a candidate for duplication: it must contain no
+   cast assignment and more than one non-debug statement.  */
+
+static bool
+is_feasible_trace (basic_block bb)
+{
+  int real_stmts = 0;
+
+  for (gimple_stmt_iterator it = gsi_start_bb (bb); !gsi_end_p (it);
+       gsi_next (&it))
+    {
+      gimple cur = gsi_stmt (it);
+
+      if (gimple_assign_cast_p (cur))
+        return false;
+      if (is_gimple_debug (cur))
+        continue;
+      real_stmts++;
+    }
+
+  /* Require at least two statements that are not debug statements.  */
+  return real_stmts > 1;
+}
+
+/* If the immediate dominator of the latch of the loop is a
+   block ending in a conditional branch, then the loop latch is
+   duplicated into the path of one of its predecessors, preserving
+   the SSA semantics.
+
+   CFG before transformation.
+
+   <bb 6>:
+      xk_35 = MIN_EXPR <xy_34, xc_32>;
+      goto <bb 8>;
+
+   <bb 7>:
+      xk_36 = MIN_EXPR <xy_34, xm_33>;
+
+   <bb 8>:
+      # xk_4 = PHI <xk_35(6), xk_36(7)>
+      xc_37 = xc_32 - xk_4;
+      xm_38 = xm_33 - xk_4;
+      xy_39 = xy_34 - xk_4;
+
+   CFG After Path Splitting transformation
+   before cleanup phase.
+
+   <bb 7>:
+     xk_35 = MIN_EXPR <xy_34, xc_32>;
+
+   <bb 8>:
+     # xk_29 = PHI <xk_35(7)>
+     xc_56 = xc_32 - xk_29;
+     xm_57 = xm_33 - xk_29;
+     xy_58 = xy_34 - xk_29;
+     goto <bb 11>;
+
+   <bb 9>:
+     xk_36 = MIN_EXPR <xy_34, xm_33>;
+
+   <bb 10>:
+     # xk_4 = PHI <xk_36(9)>
+     xc_37 = xc_32 - xk_4;
+     xm_38 = xm_33 - xk_4;
+     xy_39 = xy_34 - xk_4;
+
+  <bb 11>: .......  */
+ 
+static bool
+perform_path_splitting ()
+{
+  bool changed = false;
+  loop_p loop;
+
+  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
+  initialize_original_copy_tables ();
+  calculate_dominance_info (CDI_DOMINATORS);
+
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+  {
+    /* Start every loop from an empty trace; otherwise a block recorded
+       for an earlier, rejected loop could be paired with this latch.  */
+    basic_block trace[2] = {NULL, NULL};
+
+    if (ignore_bb_p (loop->latch))
+      continue;
+    find_trace_loop_latch_same_as_join_blk (loop, trace);
+    if (!trace[0] || !trace[1] || !is_feasible_trace (trace[1]))
+      continue;
+
+    transform_duplicate (trace[0], trace[1]);
+    changed = true;
+  }
+  loop_optimizer_finalize ();
+  free_original_copy_tables ();
+  return changed;
+}
+
+/* Main entry point of the path splitting pass.  Return the TODO flags
+   for the pass manager: TODO_cleanup_cfg if any block was duplicated,
+   zero otherwise.  */
+
+static unsigned int
+execute_path_split (void)
+{
+  /* Nothing to do for functions with at most one real basic block or
+     without any back edge.  */
+  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
+      || !mark_dfs_back_edges ())
+    return 0;
+
+  if (!perform_path_splitting ())
+    return 0;
+
+  free_dominance_info (CDI_DOMINATORS);
+
+  /* The CFG changed; let cleanup_cfg fix up the loop structures.  */
+  if (current_loops)
+    loops_state_set (LOOPS_NEED_FIXUP);
+
+  return TODO_cleanup_cfg;
+}
+
+static bool
+gate_path_split (void)
+{
+  return flag_tree_path_split != 0; /* Set by -ftree-path-split.  */
+}
+
+namespace {
+
+/* Static description of the path splitting pass.  */
+const pass_data pass_data_path_split =
+{
+  GIMPLE_PASS, /* type */
+  "path_split", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_TREE_PATH_SPLIT, /* tv_id */
+  PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+/* Pass object that forwards to gate_path_split and execute_path_split.  */
+class pass_path_split : public gimple_opt_pass
+{
+public:
+  pass_path_split (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_path_split, ctxt) {}
+  /* opt_pass methods: */
+  opt_pass *clone () { return new pass_path_split (m_ctxt); }
+  virtual bool gate (function *) { return gate_path_split (); }
+  virtual unsigned int execute (function *) { return execute_path_split (); }
+}; // class pass_path_split
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_path_split (gcc::context *ctxt)
+{
+  return new pass_path_split (ctxt); /* Declared in tree-pass.h.  */
+}
-- 
1.8.2.1

