Could a gatekeeper please review the attached patch for a
target-independent optimization for C++ programs?

Most of the changes are again in the CG.  The files modified are:

osprey/be/cg/cflow.cxx
osprey/be/cg/eh_region.cxx
osprey/be/vho/vho_lower.cxx
osprey/common/com/config.cxx
osprey/common/com/config_opt.cxx
osprey/common/com/symtab_access.h
osprey/common/com/symtab_defs.h
osprey/common/com/config.h

Here is the proposed log message:

Enable CFLOW optimization within some EH regions.

Allow CG control flow optimization to reorder BBs inside EH regions
by updating the EH region information of each BB when performing
transformations.  Previously, CFLOW simply skipped all BBs of any
EH region in Optimize_Cyclic_Chain.

To make room for CFLOW optimization, VHO now merges simple EH regions
into larger simple EH regions.  Here, a simple EH region is an EH region
with no real catch blocks or cleanup.


Thanks,

-David Coakley / AMD Open Source Compiler Engineering
Index: osprey/be/cg/cflow.cxx
===================================================================
--- osprey/be/cg/cflow.cxx	(revision 3593)
+++ osprey/be/cg/cflow.cxx	(working copy)
@@ -231,6 +231,7 @@
     struct vargoto_info v;
   } u;
   mUINT16 eh_rgn;		/* exc handling region number */
+  mUINT16 pseudo_eh_rgn;
   mBOOL cold;			/* part of cold region */
   INT nsuccs;			/* number of successors */
   struct succedge succs[BBINFO_NSUCCS]; /* successor edges; we dynamically
@@ -246,6 +247,8 @@
 #define Set_BBINFO_kind(b,k)		(BB_BBINFO(b)->kind=(k))
 #define     BBINFO_eh_rgn(b)		(BB_BBINFO(b)->eh_rgn+0)
 #define Set_BBINFO_eh_rgn(b, e)		(BB_BBINFO(b)->eh_rgn=(e))
+#define     BBINFO_pseudo_eh_rgn(b)		(BB_BBINFO(b)->pseudo_eh_rgn+0)
+#define Set_BBINFO_pseudo_eh_rgn(b, e)		(BB_BBINFO(b)->pseudo_eh_rgn=(e))
 #define     BBINFO_cold(b)		(BB_BBINFO(b)->cold+0)
 #define Set_BBINFO_cold(b, e)		(BB_BBINFO(b)->cold=(e))
 #define     BBINFO_nsuccs(b)		(BB_BBINFO(b)->nsuccs+0)
@@ -844,7 +847,7 @@
   id = BB_id(new_bb);
   BZERO(new_bb, sizeof(*new_bb));
   new_bb->id = id;
-  if (model) BB_rid(new_bb) = BB_rid(model);
+  if (model) BB_rid(new_bb) = BB_rid(model); 
   return new_bb;
 }
 
@@ -1787,6 +1790,8 @@
 				 * points to unused link list elements
 				 */
   INT eh_rgn;			/* The current EH region ID */
+  INT prev_eh_rgn = -2;
+  INT pseudo_eh_rgn = -1;
   INT eh_id;			/* Highest EH region ID used */
   BB *bb;
 
@@ -1866,6 +1871,12 @@
     /* Set the exception region ID for this block.
      */
     bbinfo->eh_rgn = eh_rgn;
+    if(eh_rgn != prev_eh_rgn)
+    {
+      prev_eh_rgn = eh_rgn;
+      pseudo_eh_rgn++;
+    }
+    bbinfo->pseudo_eh_rgn = pseudo_eh_rgn;
 
     /* Determine if BB is in cold region or not. We cache this in
      * the BB-info rather than overload BB_local_flag1 since cflow
@@ -5423,20 +5434,33 @@
      * Exception regions are similar except that the created chain need
      * only consist of the BBs in the same or nested exception region.
      */
-    Is_True(BB_rid(bb) == first_rid, 
-	    ("region nesting botched at BB:%d", BB_id(bb)));
+
     next_bb = BB_next(tail);
-    while (   next_bb
-	   &&    ((BB_rid(next_bb) != first_rid) 
+    if (Optimize_exception_ranges != 2 
+        && !(Optimize_exception_ranges == 1 && PU_cxx_lang(Get_Current_PU()))){
+      // if the next BB is of different REGION, put them into a chain 
+      while (   next_bb
+              &&    ((BB_rid(next_bb) != first_rid) 
 	      || (BBINFO_eh_rgn(tail) && BBINFO_eh_rgn(next_bb)))
-    ) {
-      do {
-	tail = next_bb;
-	BB_MAP_Set(chain_map, tail, chains);
-	next_bb = BB_next(tail);
-      } while (next_bb && BB_rid(tail) != first_rid);
+      ) {
+        do {
+          tail = next_bb;
+	  BB_MAP_Set(chain_map, tail, chains);
+	  next_bb = BB_next(tail);
+        } while (next_bb && BB_rid(tail) != first_rid);
+      }
+    } else if (!PU_simple_eh(Get_Current_PU()))
+    {
+	while (TRUE) {
+          if ( next_bb && 
+           (BB_rid(next_bb) != BB_rid(tail) 
+            || BBINFO_pseudo_eh_rgn(next_bb) != BBINFO_pseudo_eh_rgn(tail))) {
+          tail = next_bb;
+          BB_MAP_Set(chain_map, next_bb, chains);
+          next_bb = BB_next(tail);
+          } else break;
+	}
     }
-
     /* Isolate the new chain.
      */
     BB_prev(bb) = NULL;
@@ -5518,6 +5542,7 @@
 Weight_Succ_Chains(BB_MAP chain_map, BBCHAIN *chain)
 {
   BB *bb;
+  mINT16 last_rgn = BBINFO_pseudo_eh_rgn(chain->tail);
 
   /* The successors of 'chain' are the successors of the BBs in 'chain'.
    */
@@ -6052,7 +6077,12 @@
   while (unordered) {
     BBCHAIN *ch;
     double max_weight;
+    BOOL eh_succ;
+    BOOL eh_continue;
+    mINT32 last_eh;
 
+    last_eh = BBINFO_pseudo_eh_rgn(last_ordered->tail);
+
     /* Adjust the weights for the edges from the last ordered chain
      * to the unordered chains.
      */
@@ -6062,8 +6092,24 @@
      * normal BBs over those with a NEVER freq hint.
      */
     max_weight = unordered->weight;
+    eh_succ = (BBINFO_pseudo_eh_rgn(unordered->head) == last_eh);
+    eh_continue = (BBINFO_pseudo_eh_rgn(unordered->tail) == last_eh);
+	
     chain = unordered;
     for (ch = unordered->next; ch; ch = ch->next) {
+      BOOL my_succ = (BBINFO_pseudo_eh_rgn(ch->head) == last_eh);
+      BOOL my_continue = (BBINFO_pseudo_eh_rgn(ch->tail) == last_eh);
+      if (eh_succ && !my_succ)
+	continue;
+      if (eh_continue && !my_continue) 
+	continue;
+      if ((my_succ && !eh_succ) || (my_continue && !eh_continue))
+      {
+	eh_succ = my_succ;
+	eh_continue = my_continue;
+	chain = ch;
+	max_weight = ch->weight;
+      } else 
       if (   (ch->never == chain->never && ch->weight > max_weight)
 	  || (!ch->never && chain->never))
       {
@@ -6151,6 +6197,7 @@
 }
 
 
+static void Print_Chain_BBs(BBCHAIN *chain);
 /* ====================================================================
  *
  * Optimize_Cyclic_Chain
@@ -6193,8 +6240,15 @@
     /* Can't place the tail in the middle of an EH region --
      * the EH region must be kept contiguous and in the same order.
      */
-    if (BBINFO_eh_rgn(bb) && BBINFO_eh_rgn(next)) continue;
+    if ((Optimize_exception_ranges != 2 &&
+           !(PU_cxx_lang(Get_Current_PU())) && Optimize_exception_ranges == 1)
+           && BBINFO_eh_rgn(bb) && BBINFO_eh_rgn(next)) continue;
 
+    if (BBINFO_pseudo_eh_rgn(bb) !=  BBINFO_pseudo_eh_rgn(next) ||
+           BBINFO_pseudo_eh_rgn(bb) != BBINFO_pseudo_eh_rgn(chain->tail) ||
+           BBINFO_pseudo_eh_rgn(bb) != BBINFO_pseudo_eh_rgn(chain->head)) continue;
+
+
     /* Can't place the tail such that the new head and tail BBs
      * would not be in the same region as originally.
      */
@@ -6291,6 +6345,7 @@
       fprintf(TFile,
 	      "  BB:%d would be a better head of cyclic chain than BB:%d\n",
 	      BB_id(best_head), BB_id(orig_head));
+      Print_Chain_BBs(chain);
     }
     Chain_BBs(NULL, best_head);
     Chain_BBs(best_tail, NULL);
@@ -6351,6 +6406,9 @@
      * another, then combine the chains.
      */
     if ((can_combine) &&
+	   BBINFO_pseudo_eh_rgn(succ) == BBINFO_pseudo_eh_rgn(pred) &&
+	(BBINFO_pseudo_eh_rgn(succ) == BBINFO_pseudo_eh_rgn(schain->tail) ||
+	 BBINFO_pseudo_eh_rgn(pred) == BBINFO_pseudo_eh_rgn(pchain->head)   ) &&
         (pchain != schain && pchain->tail == pred && schain->head == succ)) {
       INT j;
       INT nsuccs;
@@ -6380,7 +6438,7 @@
       nsuccs = BBINFO_nsuccs(tail);
       for (j = 0; j < nsuccs; j++) {
 	if (   BBINFO_succ_bb(tail, j) == head
-	    && BBINFO_succ_offset(tail, j) == 0
+	    && BBINFO_succ_offset(tail, j) == 0 
 	) {
 	  Optimize_Cyclic_Chain(pchain, chain_map);
 	  break;
Index: osprey/be/cg/eh_region.cxx
===================================================================
--- osprey/be/cg/eh_region.cxx	(revision 3593)
+++ osprey/be/cg/eh_region.cxx	(working copy)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009, 2011 Advanced Micro Devices, Inc.  All Rights Reserved.
+ * Copyright (C) 2009-2011 Advanced Micro Devices, Inc.  All Rights Reserved.
  */
 
 /*
@@ -2245,11 +2245,10 @@
 #endif // KEY
 
   fix_mask_ranges();
-  reorder_range_list();
 #ifdef KEY
   flatten_regions();
-  reorder_range_list();
 #endif
+  reorder_range_list();
 
   ST * st = ST_For_Range_Table(wn);
   eh_pu_range_st = st;
Index: osprey/be/vho/vho_lower.cxx
===================================================================
--- osprey/be/vho/vho_lower.cxx	(revision 3593)
+++ osprey/be/vho/vho_lower.cxx	(working copy)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.  All Rights Reserved.
+ * Copyright (C) 2009-2011 Advanced Micro Devices, Inc.  All Rights Reserved.
  */
 /*
  * Copyright 2003, 2004, 2005, 2006 PathScale, Inc.  All Rights Reserved.
@@ -5786,9 +5786,61 @@
   return last;
 }
 
+// TRUE iff EH region 'wn' is "simple": the exception-range option applies to
+// this PU, the region has no pragmas and no exits, and its supp INITV is ZERO.
+BOOL is_simple_eh_region(WN *wn)
+{
+  FmtAssert((WN_operator(wn) == OPR_REGION) && (WN_region_is_EH(wn)), ("It is not an EH region WN."));
+  // Level 0 turns the check off; level 1 restricts it to C++ PUs.
+  if (Optimize_exception_ranges == 0)
+    return FALSE;
+  if (Optimize_exception_ranges == 1 && !PU_cxx_lang(Get_Current_PU()))
+    return FALSE;
+  if (!WN_block_empty(WN_region_pragmas(wn)) ||
+      !WN_block_empty(WN_region_exits(wn)))
+    return FALSE;
+  INITV_IDX supp_initv = INITV_blk (INITO_val (WN_ereg_supp (wn)));
+  return INITV_kind(supp_initv) == INITVKIND_ZERO;
+}
+
+// TRUE iff exception-range optimization applies to this PU, the tree under
+// 'root' has at least one EH region, and every EH region in it is simple.
+BOOL are_all_simple_eh_regions(WN* root)
+{
+  BOOL met = FALSE;
+  if (Optimize_exception_ranges == 0 ||
+      (Optimize_exception_ranges == 1 && !PU_cxx_lang(Get_Current_PU())))
+    return FALSE;
+  // Visit every node with the tree walker: VH WHIRL may contain comma
+  // expressions, so SCFIter and StmtIter cannot be relied on here.
+  for (WN_ITER* wn_walker = WN_WALK_TreeIter(root);
+       wn_walker != NULL; wn_walker = WN_WALK_TreeNext(wn_walker))
+  {
+    WN *wn = WN_ITER_wn(wn_walker);
+    if (WN_operator(wn) != OPR_REGION || !WN_region_is_EH(wn))
+      continue;
+    if (!is_simple_eh_region(wn)) {
+      WN_WALK_Abort(wn_walker);  // leaving mid-walk: release the iterator
+      return FALSE;
+    }
+    met = TRUE;
+  }
+  return met;
+}
+
+static int Enclosing_Simple_EH = 0;
+
 static WN *
 vho_lower_region ( WN * wn, WN * block )
 {
+  BOOL is_simple_eh = FALSE;
+  if (WN_region_kind(wn) == REGION_KIND_EH && is_simple_eh_region(wn))
+  {
+    is_simple_eh = TRUE;
+    Enclosing_Simple_EH++;
+    WN_region_body(wn) = vho_lower_block ( WN_region_body(wn) );
+    Enclosing_Simple_EH--;
+  } else 
   WN_region_body(wn) = vho_lower_block ( WN_region_body(wn) );
   
   INT region_id = WN_region_id(wn);
@@ -5888,6 +5940,14 @@
     WN_INSERT_BlockLast (block, last);
     wn = NULL;	// don't insert it again
   }
+ 
+  if (Enclosing_Simple_EH != 0 && is_simple_eh){
+    WN *p;
+    p = WN_region_body(wn);
+    WN_region_body(wn) = NULL;
+    WN_Delete(wn);
+    return p;
+  }
   return wn;
 } /* vho_lower_region */
 #else
@@ -6465,6 +6525,18 @@
   }
 } /* vho_lower_check_labels */
 
+// Give every REGION node in the tree rooted at 'root' a fresh id obtained
+// from New_Region_Id(), visiting nodes in WN_WALK_TreeIter order.
+static void vho_lower_rename_region_id(WN * root)
+{
+  WN_ITER *walker = WN_WALK_TreeIter(root);
+  while (walker != NULL) {
+    WN *node = WN_ITER_wn(walker);
+    if (WN_operator(node) == OPR_REGION)
+      WN_set_region_id(node, New_Region_Id());
+    walker = WN_WALK_TreeNext(walker);
+  }
+}
 
 static void
 vho_lower_rename_labels_defined ( WN * wn )
@@ -7645,6 +7717,7 @@
 
       if ( vho_lower_labels_defined )
         vho_lower_rename_labels_defined ( test_block );
+      vho_lower_rename_region_id(test_block);
 
       if ( VHO_Use_Do_While ) {
 
@@ -8462,6 +8535,12 @@
 #ifdef KEY
   current_pu_id ++;
 #endif
+ 
+  
+  if (are_all_simple_eh_regions(wn))
+    Set_PU_simple_eh(Get_Current_PU());  
+  else
+    Clear_PU_simple_eh(Get_Current_PU());  
 
   /* See if we need to lower the pu */
   if (    PU_has_very_high_whirl (Get_Current_PU ()) == FALSE
@@ -8470,6 +8549,54 @@
     return wn;
   }
 
+  if (PU_simple_eh(Get_Current_PU()))
+  {
+    char * pu_name = ST_name(&St_Table[PU_Info_proc_sym(Current_PU_Info)]);
+    WN *region_body = WN_func_body(wn);
+    INITV_IDX initv_label = New_INITV();
+    INITV_Set_ZERO (Initv_Table[initv_label], MTYPE_U4, 1);
+    INITV_IDX blk = New_INITV();
+    INITV_Init_Block (blk, initv_label);
+			
+    INITV_IDX iv;
+    iv = New_INITV();
+    INITV_Set_ZERO (Initv_Table[iv], MTYPE_U4, 1);
+		
+    Set_INITV_next (initv_label, iv);
+			    
+    TY_IDX ty = MTYPE_TO_TY_array[MTYPE_U4];
+    ST * ereg = Gen_Temp_Named_Symbol (ty, "ehpit", CLASS_VAR,
+		SCLASS_EH_REGION_SUPP);
+    Set_ST_is_initialized (*ereg);
+    Set_ST_is_not_used (*ereg);
+    INITO_IDX ereg_supp = New_INITO (ST_st_idx(ereg), blk);
+    WN * ehr = WN_CreateRegion (REGION_KIND_EH, region_body,
+         WN_CreateBlock(), WN_CreateBlock(), New_Region_Id(), ereg_supp); 
+    WN_func_body(wn) = WN_CreateBlock();
+    WN_INSERT_BlockFirst(WN_func_body(wn), ehr); 
+    WN *preamble_end;
+    WN *eh_block = WN_region_body(ehr);
+    preamble_end = WN_first(eh_block); 
+    while (preamble_end && 
+	!(WN_operator(preamble_end) == OPR_PRAGMA && 
+	  WN_pragma(preamble_end) == WN_PRAGMA_PREAMBLE_END))
+      preamble_end = WN_next(preamble_end);
+    if (preamble_end)
+    {
+      WN *first,*p;
+      WN *func_body = WN_func_body(wn);
+      first = WN_first(eh_block);
+      WN_first(eh_block) = WN_next(preamble_end);
+      p = preamble_end;
+      while(p != NULL)
+      {
+        preamble_end = WN_prev(p);
+	WN_INSERT_BlockFirst(func_body, p);
+	p = preamble_end;
+      }
+    }
+  }
+  
   WN_func_body(wn) = vho_lower_block ( WN_func_body(wn) );
 
   if ( VHO_Recycle_Pregs )
Index: osprey/common/com/config.cxx
===================================================================
--- osprey/common/com/config.cxx	(revision 3593)
+++ osprey/common/com/config.cxx	(working copy)
@@ -1309,6 +1309,11 @@
     OPT_Reorg_Common = TRUE;
   }
 
+  if ( ! Optimize_exception_ranges_set && Opt_Level == 0)
+  {
+     Optimize_exception_ranges = 0;
+  }
+
   if (Force_GP_Prolog) Force_Jalr = TRUE;
 #ifdef TARG_X8664
   // Bug 1039 - align aggregates to 16-byte for all optimization levels
Index: osprey/common/com/config_opt.cxx
===================================================================
--- osprey/common/com/config_opt.cxx	(revision 3593)
+++ osprey/common/com/config_opt.cxx	(working copy)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.  All Rights Reserved.
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.  All Rights Reserved.
  */
 
 /*
@@ -283,6 +283,8 @@
 BOOL Outlining_Enabled = FALSE;
 BOOL Instrumentation_Enabled_Before = FALSE;
 
+INT32  Optimize_exception_ranges = 1;
+BOOL   Optimize_exception_ranges_set = FALSE;
 #ifdef KEY
 INT32  OPT_Cyg_Instrument = 0;
 BOOL   Asm_Memory = FALSE;
@@ -875,6 +877,10 @@
     0, 0, 0,  &LANG_Ansi_Setjmp_On,   &LANG_Ansi_Setjmp_Set,
     "C/C++: enable optimization of functions with calls to setjmp" },
 
+  { OVK_INT32, OV_VISIBLE,     1, "exception_range_opt",           "",
+    1, 0, 2,  &Optimize_exception_ranges,   &Optimize_exception_ranges_set,
+    "Enable control flow optimization for exception ranges" },
+
 #if defined(__linux__) || defined(BUILD_OS_DARWIN)
   { OVK_BOOL,	OV_INTERNAL,	TRUE, "wfe_dfe",	"wfe_dfe",
     0, 0, 0,	&Enable_WFE_DFE,	NULL,
Index: osprey/common/com/symtab_access.h
===================================================================
--- osprey/common/com/symtab_access.h	(revision 3593)
+++ osprey/common/com/symtab_access.h	(working copy)
@@ -1136,6 +1136,13 @@
 PU_src_lang (const PU& pu)		{ return pu.src_lang; }
 
 inline BOOL
+PU_simple_eh(const PU& pu)		{ return (pu.flags & PU_SIMPLE_EH_RANGE) != 0;}
+inline void
+Set_PU_simple_eh(PU& pu)			{ pu.flags |= PU_SIMPLE_EH_RANGE; }
+inline void
+Clear_PU_simple_eh(PU& pu)		{ pu.flags &= ~PU_SIMPLE_EH_RANGE; }
+
+inline BOOL
 PU_mixed_lang (const PU& pu)		{ return (pu.src_lang & PU_MIXED_LANG) != 0; }
 inline void
 Set_PU_mixed_lang (PU& pu)		{ pu.src_lang |= PU_MIXED_LANG; }
Index: osprey/common/com/symtab_defs.h
===================================================================
--- osprey/common/com/symtab_defs.h	(revision 3593)
+++ osprey/common/com/symtab_defs.h	(working copy)
@@ -765,6 +765,8 @@
 #define PU_NOTHROW              0x0004000000000000LL // doesn't throw, e.g. decl as "void foo() throw()".
 #define PU_HAS_APPLY_ARGS       0x0008000000000000LL // __builtin_apply_args
 
+#define PU_SIMPLE_EH_RANGE	0x0010000000000000LL // there is a single eh range in PU, no clean-up or catch
+
 enum PU_SRC_LANG_FLAGS
 {
     PU_UNKNOWN_LANG	= 0x00,	// UNKNOWN 
Index: osprey/common/com/config.h
===================================================================
--- osprey/common/com/config.h	(revision 3593)
+++ osprey/common/com/config.h	(working copy)
@@ -593,6 +593,8 @@
 extern BOOL CG_mem_intrinsics;
 extern BOOL Emulate_memset;
 extern INT32 CG_memmove_inst_count;
+extern INT32 Optimize_exception_ranges;
+extern BOOL Optimize_exception_ranges_set;
 extern BOOL CG_memmove_inst_count_overridden;
 extern BOOL CG_bcopy_cannot_overlap;
 extern BOOL CG_memcpy_cannot_overlap;
------------------------------------------------------------------------------
Achieve unprecedented app performance and reliability
What every C/C++ and Fortran developer should know.
Learn how Intel has extended the reach of its next-generation tools
to help boost performance applications - including clusters.
http://p.sf.net/sfu/intel-dev2devmay
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to