Author: mlai
Date: 2012-03-13 04:20:27 -0400 (Tue, 13 Mar 2012)
New Revision: 3881

Modified:
   trunk/osprey/be/cg/cg_flags.cxx
   trunk/osprey/be/cg/cg_flags.h
   trunk/osprey/be/cg/cg_loop.cxx
   trunk/osprey/be/cg/cgdriver.cxx
   trunk/osprey/be/cg/x8664/cgtarget.cxx
Log:
Added non-temporal store generation support for single-basic-block while loops. 
 Extended the option -CG:movnti to accept -1 as a means for the user to 
instruct the compiler to emit non-temporal stores unconditionally.  (The behavior 
of existing functionality is not affected.)  Some TOP code cleanup for 
non-temporal stores.

Approved by Jian-Xin Lai.


Modified: trunk/osprey/be/cg/cg_flags.cxx
===================================================================
--- trunk/osprey/be/cg/cg_flags.cxx     2012-03-12 21:10:05 UTC (rev 3880)
+++ trunk/osprey/be/cg/cg_flags.cxx     2012-03-13 08:20:27 UTC (rev 3881)
@@ -291,7 +291,7 @@
 BOOL CG_use_short_form = FALSE;
 UINT64 CG_p2align_freq = 10000;
 UINT32 CG_p2align_max_skip_bytes = 3;
-UINT32 CG_movnti = 1000;
+INT32 CG_movnti = 1000;
 BOOL CG_use_incdec = TRUE;
 BOOL CG_use_xortozero = TRUE; // bug 8592
 BOOL CG_use_xortozero_Set = FALSE;

Modified: trunk/osprey/be/cg/cg_flags.h
===================================================================
--- trunk/osprey/be/cg/cg_flags.h       2012-03-12 21:10:05 UTC (rev 3880)
+++ trunk/osprey/be/cg/cg_flags.h       2012-03-13 08:20:27 UTC (rev 3881)
@@ -881,7 +881,7 @@
 extern BOOL CG_compute_to;
 extern UINT64 CG_p2align_freq;
 extern UINT32 CG_p2align_max_skip_bytes;
-extern UINT32 CG_movnti;
+extern INT32 CG_movnti;
 extern BOOL CG_use_xortozero;
 extern BOOL CG_use_xortozero_Set;
 extern BOOL CG_use_incdec;

Modified: trunk/osprey/be/cg/cg_loop.cxx
===================================================================
--- trunk/osprey/be/cg/cg_loop.cxx      2012-03-12 21:10:05 UTC (rev 3880)
+++ trunk/osprey/be/cg/cg_loop.cxx      2012-03-13 08:20:27 UTC (rev 3881)
@@ -7442,6 +7442,9 @@
        CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_WHILELOOP_UNROLL ***");
 
       cg_loop.Build_CG_LOOP_Info(TRUE);
+#ifdef TARG_X8664
+      CGTARG_LOOP_Optimize(loop);
+#endif
       cg_loop.Determine_Unroll_Factor();
       Unroll_Dowhile_Loop(loop, cg_loop.Unroll_factor());
       cg_loop.Recompute_Liveness();

Modified: trunk/osprey/be/cg/cgdriver.cxx
===================================================================
--- trunk/osprey/be/cg/cgdriver.cxx     2012-03-12 21:10:05 UTC (rev 3880)
+++ trunk/osprey/be/cg/cgdriver.cxx     2012-03-13 08:20:27 UTC (rev 3881)
@@ -1237,8 +1237,8 @@
   { OVK_BOOL,  OV_INTERNAL, TRUE, "idivbyconst_opt", "", 
     0, 0, 0,   &CG_idivbyconst_opt, NULL },
   { OVK_UINT32,        OV_INTERNAL, TRUE, "movnti", "",
-    120, 0, UINT32_MAX>>1, &CG_movnti, NULL,
-    "Use x86-64's movnti instead of mov when writing memory blocks of this 
size or larger (in KB)" },
+    120, -1, UINT32_MAX>>1, &CG_movnti, NULL,
+    "Use x86-64's movnti instead of mov when writing memory blocks of this 
size or larger (in KB).  Value of -1 generates movnti unconditionally." },
   { OVK_BOOL,  OV_INTERNAL, TRUE, "cloop", "",
     0, 0, 0,   &CG_LOOP_cloop, NULL },
   { OVK_BOOL,  OV_INTERNAL, TRUE,"use_lddqu", "",

Modified: trunk/osprey/be/cg/x8664/cgtarget.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/cgtarget.cxx       2012-03-12 21:10:05 UTC (rev 
3880)
+++ trunk/osprey/be/cg/x8664/cgtarget.cxx       2012-03-13 08:20:27 UTC (rev 
3881)
@@ -111,6 +111,8 @@
 #include "stblock.h" //for Base_Symbol_And_Offset_For_Addressing
 #include "be_symtab.h" //Preg_Lda
 
+#define UNCONDITIONAL_MOVNTI -1
+
 UINT32 CGTARG_branch_taken_penalty;
 BOOL CGTARG_branch_taken_penalty_overridden = FALSE;
 
@@ -377,9 +379,6 @@
       case TOP_vstsd:
       case TOP_vstsd_n32:
       case TOP_vstsdx:
-      case TOP_vstntsd:
-      case TOP_vstntsdx:
-      case TOP_vstntsdxx:
       case TOP_vstorelpd:
       case TOP_vdivxxxsd:
       case TOP_vfaddxxxsd:
@@ -3232,12 +3231,20 @@
     case TOP_stsd:      return TOP_stntsd; break;
     case TOP_stsdx:     return TOP_stntsdx; break;
     case TOP_stsdxx:    return TOP_stntsdxx; break;
-    case TOP_vstss:      return TOP_vstntss; break;
-    case TOP_vstssx:     return TOP_vstntssx; break;
-    case TOP_vstssxx:    return TOP_vstntssxx; break;
-    case TOP_vstsd:      return TOP_vstntsd; break;
-    case TOP_vstsdx:     return TOP_vstntsdx; break;
-    case TOP_vstsdxx:    return TOP_vstntsdxx; break;
+
+    /* following cases asserted by CG_movnti == UNCONDITIONAL_MOVNTI */
+    case TOP_stups:     return TOP_stntps; break;
+    case TOP_stupsx:    return TOP_stntpsx; break;
+    case TOP_stupsxx:   return TOP_stntpsxx; break;
+    case TOP_vstups:    return TOP_vstntps; break;
+    case TOP_vstupsx:   return TOP_vstntpsx; break;
+    case TOP_vstupsxx:  return TOP_vstntpsxx; break;
+    case TOP_stupd:     return TOP_stntpd; break;
+    case TOP_stupdx:    return TOP_stntpdx; break;
+    case TOP_stupdxx:   return TOP_stntpdxx; break;
+    case TOP_vstupd:    return TOP_vstntpd; break;
+    case TOP_vstupdx:   return TOP_vstntpdx; break;
+    case TOP_vstupdxx:  return TOP_vstntpdxx; break;
     }
    FmtAssert(FALSE,("Non-Temporal Store: not supported!"));
    return TOP_UNDEFINED;
@@ -3294,43 +3301,52 @@
 {
   if(CG_movnti==0) return;
 
-  UINT32 trip_count = 0;
-  TN* trip_count_tn = CG_LOOP_Trip_Count(loop);
   BB* body = LOOP_DESCR_loophead(loop);
+  OP* op = NULL;
 
-  if( trip_count_tn != NULL &&
-      TN_is_constant(trip_count_tn) ){
-    trip_count = TN_value( trip_count_tn );
+  if (CG_movnti != UNCONDITIONAL_MOVNTI)
+  {
+    UINT32 trip_count = 0;
+    TN* trip_count_tn = CG_LOOP_Trip_Count(loop);
 
-  } else {
-    const ANNOTATION* annot = ANNOT_Get(BB_annotations(body), ANNOT_LOOPINFO);
-    const LOOPINFO* info = ANNOT_loopinfo(annot);
+    if( trip_count_tn != NULL &&
+        TN_is_constant(trip_count_tn) ){
+      trip_count = TN_value( trip_count_tn );
 
-    trip_count = WN_loop_trip_est(LOOPINFO_wn(info));
-  }
+    } else {
+      const ANNOTATION* annot = ANNOT_Get(BB_annotations(body), 
ANNOT_LOOPINFO);
+      const LOOPINFO* info = NULL;
+      if (annot != NULL)
+        info = ANNOT_loopinfo(annot);
+      if (info != NULL)
+        trip_count = WN_loop_trip_est(LOOPINFO_wn(info));
+    }
 
-  OP* op = NULL;
-  INT64 size = 0;
+    INT64 size = 0;
 
-  Working_Set.Clear();
+    Working_Set.Clear();
 
-  /* First, estimate the totol size (in bytes) that this loop will
-     bring to the cache.
-  */
-  FOR_ALL_BB_OPs_FWD( body, op ){
-    if(((OP_store( op ) && !TOP_is_nt_store(OP_code(op))) || //stores
-         OP_load(op) ) && //loads
-      !Op_In_Working_Set(op)){ //that were not in the working set
-      size += CGTARG_Mem_Ref_Bytes(op);
+    /* First, estimate the totol size (in bytes) that this loop will
+       bring to the cache.
+    */
+    FOR_ALL_BB_OPs_FWD( body, op ){
+      if(((OP_store( op ) && !TOP_is_nt_store(OP_code(op))) || //stores
+         OP_load(op) ) && //loads
+        !Op_In_Working_Set(op)){ //that were not in the working set
+        size += CGTARG_Mem_Ref_Bytes(op);
+      }
     }
-  }
 
-  size *= trip_count;
+    size *= trip_count;
 
-  const INT64 cache_size = CG_movnti * 1024;
+    const INT64 cache_size = CG_movnti * 1024;
 
-  if( size < cache_size )
-    return;
+    if( size < cache_size )
+      return;
+  }
+  // if CG_movnti == UNCONDITIONAL_MOVNTI:  generate non-temporal stores
+  // unconditionally
+
   FOR_ALL_BB_OPs_FWD( body, op ){
     if( OP_prefetch( op ) ){
       /* Get rid of any prefetchw operation, because it "loads the prefetched
@@ -3395,6 +3411,17 @@
          new_top = Movnti_Top(OP_code(op));
          break;
        }
+    case TOP_stups:
+    case TOP_stupsx:
+    case TOP_stupsxx:
+    case TOP_vstups:
+    case TOP_vstupsx:
+    case TOP_vstupsxx:
+       {
+         if (CG_movnti == UNCONDITIONAL_MOVNTI)
+           new_top = Movnti_Top(OP_code(op));
+         break;
+       }
     //SSE2 support
     case TOP_stapd:
     case TOP_stapdx:
@@ -3419,24 +3446,30 @@
          new_top = Movnti_Top(OP_code(op));
          break;
        }
-    //SSE4a support
+    //SSE4a/SSE41 support
     case TOP_stss:
     case TOP_stssx:
     case TOP_stssxx:
     case TOP_stsd:
     case TOP_stsdx:
     case TOP_stsdxx:
-    case TOP_vstss:
-    case TOP_vstssx:
-    case TOP_vstssxx:
-    case TOP_vstsd:
-    case TOP_vstsdx:
-    case TOP_vstsdxx:
        { 
-         if(Is_Target_SSE4a())
+         if(Is_Target_SSE4a() || Is_Target_SSE41())
             new_top = Movnti_Top(OP_code(op));
          break;
        }
+    case TOP_stupd:
+    case TOP_stupdx:
+    case TOP_stupdxx:
+    case TOP_vstupd:
+    case TOP_vstupdx:
+    case TOP_vstupdxx:
+       {
+         if (CG_movnti == UNCONDITIONAL_MOVNTI &&
+             (Is_Target_SSE4a() || Is_Target_SSE41()))
+           new_top = Movnti_Top(OP_code(op));
+         break;
+       }
     }//end switch
     
    if( new_top != TOP_UNDEFINED )


------------------------------------------------------------------------------
Keep Your Developer Skills Current with LearnDevNow!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-d2d
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to