Author: mberg
Date: 2012-04-12 15:35:30 -0400 (Thu, 12 Apr 2012)
New Revision: 3907

Modified:
   trunk/osprey/be/cg/oputil.cxx
   trunk/osprey/be/cg/whirl2ops.cxx
   trunk/osprey/be/cg/x8664/cg_sched.cxx
   trunk/osprey/be/cg/x8664/cgemit_targ.cxx
   trunk/osprey/be/cg/x8664/ebo_special.cxx
   trunk/osprey/be/cg/x8664/exp_loadstore.cxx
Log:
Changes include: support for 256-bit FMA; limiting load-exec on x86 cmp
insns to loads with a single use; updates for non-temporal stores; adding
a temp result to the binary AND/OR expansion pattern; and cleanup in
CG_sched.  CR: Jian-Xin.



Modified: trunk/osprey/be/cg/oputil.cxx
===================================================================
--- trunk/osprey/be/cg/oputil.cxx       2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/oputil.cxx       2012-04-12 19:35:30 UTC (rev 3907)
@@ -2102,16 +2102,10 @@
     {TOP_stss_n32,          TOP_vstss_n32},
     {TOP_stssx,             TOP_vstssx},
     {TOP_stssxx,            TOP_vstssxx},
-    {TOP_stntss,            TOP_vstntss},
-    {TOP_stntssx,           TOP_vstntssx},
-    {TOP_stntssxx,          TOP_vstntssxx},
     {TOP_stsd,              TOP_vstsd},
     {TOP_stsd_n32,          TOP_vstsd_n32},
     {TOP_stsdx,             TOP_vstsdx},
     {TOP_stsdxx,            TOP_vstsdxx},
-    {TOP_stntsd,            TOP_vstntsd},
-    {TOP_stntsdx,           TOP_vstntsdx},
-    {TOP_stntsdxx,          TOP_vstntsdxx},
     {TOP_maxss,             TOP_vfmaxss},
     {TOP_maxsd,             TOP_vfmaxsd},
     {TOP_minss,             TOP_vfminss},

Modified: trunk/osprey/be/cg/whirl2ops.cxx
===================================================================
--- trunk/osprey/be/cg/whirl2ops.cxx    2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/whirl2ops.cxx    2012-04-12 19:35:30 UTC (rev 3907)
@@ -4688,6 +4688,7 @@
   TOP opcode; 
   TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr));
   BOOL is_vector = MTYPE_is_vector(rtype);
+  BOOL PU_has_avx256 = FALSE;
 
   // now match a scalar or vector fma4 
   switch (WN_opcode(mul_wn)) {
@@ -4697,11 +4698,17 @@
   case OPC_F8MPY:
     opcode = (fma4) ? TOP_vfmaddsd : TOP_xfmadd213sd;
     break;
+  case OPC_V32F4MPY:
+  case OPC_V32C4MPY:
+    PU_has_avx256 = TRUE;
   case OPC_V16F4MPY:
   case OPC_V16C4MPY:
     FmtAssert(is_vector, ("unexpected fma vector form"));
     opcode = (fma4) ? TOP_vfmaddps : TOP_xfmadd213ps;
     break;
+  case OPC_V32F8MPY:
+  case OPC_V32C8MPY:
+    PU_has_avx256 = TRUE;
   case OPC_V16F8MPY:
   case OPC_V16C8MPY:
     FmtAssert(is_vector, ("unexpected fma vector form"));
@@ -4725,8 +4732,7 @@
   else
     Build_OP(opcode,  result,  opnd0,  opnd1, opnd2, &New_OPs); 
 
-  // TODO: add operand size check for 256-bit
-  if (PU_has_avx128 == FALSE)
+  if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE))
     PU_has_avx128 = TRUE;
   
   return result; 
@@ -4747,6 +4753,7 @@
   TOP opcode; 
   TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr));
   BOOL is_vector = MTYPE_is_vector(rtype);
+  BOOL PU_has_avx256 = FALSE;
 
   // now match a scalar or vector fma4 
   switch (WN_opcode(mul_wn)) {
@@ -4756,11 +4763,17 @@
   case OPC_F8MPY:
     opcode = (fma4) ? TOP_vfnmaddsd : TOP_xfnmadd213sd;
     break;
+  case OPC_V32F4MPY:
+  case OPC_V32C4MPY:
+    PU_has_avx256 = TRUE;
   case OPC_V16F4MPY:
   case OPC_V16C4MPY:
     FmtAssert(is_vector, ("unexpected fma vector form"));
     opcode = (fma4) ? TOP_vfnmaddps : TOP_xfnmadd213ps;
     break;
+  case OPC_V32F8MPY:
+  case OPC_V32C8MPY:
+    PU_has_avx256 = TRUE;
   case OPC_V16F8MPY:
   case OPC_V16C8MPY:
     FmtAssert(is_vector, ("unexpected fma vector form"));
@@ -4784,8 +4797,7 @@
   else
     Build_OP(opcode,  result,  opnd0,  opnd1, opnd2, &New_OPs); 
 
-  // TODO: add operand size check for 256-bit
-  if (PU_has_avx128 == FALSE)
+  if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE))
     PU_has_avx128 = TRUE;
   
   return result; 
@@ -4805,6 +4817,7 @@
   TOP opcode; 
   TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr));
   BOOL is_vector = MTYPE_is_vector(rtype);
+  BOOL PU_has_avx256 = FALSE;
 
   // now match a scalar or vector fma4 
   switch (WN_opcode(mul_wn)) {
@@ -4814,11 +4827,17 @@
   case OPC_F8MPY:
     opcode = (fma4) ? TOP_vfmsubsd : TOP_xfmsub213sd;
     break;
+  case OPC_V32F4MPY:
+  case OPC_V32C4MPY:
+    PU_has_avx256 = TRUE;
   case OPC_V16F4MPY:
   case OPC_V16C4MPY:
     FmtAssert(is_vector, ("unexpected fms vector form"));
     opcode = (fma4) ? TOP_vfmsubps : TOP_xfmsub213ps;
     break;
+  case OPC_V32F8MPY:
+  case OPC_V32C8MPY:
+    PU_has_avx256 = TRUE;
   case OPC_V16F8MPY:
   case OPC_V16C8MPY:
     FmtAssert(is_vector, ("unexpected fms vector form"));
@@ -4842,8 +4861,7 @@
   else
     Build_OP(opcode,  result,  opnd0,  opnd1, opnd2, &New_OPs); 
 
-  // TODO: add operand size check for 256-bit
-  if (PU_has_avx128 == FALSE)
+  if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE))
     PU_has_avx128 = TRUE;
   
   return result; 
@@ -5431,25 +5449,29 @@
 
   case OPR_SUB:
   case OPR_ADD:
-    if ((CG_opt_level > 1) && Is_Target_Orochi() && 
-        Is_Target_AVX() &&
+    if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+        Is_Target_AVX() && 
         (Is_Target_FMA4() || Is_Target_FMA()) ) {
       BOOL fma4 = Is_Target_FMA4();
       BOOL expr_is_complex = FALSE;
       TYPE_ID rtype = OPCODE_rtype(opcode);
       WN *mul_wn = NULL;
       if ((rtype == MTYPE_V16C4) ||
-          (rtype == MTYPE_V16C8)) {
+          (rtype == MTYPE_V16C8) ||
+          (rtype == MTYPE_V32C4) ||
+          (rtype == MTYPE_V32C8)) {
         expr_is_complex = TRUE;
       }
-      
+
       // Looking for a fm{a/s} candidate via FMA4 insns
       if ( (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) &&
            (expr_is_complex == FALSE) ) {
         if ((WN_operator(mul_wn = WN_kid(expr, 0)) == OPR_MPY) &&
+            (WN_opcode(mul_wn) != OPC_V32C8MPY) &&
             (WN_opcode(mul_wn) != OPC_V16C8MPY) &&
+            (WN_opcode(mul_wn) != OPC_V32C4MPY) &&
             (WN_opcode(mul_wn) != OPC_V16C4MPY) &&
-            (WN_opcode(mul_wn) != OPC_FQMPY) && 
+            (WN_opcode(mul_wn) != OPC_FQMPY) &&
             (WN_opcode(mul_wn) != OPC_F10MPY) ) {
           rtype = OPCODE_rtype(WN_opcode (mul_wn));
           if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
@@ -5460,16 +5482,20 @@
             }
           }
         } else if ((WN_operator(mul_wn = WN_kid(expr, 1)) == OPR_MPY) &&
+                   (WN_opcode(mul_wn) != OPC_V32C8MPY) &&
                    (WN_opcode(mul_wn) != OPC_V16C8MPY) &&
+                   (WN_opcode(mul_wn) != OPC_V32C4MPY) &&
                    (WN_opcode(mul_wn) != OPC_V16C4MPY) &&
-                   (WN_opcode(mul_wn) != OPC_FQMPY) && 
+                   (WN_opcode(mul_wn) != OPC_FQMPY) &&
                    (WN_opcode(mul_wn) != OPC_F10MPY)) {
           rtype = OPCODE_rtype(WN_opcode (mul_wn));
           if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
             if (WN_operator(expr) == OPR_ADD) {
               return Handle_Fma_Operation(expr, result, mul_wn, FALSE, fma4);
-            } else if ((WN_operator(expr) == OPR_SUB) && 
+            } else if ((WN_operator(expr) == OPR_SUB) &&
+                       (WN_opcode(expr) != OPC_V32C4SUB) &&
                        (WN_opcode(expr) != OPC_V16C4SUB) &&
+                       (WN_opcode(expr) != OPC_V32C8SUB) &&
                        (WN_opcode(expr) != OPC_V16C8SUB)) {
               return Handle_Fnma_Operation(expr, result, mul_wn, FALSE, fma4);
             }

Modified: trunk/osprey/be/cg/x8664/cg_sched.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/cg_sched.cxx       2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/x8664/cg_sched.cxx       2012-04-12 19:35:30 UTC (rev 3907)
@@ -379,7 +379,7 @@
       }
     }
 
-    return Resource_Table[c]->resources[cur_res][dispatch_unit];
+    return Resource_Table[clock]->resources[cur_res][dispatch_unit];
   }
 
   int  Dispatched_Ops( int c ) { return Resource_Table[c]->dispatched_ops; }
@@ -835,8 +835,7 @@
   const TOP top = OP_code(op);
   Resource_Table_Entry* entry = Resource_Table[cycle];
 
-  const ICU res = (take_it) ? cur_res : 
-                    Lookup_Property_By_Pipeinfo( op, cycle, true );
+  const ICU res = Lookup_Property_By_Pipeinfo( op, cycle, true );
 
   if( !entry->resources[res][dispatch_unit] )
     return false;
@@ -857,7 +856,7 @@
   OPR* opr = Get_OPR( op );
   const int dispatch_unit = Get_Dispatch_Unit( op, cycle, false );
 
-  if( !Probe_Resources( OPR_issue_time(opr), op, dispatch_unit, true ) ){
+  if( !Probe_Resources( cycle, op, dispatch_unit, true ) ){
     ASSERT( false );
   }
 

Modified: trunk/osprey/be/cg/x8664/cgemit_targ.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/cgemit_targ.cxx    2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/x8664/cgemit_targ.cxx    2012-04-12 19:35:30 UTC (rev 3907)
@@ -1973,12 +1973,6 @@
   OP_Name[TOP_vstntps] = "vmovntps";
   OP_Name[TOP_vstntpsx] = "vmovntps";
   OP_Name[TOP_vstntpsxx] = "vmovntps";
-  OP_Name[TOP_vstntsd] = "vmovntsd";
-  OP_Name[TOP_vstntsdx] = "vmovntsd";
-  OP_Name[TOP_vstntsdxx] = "vmovntsd";
-  OP_Name[TOP_vstntss] = "vmovntss";
-  OP_Name[TOP_vstntssx] = "vmovntss";
-  OP_Name[TOP_vstntssxx] = "vmovntss";
   OP_Name[TOP_vldlpd] = "vmovsd";
   OP_Name[TOP_vldlpdx] = "vmovsd";
   OP_Name[TOP_vldlpdxx] = "vmovsd";

Modified: trunk/osprey/be/cg/x8664/ebo_special.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/ebo_special.cxx    2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/x8664/ebo_special.cxx    2012-04-12 19:35:30 UTC (rev 3907)
@@ -10255,6 +10255,11 @@
     return Process_Side_Effects(opnd_tn, actual_tninfo, rval, opnds_swapped);
   }
 
+  // we only want one reaching use in this block for a cmp peep
+  if( EBO_flow_safe && ( load_uses > 1 ) ) {
+    return Process_Side_Effects(opnd_tn, actual_tninfo, rval, opnds_swapped);
+  }
+
   // If load is volatile, replace with exactly one load-exe OP, in order to
   // maintain the same number of memory accesses.
   if (OP_volatile(ld_op) &&

Modified: trunk/osprey/be/cg/x8664/exp_loadstore.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/exp_loadstore.cxx  2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/x8664/exp_loadstore.cxx  2012-04-12 19:35:30 UTC (rev 3907)
@@ -1664,8 +1664,9 @@
                  FALSE, ops );
   }
 
-  Expand_Binary_And( tgt_tn, tmp2_tn, src1_tn, rtype, ops );
-  Expand_Binary_Or( tgt_tn, tgt_tn, tmp1_tn, rtype, ops );
+  TN* tmp = Build_TN_Like(tgt_tn);
+  Expand_Binary_And( tmp, tmp2_tn, src1_tn, rtype, ops );
+  Expand_Binary_Or( tgt_tn, tmp, tmp1_tn, rtype, ops );
 }
 
 /* ======================================================================


------------------------------------------------------------------------------
For Developers, A Lot Can Happen In A Second.
Boundary is the first to Know...and Tell You.
Monitor Your Applications in Ultra-Fine Resolution. Try it FREE!
http://p.sf.net/sfu/Boundary-d2dvs2
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to