Author: mberg
Date: 2011-07-16 23:36:42 -0400 (Sat, 16 Jul 2011)
New Revision: 3695

Modified:
   trunk/osprey/be/cg/cgemit.cxx
   trunk/osprey/be/cg/lra.cxx
   trunk/osprey/be/cg/whirl2ops.cxx
   trunk/osprey/be/cg/x8664/ebo_special.cxx
   trunk/osprey/be/cg/x8664/expand.cxx
   trunk/osprey/common/targ_info/proc/x8664/orochi_si.cxx
Log:
Updates for:

* Alignment usage on BD
* EBO register pressure heuristic tuning
* horizontal add peephole opts
* updates on replicate forms
* fma neg additions
* CG dep graph updates for BD

CR by Jian-Xin



Modified: trunk/osprey/be/cg/cgemit.cxx
===================================================================
--- trunk/osprey/be/cg/cgemit.cxx       2011-07-15 07:51:29 UTC (rev 3694)
+++ trunk/osprey/be/cg/cgemit.cxx       2011-07-17 03:36:42 UTC (rev 3695)
@@ -4805,9 +4805,11 @@
  */      
       if(max_skip_bytes > 0)
       {
-        if(!Is_Target_Barcelona() && !Is_Target_Orochi() || CG_p2align != 2){
+        if(!Is_Target_Barcelona() || CG_p2align != 2){
           if (max_skip_bytes > 15)
            max_skip_bytes = 15;        
+          if(Is_Target_Orochi())
+            fprintf(Asm_File, "\t.p2align 3,,\n");
           fprintf(Asm_File, "\t.p2align 4,,%d\n", max_skip_bytes);
         }
         else 

Modified: trunk/osprey/be/cg/lra.cxx
===================================================================
--- trunk/osprey/be/cg/lra.cxx  2011-07-15 07:51:29 UTC (rev 3694)
+++ trunk/osprey/be/cg/lra.cxx  2011-07-17 03:36:42 UTC (rev 3695)
@@ -1008,7 +1008,8 @@
   LIVE_RANGE *lr = LR_For_TN(tn);
   if ((LR_def_cnt(lr) == 1) && (LR_upward_exposed_use(lr) == 0)) {
     if (LR_use_cnt(lr) == 1) {
-      has_sdsu = true;
+      // globals are not simple live ranges
+      has_sdsu = (TN_is_global_reg(tn)) ? has_sdsu : true;
     }
   }
   

Modified: trunk/osprey/be/cg/whirl2ops.cxx
===================================================================
--- trunk/osprey/be/cg/whirl2ops.cxx    2011-07-15 07:51:29 UTC (rev 3694)
+++ trunk/osprey/be/cg/whirl2ops.cxx    2011-07-17 03:36:42 UTC (rev 3695)
@@ -4732,6 +4732,61 @@
 }
 
 static TN* 
+Handle_Fnma_Operation(WN* expr, TN* result, WN *mul_wn, BOOL mul_kid0) 
+{
+  
+  WN* add_wn = (mul_kid0) ? WN_kid1(expr) : WN_kid0(expr); 
+  TN* opnd0; 
+  TN* opnd1; 
+  TN* opnd2;
+  TOP opcode; 
+  TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr));
+  BOOL is_vector = MTYPE_is_vector(rtype);
+
+  // now match a scalar or vector fma4 
+  switch (WN_opcode(mul_wn)) {
+  case OPC_F4MPY:
+    opcode = TOP_vfnmaddss;
+    break;
+  case OPC_F8MPY:
+    opcode = TOP_vfnmaddsd;
+    break;
+  case OPC_V16F4MPY:
+  case OPC_V16C4MPY:
+    FmtAssert(is_vector, ("unexpected fma vector form"));
+    opcode = TOP_vfnmaddps;
+    break;
+  case OPC_V16F8MPY:
+  case OPC_V16C8MPY:
+    FmtAssert(is_vector, ("unexpected fma vector form"));
+    opcode = TOP_vfnmaddpd;
+    break;
+  default:
+    FmtAssert(FALSE, ("unexpected fma form"));
+    break;
+  }
+
+  opnd2 = Expand_Expr(add_wn, expr,  NULL); 
+  opnd1 = Expand_Expr(WN_kid1(mul_wn), mul_wn, NULL);
+  opnd0 = Expand_Expr(WN_kid0(mul_wn), mul_wn, NULL);
+
+  if(result == NULL) 
+    result = Allocate_Result_TN(expr, NULL); 
+
+  // Position tn's from loads on operand 1's position if possible.
+  if (OPCODE_is_load(WN_opcode(WN_kid0(mul_wn))))
+    Build_OP(opcode,  result,  opnd1,  opnd0, opnd2, &New_OPs); 
+  else
+    Build_OP(opcode,  result,  opnd0,  opnd1, opnd2, &New_OPs); 
+
+  // TODO: add operand size check for 256-bit
+  if (PU_has_avx128 == FALSE)
+    PU_has_avx128 = TRUE;
+  
+  return result; 
+}
+
+static TN* 
 Handle_Fms_Operation(WN* expr, TN* result, WN *mul_wn, BOOL mul_kid0) 
 {
   WN* sub_wn = (mul_kid0) ? WN_kid1(expr) : WN_kid0(expr); 
@@ -5402,7 +5457,11 @@
           if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
             if (WN_operator(expr) == OPR_ADD) {
               return Handle_Fma_Operation(expr, result, mul_wn, FALSE);
-            } 
+            } else if ((WN_operator(expr) == OPR_SUB) && 
+                       (WN_opcode(expr) != OPC_V16C4SUB) &&
+                       (WN_opcode(expr) != OPC_V16C8SUB)) {
+              return Handle_Fnma_Operation(expr, result, mul_wn, FALSE);
+            }
           }
         }
       }

Modified: trunk/osprey/be/cg/x8664/ebo_special.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/ebo_special.cxx    2011-07-15 07:51:29 UTC (rev 
3694)
+++ trunk/osprey/be/cg/x8664/ebo_special.cxx    2011-07-17 03:36:42 UTC (rev 
3695)
@@ -4827,6 +4827,22 @@
   {TOP_vfmsubaddpd,    TOP_vfmsubaddxpd,    TOP_vfmsubaddxxpd,           
TOP_vfmsubaddxxxpd,       TOP_UNDEFINED},
   {TOP_UNDEFINED,      TOP_vfmsubaddxrps,   TOP_vfmsubaddxxrps,   
TOP_vfmsubaddxxxrps,      TOP_UNDEFINED},
   {TOP_UNDEFINED,      TOP_vfmsubaddxrpd,   TOP_vfmsubaddxxrpd,   
TOP_vfmsubaddxxxrpd,      TOP_UNDEFINED},
+  {TOP_vfnmaddss,      TOP_vfnmaddxss,     TOP_vfnmaddxxss,      
TOP_vfnmaddxxxss,         TOP_UNDEFINED},
+  {TOP_vfnmaddsd,      TOP_vfnmaddxsd,     TOP_vfnmaddxxsd,      
TOP_vfnmaddxxxsd,         TOP_UNDEFINED},
+  {TOP_vfnmaddps,      TOP_vfnmaddxps,     TOP_vfnmaddxxps,      
TOP_vfnmaddxxxps,         TOP_UNDEFINED},
+  {TOP_vfnmaddpd,      TOP_vfnmaddxpd,      TOP_vfnmaddxxpd,     
TOP_vfnmaddxxxpd,         TOP_UNDEFINED},
+  {TOP_UNDEFINED,      TOP_vfnmaddxrss,     TOP_vfnmaddxxrss,     
TOP_vfnmaddxxxrss,        TOP_UNDEFINED},
+  {TOP_UNDEFINED,      TOP_vfnmaddxrsd,     TOP_vfnmaddxxrsd,     
TOP_vfnmaddxxxrsd,        TOP_UNDEFINED},
+  {TOP_UNDEFINED,      TOP_vfnmaddxrps,     TOP_vfnmaddxxrps,     
TOP_vfnmaddxxxrps,        TOP_UNDEFINED},
+  {TOP_UNDEFINED,      TOP_vfnmaddxrpd,     TOP_vfnmaddxxrpd,     
TOP_vfnmaddxxxrpd,        TOP_UNDEFINED},
+  {TOP_vfnmsubss,      TOP_vfnmsubxss,     TOP_vfnmsubxxss,      
TOP_vfnmsubxxxss,         TOP_UNDEFINED},
+  {TOP_vfnmsubsd,      TOP_vfnmsubxsd,     TOP_vfnmsubxxsd,      
TOP_vfnmsubxxxsd,         TOP_UNDEFINED},
+  {TOP_vfnmsubps,      TOP_vfnmsubxps,     TOP_vfnmsubxxps,      
TOP_vfnmsubxxxps,         TOP_UNDEFINED},
+  {TOP_vfnmsubpd,      TOP_vfnmsubxpd,      TOP_vfnmsubxxpd,     
TOP_vfnmsubxxxpd,         TOP_UNDEFINED},
+  {TOP_UNDEFINED,      TOP_vfnmsubxrss,     TOP_vfnmsubxxrss,     
TOP_vfnmsubxxxrss,        TOP_UNDEFINED},
+  {TOP_UNDEFINED,      TOP_vfnmsubxrsd,     TOP_vfnmsubxxrsd,     
TOP_vfnmsubxxxrsd,        TOP_UNDEFINED},
+  {TOP_UNDEFINED,      TOP_vfnmsubxrps,     TOP_vfnmsubxxrps,     
TOP_vfnmsubxxxrps,        TOP_UNDEFINED},
+  {TOP_UNDEFINED,      TOP_vfnmsubxrpd,     TOP_vfnmsubxxrpd,     
TOP_vfnmsubxxxrpd,        TOP_UNDEFINED},
   {TOP_icall,         TOP_icallx,          TOP_icallxx,          TOP_icallxxx, 
            TOP_UNDEFINED},
   {TOP_ijmp,          TOP_ijmpx,           TOP_ijmpxx,           TOP_ijmpxxx,  
            TOP_UNDEFINED},
   {TOP_cvtsd2ss,       TOP_cvtsd2ss_x,     TOP_cvtsd2ss_xx,      
TOP_cvtsd2ss_xxx,         TOP_UNDEFINED},
@@ -9218,6 +9234,17 @@
   case TOP_vfmsubaddpd:
     ret_val = TRUE;
     break;
+  case TOP_vfnmaddss:
+  case TOP_vfnmaddsd:
+  case TOP_vfnmaddps:
+  case TOP_vfnmaddpd:
+  case TOP_vfnmsubss:
+  case TOP_vfnmsubsd:
+  case TOP_vfnmsubps:
+  case TOP_vfnmsubpd:
+    ret_val = TRUE;
+    break;
+
   default:
     ret_val = FALSE;
     break;
@@ -9226,6 +9253,31 @@
   return ret_val;
 }
 
+BOOL EBO_Is_FMA4_NEG( OP* alu_op)
+{
+  const TOP top = OP_code(alu_op);
+  BOOL ret_val;
+
+  switch (top) {
+  case TOP_vfnmaddss:
+  case TOP_vfnmaddsd:
+  case TOP_vfnmaddps:
+  case TOP_vfnmaddpd:
+  case TOP_vfnmsubss:
+  case TOP_vfnmsubsd:
+  case TOP_vfnmsubps:
+  case TOP_vfnmsubpd:
+    ret_val = TRUE;
+    break;
+
+  default:
+    ret_val = FALSE;
+    break;
+  }
+
+  return ret_val;
+}
+
 static BOOL EBO_Allowable_Unaligned_Vector( OP *alu_op )
 {
   const TOP top = OP_code(alu_op);
@@ -9305,18 +9357,22 @@
     break;
 
   // fused multiply-subs
+  case TOP_vfnmaddss:
   case TOP_vfmsubss:
     new_mul_top = TOP_vmulss; 
     new_arith_top = TOP_vsubss; 
     break;
+  case TOP_vfnmaddsd:
   case TOP_vfmsubsd:
     new_mul_top = TOP_vmulsd; 
     new_arith_top = TOP_vsubsd; 
     break;
+  case TOP_vfnmaddps:
   case TOP_vfmsubps:
     new_mul_top = TOP_vfmul128v32; 
     new_arith_top = TOP_vfsub128v32; 
     break;
+  case TOP_vfnmaddpd:
   case TOP_vfmsubpd:
     new_mul_top = TOP_vfmul128v64; 
     new_arith_top = TOP_vfsub128v64; 
@@ -9453,7 +9509,11 @@
         ( arith_top != TOP_UNDEFINED ) ){
       TN *mul_result = Build_TN_Like(result);
       OP *mul_op = Mk_OP( mul_top, mul_result, mul_opnd1, mul_opnd2 );
-      OP *arith_op = Mk_OP( arith_top, result, mul_result, arith_opnd );
+      OP *arith_op;
+      if( EBO_Is_FMA4_NEG( alu_op ) )
+        arith_op = Mk_OP( arith_top, result, arith_opnd, mul_result );
+      else
+        arith_op = Mk_OP( arith_top, result, mul_result, arith_opnd );
 
       // Add the mul component of the fma
       Set_OP_unrolling( mul_op, OP_unrolling(alu_op) );

Modified: trunk/osprey/be/cg/x8664/expand.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/expand.cxx 2011-07-15 07:51:29 UTC (rev 3694)
+++ trunk/osprey/be/cg/x8664/expand.cxx 2011-07-17 03:36:42 UTC (rev 3695)
@@ -5896,7 +5896,7 @@
     Build_OP(TOP_shufpd, tmp7, tmp1, tmp1, Gen_Literal_TN(1, 1), ops);
     if ((CG_opt_level > 1) && Is_Target_Orochi() &&
         Is_Target_AVX() && Is_Target_FMA4()) {
-      Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops);
+      Expand_Reduce_Add(OPC_F8V16F8REDUCE_ADD, tmp9, tmp3, ops);
       Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops);
       Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops);
       Build_OP(TOP_vfmaddsubpd, tmp12, tmp7, tmp4, tmp6, ops);
@@ -5919,7 +5919,7 @@
     Build_OP(TOP_shufpd, tmp22, tmp11, tmp11, Gen_Literal_TN(1, 1), ops);
     if ((CG_opt_level > 1) && Is_Target_Orochi() &&
         Is_Target_AVX() && Is_Target_FMA4()) {
-      Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops);
+      Expand_Reduce_Add(OPC_F8V16F8REDUCE_ADD, tmp25, tmp18, ops);
       Build_OP(TOP_vfmaddsubpd, tmp25, tmp22, tmp19, tmp21, ops);
     } else {
       Build_OP(TOP_fmul128v64, tmp23, tmp22, tmp19, ops);
@@ -6306,7 +6306,7 @@
   {
     TN* tmp = Build_TN_Like(op1);
     Build_OP(TOP_movapd, tmp, op1, ops);
-    if ( Is_Target_SSE3() ) {
+    if ( Is_Target_SSE3() && !Is_Target_Orochi() ) {
       Build_OP(TOP_fhadd128v64, result, tmp, tmp, ops);
     } else {
       TN* tmp_a = Build_TN_Like(op1);
@@ -6319,7 +6319,7 @@
   {
     TN* tmp = Build_TN_Like(op1);
     Build_OP(TOP_movaps, tmp, op1, ops);
-    if ( Is_Target_SSE3() ) {
+    if ( Is_Target_SSE3() && !Is_Target_Orochi() ) {
       Build_OP(TOP_fhadd128v32, tmp, op1, op1, ops);
       Build_OP(TOP_fhadd128v32, result, tmp, tmp, ops);
     } else {
@@ -6795,17 +6795,34 @@
   case TOP_fmovsldupxxx:
     new_op = TOP_fmovsldup;
     break;
+  case TOP_vmovsldup:
+  case TOP_vmovsldupx:
+  case TOP_vmovsldupxx:
+  case TOP_vmovsldupxxx:
+    new_op = TOP_vmovsldup;
+    break;
   case TOP_fmovshdup:
   case TOP_fmovshdupx:
   case TOP_fmovshdupxx:
   case TOP_fmovshdupxxx:
     new_op = TOP_fmovshdup;
     break;
+  case TOP_vmovshdup:
+  case TOP_vmovshdupx:
+  case TOP_vmovshdupxx:
+  case TOP_vmovshdupxxx:
+    new_op = TOP_vmovshdup;
+    break;
   case TOP_fmovddupx:
   case TOP_fmovddupxx:
   case TOP_fmovddupxxx:
     new_op = TOP_fmovddup;
     break;
+  case TOP_vmovddupx:
+  case TOP_vmovddupxx:
+  case TOP_vmovddupxxx:
+    new_op = TOP_vmovddup;
+    break;
 
   default:
     FmtAssert( FALSE, ("Exp_COPY_Ext: Unsupported opcode (%s)", 
TOP_Name(opcode)) );

Modified: trunk/osprey/common/targ_info/proc/x8664/orochi_si.cxx
===================================================================
--- trunk/osprey/common/targ_info/proc/x8664/orochi_si.cxx      2011-07-15 
07:51:29 UTC (rev 3694)
+++ trunk/osprey/common/targ_info/proc/x8664/orochi_si.cxx      2011-07-17 
03:36:42 UTC (rev 3695)
@@ -1609,6 +1609,8 @@
                     TOP_vfaddsub128v32,
                     TOP_vfadd128v64,
                     TOP_vfadd128v32,
+                    TOP_vfsub128v64,
+                    TOP_vfsub128v32,
                    TOP_UNDEFINED);
   Any_Operand_Access_Time(0);
   Any_Result_Available_Time(5);
@@ -1658,6 +1660,12 @@
                     TOP_vfaddx128v32,
                     TOP_vfaddxx128v32,
                     TOP_vfaddxxx128v32,
+                    TOP_vfsubx128v64,
+                    TOP_vfsubxx128v64,
+                    TOP_vfsubxxx128v64,
+                    TOP_vfsubx128v32,
+                    TOP_vfsubxx128v32,
+                    TOP_vfsubxxx128v32,
                    TOP_UNDEFINED);
   Any_Operand_Access_Time(0);
   Any_Result_Available_Time(10);
@@ -4940,8 +4948,6 @@
                         TOP_vfshuf128v64,
                         TOP_vshufps,
                         TOP_vfshuf128v32,
-                        TOP_vfsub128v64,
-                        TOP_vfsub128v32,
                         TOP_vunpckh128v64,
                         TOP_vunpckh128v32,
                         TOP_vunpckl128v64,
@@ -5118,12 +5124,6 @@
   Resource_Requirement(res_fstore, 0);
 
   Instruction_Group( "avx fp arith mem opnd 4",
-                        TOP_vfsubx128v64,
-                        TOP_vfsubxx128v64,
-                        TOP_vfsubxxx128v64,
-                        TOP_vfsubx128v32,
-                        TOP_vfsubxx128v32,
-                        TOP_vfsubxxx128v32,
                         TOP_vfrcpx128v32,
                         TOP_vfrcpxx128v32,
                         TOP_vfrcpxxx128v32,


------------------------------------------------------------------------------
AppSumo Presents a FREE Video for the SourceForge Community by Eric 
Ries, the creator of the Lean Startup Methodology on "Lean Startup 
Secrets Revealed." This video shows you how to validate your ideas, 
optimize your ideas and identify your business strategy.
http://p.sf.net/sfu/appsumosfdev2dev
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to