Author: mberg
Date: 2012-04-12 15:35:30 -0400 (Thu, 12 Apr 2012)
New Revision: 3907
Modified:
trunk/osprey/be/cg/oputil.cxx
trunk/osprey/be/cg/whirl2ops.cxx
trunk/osprey/be/cg/x8664/cg_sched.cxx
trunk/osprey/be/cg/x8664/cgemit_targ.cxx
trunk/osprey/be/cg/x8664/ebo_special.cxx
trunk/osprey/be/cg/x8664/exp_loadstore.cxx
Log:
Changes include: support for 256-bit FMA, limiting load-exec on x86 cmp
insns to a single use for the consuming load, updates for non-temporal
stores, adding a temp result to a binary AND expansion pattern, and
cleanup in CG_sched. CR: Jian-Xin.
Modified: trunk/osprey/be/cg/oputil.cxx
===================================================================
--- trunk/osprey/be/cg/oputil.cxx 2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/oputil.cxx 2012-04-12 19:35:30 UTC (rev 3907)
@@ -2102,16 +2102,10 @@
{TOP_stss_n32, TOP_vstss_n32},
{TOP_stssx, TOP_vstssx},
{TOP_stssxx, TOP_vstssxx},
- {TOP_stntss, TOP_vstntss},
- {TOP_stntssx, TOP_vstntssx},
- {TOP_stntssxx, TOP_vstntssxx},
{TOP_stsd, TOP_vstsd},
{TOP_stsd_n32, TOP_vstsd_n32},
{TOP_stsdx, TOP_vstsdx},
{TOP_stsdxx, TOP_vstsdxx},
- {TOP_stntsd, TOP_vstntsd},
- {TOP_stntsdx, TOP_vstntsdx},
- {TOP_stntsdxx, TOP_vstntsdxx},
{TOP_maxss, TOP_vfmaxss},
{TOP_maxsd, TOP_vfmaxsd},
{TOP_minss, TOP_vfminss},
Modified: trunk/osprey/be/cg/whirl2ops.cxx
===================================================================
--- trunk/osprey/be/cg/whirl2ops.cxx 2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/whirl2ops.cxx 2012-04-12 19:35:30 UTC (rev 3907)
@@ -4688,6 +4688,7 @@
TOP opcode;
TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr));
BOOL is_vector = MTYPE_is_vector(rtype);
+ BOOL PU_has_avx256 = FALSE;
// now match a scalar or vector fma4
switch (WN_opcode(mul_wn)) {
@@ -4697,11 +4698,17 @@
case OPC_F8MPY:
opcode = (fma4) ? TOP_vfmaddsd : TOP_xfmadd213sd;
break;
+ case OPC_V32F4MPY:
+ case OPC_V32C4MPY:
+ PU_has_avx256 = TRUE;
case OPC_V16F4MPY:
case OPC_V16C4MPY:
FmtAssert(is_vector, ("unexpected fma vector form"));
opcode = (fma4) ? TOP_vfmaddps : TOP_xfmadd213ps;
break;
+ case OPC_V32F8MPY:
+ case OPC_V32C8MPY:
+ PU_has_avx256 = TRUE;
case OPC_V16F8MPY:
case OPC_V16C8MPY:
FmtAssert(is_vector, ("unexpected fma vector form"));
@@ -4725,8 +4732,7 @@
else
Build_OP(opcode, result, opnd0, opnd1, opnd2, &New_OPs);
- // TODO: add operand size check for 256-bit
- if (PU_has_avx128 == FALSE)
+ if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE))
PU_has_avx128 = TRUE;
return result;
@@ -4747,6 +4753,7 @@
TOP opcode;
TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr));
BOOL is_vector = MTYPE_is_vector(rtype);
+ BOOL PU_has_avx256 = FALSE;
// now match a scalar or vector fma4
switch (WN_opcode(mul_wn)) {
@@ -4756,11 +4763,17 @@
case OPC_F8MPY:
opcode = (fma4) ? TOP_vfnmaddsd : TOP_xfnmadd213sd;
break;
+ case OPC_V32F4MPY:
+ case OPC_V32C4MPY:
+ PU_has_avx256 = TRUE;
case OPC_V16F4MPY:
case OPC_V16C4MPY:
FmtAssert(is_vector, ("unexpected fma vector form"));
opcode = (fma4) ? TOP_vfnmaddps : TOP_xfnmadd213ps;
break;
+ case OPC_V32F8MPY:
+ case OPC_V32C8MPY:
+ PU_has_avx256 = TRUE;
case OPC_V16F8MPY:
case OPC_V16C8MPY:
FmtAssert(is_vector, ("unexpected fma vector form"));
@@ -4784,8 +4797,7 @@
else
Build_OP(opcode, result, opnd0, opnd1, opnd2, &New_OPs);
- // TODO: add operand size check for 256-bit
- if (PU_has_avx128 == FALSE)
+ if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE))
PU_has_avx128 = TRUE;
return result;
@@ -4805,6 +4817,7 @@
TOP opcode;
TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr));
BOOL is_vector = MTYPE_is_vector(rtype);
+ BOOL PU_has_avx256 = FALSE;
// now match a scalar or vector fma4
switch (WN_opcode(mul_wn)) {
@@ -4814,11 +4827,17 @@
case OPC_F8MPY:
opcode = (fma4) ? TOP_vfmsubsd : TOP_xfmsub213sd;
break;
+ case OPC_V32F4MPY:
+ case OPC_V32C4MPY:
+ PU_has_avx256 = TRUE;
case OPC_V16F4MPY:
case OPC_V16C4MPY:
FmtAssert(is_vector, ("unexpected fms vector form"));
opcode = (fma4) ? TOP_vfmsubps : TOP_xfmsub213ps;
break;
+ case OPC_V32F8MPY:
+ case OPC_V32C8MPY:
+ PU_has_avx256 = TRUE;
case OPC_V16F8MPY:
case OPC_V16C8MPY:
FmtAssert(is_vector, ("unexpected fms vector form"));
@@ -4842,8 +4861,7 @@
else
Build_OP(opcode, result, opnd0, opnd1, opnd2, &New_OPs);
- // TODO: add operand size check for 256-bit
- if (PU_has_avx128 == FALSE)
+ if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE))
PU_has_avx128 = TRUE;
return result;
@@ -5431,25 +5449,29 @@
case OPR_SUB:
case OPR_ADD:
- if ((CG_opt_level > 1) && Is_Target_Orochi() &&
- Is_Target_AVX() &&
+ if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+ Is_Target_AVX() &&
(Is_Target_FMA4() || Is_Target_FMA()) ) {
BOOL fma4 = Is_Target_FMA4();
BOOL expr_is_complex = FALSE;
TYPE_ID rtype = OPCODE_rtype(opcode);
WN *mul_wn = NULL;
if ((rtype == MTYPE_V16C4) ||
- (rtype == MTYPE_V16C8)) {
+ (rtype == MTYPE_V16C8) ||
+ (rtype == MTYPE_V32C4) ||
+ (rtype == MTYPE_V32C8)) {
expr_is_complex = TRUE;
}
-
+
// Looking for a fm{a/s} candidate via FMA4 insns
if ( (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) &&
(expr_is_complex == FALSE) ) {
if ((WN_operator(mul_wn = WN_kid(expr, 0)) == OPR_MPY) &&
+ (WN_opcode(mul_wn) != OPC_V32C8MPY) &&
(WN_opcode(mul_wn) != OPC_V16C8MPY) &&
+ (WN_opcode(mul_wn) != OPC_V32C4MPY) &&
(WN_opcode(mul_wn) != OPC_V16C4MPY) &&
- (WN_opcode(mul_wn) != OPC_FQMPY) &&
+ (WN_opcode(mul_wn) != OPC_FQMPY) &&
(WN_opcode(mul_wn) != OPC_F10MPY) ) {
rtype = OPCODE_rtype(WN_opcode (mul_wn));
if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
@@ -5460,16 +5482,20 @@
}
}
} else if ((WN_operator(mul_wn = WN_kid(expr, 1)) == OPR_MPY) &&
+ (WN_opcode(mul_wn) != OPC_V32C8MPY) &&
(WN_opcode(mul_wn) != OPC_V16C8MPY) &&
+ (WN_opcode(mul_wn) != OPC_V32C4MPY) &&
(WN_opcode(mul_wn) != OPC_V16C4MPY) &&
- (WN_opcode(mul_wn) != OPC_FQMPY) &&
+ (WN_opcode(mul_wn) != OPC_FQMPY) &&
(WN_opcode(mul_wn) != OPC_F10MPY)) {
rtype = OPCODE_rtype(WN_opcode (mul_wn));
if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
if (WN_operator(expr) == OPR_ADD) {
return Handle_Fma_Operation(expr, result, mul_wn, FALSE, fma4);
- } else if ((WN_operator(expr) == OPR_SUB) &&
+ } else if ((WN_operator(expr) == OPR_SUB) &&
+ (WN_opcode(expr) != OPC_V32C4SUB) &&
(WN_opcode(expr) != OPC_V16C4SUB) &&
+ (WN_opcode(expr) != OPC_V32C8SUB) &&
(WN_opcode(expr) != OPC_V16C8SUB)) {
return Handle_Fnma_Operation(expr, result, mul_wn, FALSE, fma4);
}
Modified: trunk/osprey/be/cg/x8664/cg_sched.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/cg_sched.cxx 2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/x8664/cg_sched.cxx 2012-04-12 19:35:30 UTC (rev 3907)
@@ -379,7 +379,7 @@
}
}
- return Resource_Table[c]->resources[cur_res][dispatch_unit];
+ return Resource_Table[clock]->resources[cur_res][dispatch_unit];
}
int Dispatched_Ops( int c ) { return Resource_Table[c]->dispatched_ops; }
@@ -835,8 +835,7 @@
const TOP top = OP_code(op);
Resource_Table_Entry* entry = Resource_Table[cycle];
- const ICU res = (take_it) ? cur_res :
- Lookup_Property_By_Pipeinfo( op, cycle, true );
+ const ICU res = Lookup_Property_By_Pipeinfo( op, cycle, true );
if( !entry->resources[res][dispatch_unit] )
return false;
@@ -857,7 +856,7 @@
OPR* opr = Get_OPR( op );
const int dispatch_unit = Get_Dispatch_Unit( op, cycle, false );
- if( !Probe_Resources( OPR_issue_time(opr), op, dispatch_unit, true ) ){
+ if( !Probe_Resources( cycle, op, dispatch_unit, true ) ){
ASSERT( false );
}
Modified: trunk/osprey/be/cg/x8664/cgemit_targ.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/cgemit_targ.cxx 2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/x8664/cgemit_targ.cxx 2012-04-12 19:35:30 UTC (rev 3907)
@@ -1973,12 +1973,6 @@
OP_Name[TOP_vstntps] = "vmovntps";
OP_Name[TOP_vstntpsx] = "vmovntps";
OP_Name[TOP_vstntpsxx] = "vmovntps";
- OP_Name[TOP_vstntsd] = "vmovntsd";
- OP_Name[TOP_vstntsdx] = "vmovntsd";
- OP_Name[TOP_vstntsdxx] = "vmovntsd";
- OP_Name[TOP_vstntss] = "vmovntss";
- OP_Name[TOP_vstntssx] = "vmovntss";
- OP_Name[TOP_vstntssxx] = "vmovntss";
OP_Name[TOP_vldlpd] = "vmovsd";
OP_Name[TOP_vldlpdx] = "vmovsd";
OP_Name[TOP_vldlpdxx] = "vmovsd";
Modified: trunk/osprey/be/cg/x8664/ebo_special.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/ebo_special.cxx 2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/x8664/ebo_special.cxx 2012-04-12 19:35:30 UTC (rev 3907)
@@ -10255,6 +10255,11 @@
return Process_Side_Effects(opnd_tn, actual_tninfo, rval, opnds_swapped);
}
+ // we only want one reaching use in this block for a cmp peep
+ if( EBO_flow_safe && ( load_uses > 1 ) ) {
+ return Process_Side_Effects(opnd_tn, actual_tninfo, rval, opnds_swapped);
+ }
+
// If load is volatile, replace with exactly one load-exe OP, in order to
// maintain the same number of memory accesses.
if (OP_volatile(ld_op) &&
Modified: trunk/osprey/be/cg/x8664/exp_loadstore.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/exp_loadstore.cxx 2012-04-12 08:48:34 UTC (rev 3906)
+++ trunk/osprey/be/cg/x8664/exp_loadstore.cxx 2012-04-12 19:35:30 UTC (rev 3907)
@@ -1664,8 +1664,9 @@
FALSE, ops );
}
- Expand_Binary_And( tgt_tn, tmp2_tn, src1_tn, rtype, ops );
- Expand_Binary_Or( tgt_tn, tgt_tn, tmp1_tn, rtype, ops );
+ TN* tmp = Build_TN_Like(tgt_tn);
+ Expand_Binary_And( tmp, tmp2_tn, src1_tn, rtype, ops );
+ Expand_Binary_Or( tgt_tn, tmp, tmp1_tn, rtype, ops );
}
/* ======================================================================
------------------------------------------------------------------------------
For Developers, A Lot Can Happen In A Second.
Boundary is the first to Know...and Tell You.
Monitor Your Applications in Ultra-Fine Resolution. Try it FREE!
http://p.sf.net/sfu/Boundary-d2dvs2
_______________________________________________
Open64-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/open64-devel