Author: mberg
Date: 2012-04-12 15:35:30 -0400 (Thu, 12 Apr 2012)
New Revision: 3907
Modified: trunk/osprey/be/cg/oputil.cxx trunk/osprey/be/cg/whirl2ops.cxx trunk/osprey/be/cg/x8664/cg_sched.cxx trunk/osprey/be/cg/x8664/cgemit_targ.cxx trunk/osprey/be/cg/x8664/ebo_special.cxx trunk/osprey/be/cg/x8664/exp_loadstore.cxx Log: Changes include: support for 256-bit fma, limiting load exec on x86 cmp insns to a single use for the consuming load, updates for non temporal stores, adding a temp result to a binary and expansion pattern and cleanup in CG_sched. CR: Jian-Xin. Modified: trunk/osprey/be/cg/oputil.cxx =================================================================== --- trunk/osprey/be/cg/oputil.cxx 2012-04-12 08:48:34 UTC (rev 3906) +++ trunk/osprey/be/cg/oputil.cxx 2012-04-12 19:35:30 UTC (rev 3907) @@ -2102,16 +2102,10 @@ {TOP_stss_n32, TOP_vstss_n32}, {TOP_stssx, TOP_vstssx}, {TOP_stssxx, TOP_vstssxx}, - {TOP_stntss, TOP_vstntss}, - {TOP_stntssx, TOP_vstntssx}, - {TOP_stntssxx, TOP_vstntssxx}, {TOP_stsd, TOP_vstsd}, {TOP_stsd_n32, TOP_vstsd_n32}, {TOP_stsdx, TOP_vstsdx}, {TOP_stsdxx, TOP_vstsdxx}, - {TOP_stntsd, TOP_vstntsd}, - {TOP_stntsdx, TOP_vstntsdx}, - {TOP_stntsdxx, TOP_vstntsdxx}, {TOP_maxss, TOP_vfmaxss}, {TOP_maxsd, TOP_vfmaxsd}, {TOP_minss, TOP_vfminss}, Modified: trunk/osprey/be/cg/whirl2ops.cxx =================================================================== --- trunk/osprey/be/cg/whirl2ops.cxx 2012-04-12 08:48:34 UTC (rev 3906) +++ trunk/osprey/be/cg/whirl2ops.cxx 2012-04-12 19:35:30 UTC (rev 3907) @@ -4688,6 +4688,7 @@ TOP opcode; TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr)); BOOL is_vector = MTYPE_is_vector(rtype); + BOOL PU_has_avx256 = FALSE; // now match a scalar or vector fma4 switch (WN_opcode(mul_wn)) { @@ -4697,11 +4698,17 @@ case OPC_F8MPY: opcode = (fma4) ? TOP_vfmaddsd : TOP_xfmadd213sd; break; + case OPC_V32F4MPY: + case OPC_V32C4MPY: + PU_has_avx256 = TRUE; case OPC_V16F4MPY: case OPC_V16C4MPY: FmtAssert(is_vector, ("unexpected fma vector form")); opcode = (fma4) ? 
TOP_vfmaddps : TOP_xfmadd213ps; break; + case OPC_V32F8MPY: + case OPC_V32C8MPY: + PU_has_avx256 = TRUE; case OPC_V16F8MPY: case OPC_V16C8MPY: FmtAssert(is_vector, ("unexpected fma vector form")); @@ -4725,8 +4732,7 @@ else Build_OP(opcode, result, opnd0, opnd1, opnd2, &New_OPs); - // TODO: add operand size check for 256-bit - if (PU_has_avx128 == FALSE) + if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE)) PU_has_avx128 = TRUE; return result; @@ -4747,6 +4753,7 @@ TOP opcode; TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr)); BOOL is_vector = MTYPE_is_vector(rtype); + BOOL PU_has_avx256 = FALSE; // now match a scalar or vector fma4 switch (WN_opcode(mul_wn)) { @@ -4756,11 +4763,17 @@ case OPC_F8MPY: opcode = (fma4) ? TOP_vfnmaddsd : TOP_xfnmadd213sd; break; + case OPC_V32F4MPY: + case OPC_V32C4MPY: + PU_has_avx256 = TRUE; case OPC_V16F4MPY: case OPC_V16C4MPY: FmtAssert(is_vector, ("unexpected fma vector form")); opcode = (fma4) ? TOP_vfnmaddps : TOP_xfnmadd213ps; break; + case OPC_V32F8MPY: + case OPC_V32C8MPY: + PU_has_avx256 = TRUE; case OPC_V16F8MPY: case OPC_V16C8MPY: FmtAssert(is_vector, ("unexpected fma vector form")); @@ -4784,8 +4797,7 @@ else Build_OP(opcode, result, opnd0, opnd1, opnd2, &New_OPs); - // TODO: add operand size check for 256-bit - if (PU_has_avx128 == FALSE) + if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE)) PU_has_avx128 = TRUE; return result; @@ -4805,6 +4817,7 @@ TOP opcode; TYPE_ID rtype = OPCODE_rtype(WN_opcode(expr)); BOOL is_vector = MTYPE_is_vector(rtype); + BOOL PU_has_avx256 = FALSE; // now match a scalar or vector fma4 switch (WN_opcode(mul_wn)) { @@ -4814,11 +4827,17 @@ case OPC_F8MPY: opcode = (fma4) ? TOP_vfmsubsd : TOP_xfmsub213sd; break; + case OPC_V32F4MPY: + case OPC_V32C4MPY: + PU_has_avx256 = TRUE; case OPC_V16F4MPY: case OPC_V16C4MPY: FmtAssert(is_vector, ("unexpected fms vector form")); opcode = (fma4) ? 
TOP_vfmsubps : TOP_xfmsub213ps; break; + case OPC_V32F8MPY: + case OPC_V32C8MPY: + PU_has_avx256 = TRUE; case OPC_V16F8MPY: case OPC_V16C8MPY: FmtAssert(is_vector, ("unexpected fms vector form")); @@ -4842,8 +4861,7 @@ else Build_OP(opcode, result, opnd0, opnd1, opnd2, &New_OPs); - // TODO: add operand size check for 256-bit - if (PU_has_avx128 == FALSE) + if ((PU_has_avx128 == FALSE) && (PU_has_avx256 == FALSE)) PU_has_avx128 = TRUE; return result; @@ -5431,25 +5449,29 @@ case OPR_SUB: case OPR_ADD: - if ((CG_opt_level > 1) && Is_Target_Orochi() && - Is_Target_AVX() && + if ((CG_opt_level > 1) && Is_Target_Orochi() && + Is_Target_AVX() && (Is_Target_FMA4() || Is_Target_FMA()) ) { BOOL fma4 = Is_Target_FMA4(); BOOL expr_is_complex = FALSE; TYPE_ID rtype = OPCODE_rtype(opcode); WN *mul_wn = NULL; if ((rtype == MTYPE_V16C4) || - (rtype == MTYPE_V16C8)) { + (rtype == MTYPE_V16C8) || + (rtype == MTYPE_V32C4) || + (rtype == MTYPE_V32C8)) { expr_is_complex = TRUE; } - + // Looking for a fm{a/s} candidate via FMA4 insns if ( (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) && (expr_is_complex == FALSE) ) { if ((WN_operator(mul_wn = WN_kid(expr, 0)) == OPR_MPY) && + (WN_opcode(mul_wn) != OPC_V32C8MPY) && (WN_opcode(mul_wn) != OPC_V16C8MPY) && + (WN_opcode(mul_wn) != OPC_V32C4MPY) && (WN_opcode(mul_wn) != OPC_V16C4MPY) && - (WN_opcode(mul_wn) != OPC_FQMPY) && + (WN_opcode(mul_wn) != OPC_FQMPY) && (WN_opcode(mul_wn) != OPC_F10MPY) ) { rtype = OPCODE_rtype(WN_opcode (mul_wn)); if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) { @@ -5460,16 +5482,20 @@ } } } else if ((WN_operator(mul_wn = WN_kid(expr, 1)) == OPR_MPY) && + (WN_opcode(mul_wn) != OPC_V32C8MPY) && (WN_opcode(mul_wn) != OPC_V16C8MPY) && + (WN_opcode(mul_wn) != OPC_V32C4MPY) && (WN_opcode(mul_wn) != OPC_V16C4MPY) && - (WN_opcode(mul_wn) != OPC_FQMPY) && + (WN_opcode(mul_wn) != OPC_FQMPY) && (WN_opcode(mul_wn) != OPC_F10MPY)) { rtype = OPCODE_rtype(WN_opcode (mul_wn)); if (MTYPE_is_float(rtype) || 
MTYPE_is_vector(rtype)) { if (WN_operator(expr) == OPR_ADD) { return Handle_Fma_Operation(expr, result, mul_wn, FALSE, fma4); - } else if ((WN_operator(expr) == OPR_SUB) && + } else if ((WN_operator(expr) == OPR_SUB) && + (WN_opcode(expr) != OPC_V32C4SUB) && (WN_opcode(expr) != OPC_V16C4SUB) && + (WN_opcode(expr) != OPC_V32C8SUB) && (WN_opcode(expr) != OPC_V16C8SUB)) { return Handle_Fnma_Operation(expr, result, mul_wn, FALSE, fma4); } Modified: trunk/osprey/be/cg/x8664/cg_sched.cxx =================================================================== --- trunk/osprey/be/cg/x8664/cg_sched.cxx 2012-04-12 08:48:34 UTC (rev 3906) +++ trunk/osprey/be/cg/x8664/cg_sched.cxx 2012-04-12 19:35:30 UTC (rev 3907) @@ -379,7 +379,7 @@ } } - return Resource_Table[c]->resources[cur_res][dispatch_unit]; + return Resource_Table[clock]->resources[cur_res][dispatch_unit]; } int Dispatched_Ops( int c ) { return Resource_Table[c]->dispatched_ops; } @@ -835,8 +835,7 @@ const TOP top = OP_code(op); Resource_Table_Entry* entry = Resource_Table[cycle]; - const ICU res = (take_it) ? 
cur_res : - Lookup_Property_By_Pipeinfo( op, cycle, true ); + const ICU res = Lookup_Property_By_Pipeinfo( op, cycle, true ); if( !entry->resources[res][dispatch_unit] ) return false; @@ -857,7 +856,7 @@ OPR* opr = Get_OPR( op ); const int dispatch_unit = Get_Dispatch_Unit( op, cycle, false ); - if( !Probe_Resources( OPR_issue_time(opr), op, dispatch_unit, true ) ){ + if( !Probe_Resources( cycle, op, dispatch_unit, true ) ){ ASSERT( false ); } Modified: trunk/osprey/be/cg/x8664/cgemit_targ.cxx =================================================================== --- trunk/osprey/be/cg/x8664/cgemit_targ.cxx 2012-04-12 08:48:34 UTC (rev 3906) +++ trunk/osprey/be/cg/x8664/cgemit_targ.cxx 2012-04-12 19:35:30 UTC (rev 3907) @@ -1973,12 +1973,6 @@ OP_Name[TOP_vstntps] = "vmovntps"; OP_Name[TOP_vstntpsx] = "vmovntps"; OP_Name[TOP_vstntpsxx] = "vmovntps"; - OP_Name[TOP_vstntsd] = "vmovntsd"; - OP_Name[TOP_vstntsdx] = "vmovntsd"; - OP_Name[TOP_vstntsdxx] = "vmovntsd"; - OP_Name[TOP_vstntss] = "vmovntss"; - OP_Name[TOP_vstntssx] = "vmovntss"; - OP_Name[TOP_vstntssxx] = "vmovntss"; OP_Name[TOP_vldlpd] = "vmovsd"; OP_Name[TOP_vldlpdx] = "vmovsd"; OP_Name[TOP_vldlpdxx] = "vmovsd"; Modified: trunk/osprey/be/cg/x8664/ebo_special.cxx =================================================================== --- trunk/osprey/be/cg/x8664/ebo_special.cxx 2012-04-12 08:48:34 UTC (rev 3906) +++ trunk/osprey/be/cg/x8664/ebo_special.cxx 2012-04-12 19:35:30 UTC (rev 3907) @@ -10255,6 +10255,11 @@ return Process_Side_Effects(opnd_tn, actual_tninfo, rval, opnds_swapped); } + // we only want one reaching use in this block for a cmp peep + if( EBO_flow_safe && ( load_uses > 1 ) ) { + return Process_Side_Effects(opnd_tn, actual_tninfo, rval, opnds_swapped); + } + // If load is volatile, replace with exactly one load-exe OP, in order to // maintain the same number of memory accesses. 
if (OP_volatile(ld_op) && Modified: trunk/osprey/be/cg/x8664/exp_loadstore.cxx =================================================================== --- trunk/osprey/be/cg/x8664/exp_loadstore.cxx 2012-04-12 08:48:34 UTC (rev 3906) +++ trunk/osprey/be/cg/x8664/exp_loadstore.cxx 2012-04-12 19:35:30 UTC (rev 3907) @@ -1664,8 +1664,9 @@ FALSE, ops ); } - Expand_Binary_And( tgt_tn, tmp2_tn, src1_tn, rtype, ops ); - Expand_Binary_Or( tgt_tn, tgt_tn, tmp1_tn, rtype, ops ); + TN* tmp = Build_TN_Like(tgt_tn); + Expand_Binary_And( tmp, tmp2_tn, src1_tn, rtype, ops ); + Expand_Binary_Or( tgt_tn, tmp, tmp1_tn, rtype, ops ); } /* ====================================================================== ------------------------------------------------------------------------------ For Developers, A Lot Can Happen In A Second. Boundary is the first to Know...and Tell You. Monitor Your Applications in Ultra-Fine Resolution. Try it FREE! http://p.sf.net/sfu/Boundary-d2dvs2 _______________________________________________ Open64-devel mailing list Open64-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/open64-devel