Author: mberg
Date: 2011-06-21 14:55:15 -0400 (Tue, 21 Jun 2011)
New Revision: 3660
Modified: trunk/osprey/be/cg/oputil.cxx trunk/osprey/be/cg/whirl2ops.cxx trunk/osprey/be/cg/x8664/expand.cxx Log: Additions to handle complex arithmetic for fma instructions as well as some updates to shift right and movddup. CR by Jian-Xin. Modified: trunk/osprey/be/cg/oputil.cxx =================================================================== --- trunk/osprey/be/cg/oputil.cxx 2011-06-21 04:40:48 UTC (rev 3659) +++ trunk/osprey/be/cg/oputil.cxx 2011-06-21 18:55:15 UTC (rev 3660) @@ -2172,7 +2172,7 @@ {TOP_movhlps, TOP_vmovhlps}, {TOP_movlhps, TOP_vmovlhps}, {TOP_psrldq, TOP_vpsrldq}, - {TOP_psrlq128v64, TOP_vpsrlq}, + {TOP_psrlq128v64, TOP_vpsrlqi}, {TOP_pslldq, TOP_vpslldq}, {TOP_psllw, TOP_vpsllw}, {TOP_pslld, TOP_vpslld}, @@ -2495,6 +2495,18 @@ {TOP_phaddsx128v16, TOP_vphaddsx128v16}, {TOP_phaddsxx128v16, TOP_vphaddsxx128v16}, {TOP_phaddsxxx128v16, TOP_vphaddsxxx128v16}, + {TOP_fmovddup, TOP_vmovddup}, + {TOP_fmovddupx, TOP_vmovddupx}, + {TOP_fmovddupxx, TOP_vmovddupxx}, + {TOP_fmovddupxxx, TOP_vmovddupxxx}, + {TOP_fmovshdup, TOP_vmovshdup}, + {TOP_fmovshdupx, TOP_vmovshdupx}, + {TOP_fmovshdupxx, TOP_vmovshdupxx}, + {TOP_fmovshdupxxx, TOP_vmovshdupxxx}, + {TOP_fmovsldup, TOP_vmovsldup}, + {TOP_fmovsldupx, TOP_vmovsldupx}, + {TOP_fmovsldupxx, TOP_vmovsldupxx}, + {TOP_fmovsldupxxx, TOP_vmovsldupxxx}, }; void Init_LegacySSE_To_Vex_Group(void) Modified: trunk/osprey/be/cg/whirl2ops.cxx =================================================================== --- trunk/osprey/be/cg/whirl2ops.cxx 2011-06-21 04:40:48 UTC (rev 3659) +++ trunk/osprey/be/cg/whirl2ops.cxx 2011-06-21 18:55:15 UTC (rev 3660) @@ -4693,7 +4693,7 @@ opnd2 = Expand_Expr(add_wn, expr, NULL); opnd1 = Expand_Expr(WN_kid1(mul_wn), mul_wn, NULL); opnd0 = Expand_Expr(WN_kid0(mul_wn), mul_wn, NULL); - + if(result == NULL) result = Allocate_Result_TN(expr, NULL); @@ -5348,12 +5348,22 @@ case OPR_ADD: if ((CG_opt_level > 1) && Is_Target_Orochi() && Is_Target_AVX() && Is_Target_FMA4()) { + BOOL 
expr_is_complex = FALSE; TYPE_ID rtype = OPCODE_rtype(opcode); WN *mul_wn = NULL; + if ((rtype == MTYPE_V16C4) || + (rtype == MTYPE_V16C8)) { + expr_is_complex = TRUE; + } + // Looking for a fm{a/s} candidate via FMA4 insns - if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) { + if ( (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) && + (expr_is_complex == FALSE) ) { if ((WN_operator(mul_wn = WN_kid(expr, 0)) == OPR_MPY) && - (WN_opcode(mul_wn) != OPC_FQMPY) && (WN_opcode(mul_wn) != OPC_F10MPY) ) { + (WN_opcode(mul_wn) != OPC_V16C8MPY) && + (WN_opcode(mul_wn) != OPC_V16C4MPY) && + (WN_opcode(mul_wn) != OPC_FQMPY) && + (WN_opcode(mul_wn) != OPC_F10MPY) ) { rtype = OPCODE_rtype(WN_opcode (mul_wn)); if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) { if (WN_operator(expr) == OPR_ADD) { @@ -5363,7 +5373,10 @@ } } } else if ((WN_operator(mul_wn = WN_kid(expr, 1)) == OPR_MPY) && - (WN_opcode(mul_wn) != OPC_FQMPY) && (WN_opcode(mul_wn) != OPC_F10MPY)) { + (WN_opcode(mul_wn) != OPC_V16C8MPY) && + (WN_opcode(mul_wn) != OPC_V16C4MPY) && + (WN_opcode(mul_wn) != OPC_FQMPY) && + (WN_opcode(mul_wn) != OPC_F10MPY)) { rtype = OPCODE_rtype(WN_opcode (mul_wn)); if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) { if (WN_operator(expr) == OPR_ADD) { Modified: trunk/osprey/be/cg/x8664/expand.cxx =================================================================== --- trunk/osprey/be/cg/x8664/expand.cxx 2011-06-21 04:40:48 UTC (rev 3659) +++ trunk/osprey/be/cg/x8664/expand.cxx 2011-06-21 18:55:15 UTC (rev 3660) @@ -5630,12 +5630,19 @@ TN* tmp5 = Build_TN_Like(src1); Build_OP(TOP_fmovsldup, tmp1, src2, ops); - Build_OP(TOP_fmul128v32, tmp2, tmp1, src1, ops); - Build_OP(TOP_fmovshdup,tmp3, src2, ops); - Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops); - Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops); - Build_OP(TOP_faddsub128v32, result, tmp2, tmp5, ops); - + if ((CG_opt_level > 1) && Is_Target_Orochi() && + Is_Target_AVX() && Is_Target_FMA4()) { + 
Build_OP(TOP_fmovshdup,tmp3, src2, ops); + Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops); + Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops); + Build_OP(TOP_vfmaddsubps, result, tmp1, src1, tmp5, ops); + } else { + Build_OP(TOP_fmul128v32, tmp2, tmp1, src1, ops); + Build_OP(TOP_fmovshdup,tmp3, src2, ops); + Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops); + Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops); + Build_OP(TOP_faddsub128v32, result, tmp2, tmp5, ops); + } } else if (TN_size(src1) != TN_size(src2)){ TN* src1_t; TN* src2_t; @@ -5669,12 +5676,21 @@ TN* tmp6 = Build_TN_Like(src1); Build_OP(TOP_fmovddup, tmp1, src2_t, ops); - Build_OP(TOP_fmul128v64, tmp2, src1_t, tmp1, ops); - Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops); - Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops); - Build_OP(TOP_fmovddup, tmp5, tmp4, ops); - Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops); - Build_OP(TOP_faddsub128v64, result, tmp2, tmp6, ops); + if ((CG_opt_level > 1) && Is_Target_Orochi() && + Is_Target_AVX() && Is_Target_FMA4()) { + Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops); + Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops); + Build_OP(TOP_fmovddup, tmp5, tmp4, ops); + Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops); + Build_OP(TOP_vfmaddsubpd, result, src1_t, tmp1, tmp6, ops); + } else { + Build_OP(TOP_fmul128v64, tmp2, src1_t, tmp1, ops); + Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops); + Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops); + Build_OP(TOP_fmovddup, tmp5, tmp4, ops); + Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops); + Build_OP(TOP_faddsub128v64, result, tmp2, tmp6, ops); + } } return; } @@ -5720,12 +5736,21 @@ Build_OP(TOP_addsd, tmp4, tmp3, tmp1, ops); Build_OP(TOP_fmovddup, tmp5, tmp4, ops); Build_OP(TOP_fmovddup, tmp6, src2, ops); - Build_OP(TOP_fmul128v64, tmp8, tmp6, src1, ops); - 
Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops); - Expand_Neg(tmp12, tmp2, MTYPE_F8, ops); - Build_OP(TOP_fmovddup, tmp13, tmp12, ops); - Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops); - Build_OP(TOP_faddsub128v64, tmp11, tmp8, tmp10, ops); + if ((CG_opt_level > 1) && Is_Target_Orochi() && + Is_Target_AVX() && Is_Target_FMA4()) { + Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops); + Expand_Neg(tmp12, tmp2, MTYPE_F8, ops); + Build_OP(TOP_fmovddup, tmp13, tmp12, ops); + Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops); + Build_OP(TOP_vfmaddsubpd, tmp11, tmp6, src1, tmp10, ops); + } else { + Build_OP(TOP_fmul128v64, tmp8, tmp6, src1, ops); + Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops); + Expand_Neg(tmp12, tmp2, MTYPE_F8, ops); + Build_OP(TOP_fmovddup, tmp13, tmp12, ops); + Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops); + Build_OP(TOP_faddsub128v64, tmp11, tmp8, tmp10, ops); + } Build_OP(TOP_fdiv128v64, result, tmp11, tmp5, ops); } else if (opcode == OPC_V16C8DIV) { @@ -5792,11 +5817,19 @@ Build_OP(TOP_unpckhpd, tmp5, tmp2, tmp2, ops); Build_OP(TOP_fmul128v64, tmp6, tmp5, tmp1, ops); Build_OP(TOP_shufpd, tmp7, tmp1, tmp1, Gen_Literal_TN(1, 1), ops); - Build_OP(TOP_fmul128v64, tmp8, tmp7, tmp4, ops); - Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops); - Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops); - Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops); - Build_OP(TOP_faddsub128v64, tmp12, tmp8, tmp6, ops); + if ((CG_opt_level > 1) && Is_Target_Orochi() && + Is_Target_AVX() && Is_Target_FMA4()) { + Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops); + Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops); + Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops); + Build_OP(TOP_vfmaddsubpd, tmp12, tmp7, tmp4, tmp6, ops); + } else { + Build_OP(TOP_fmul128v64, tmp8, tmp7, tmp4, ops); + Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops); + Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 
1), ops); + Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops); + Build_OP(TOP_faddsub128v64, tmp12, tmp8, tmp6, ops); + } Build_OP(TOP_shufpd, tmp13, tmp12, tmp12, Gen_Literal_TN(1, 1), ops); Build_OP(TOP_fdiv128v64, tmp14, tmp13, tmp9, ops); Build_OP(TOP_cvtpd2ps, tmp15, tmp14, ops); @@ -5807,9 +5840,15 @@ Build_OP(TOP_unpckhpd, tmp20, tmp17, tmp17, ops); Build_OP(TOP_fmul128v64, tmp21, tmp20, tmp11, ops); Build_OP(TOP_shufpd, tmp22, tmp11, tmp11, Gen_Literal_TN(1, 1), ops); - Build_OP(TOP_fmul128v64, tmp23, tmp22, tmp19, ops); - Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops); - Build_OP(TOP_faddsub128v64, tmp25, tmp23, tmp21, ops); + if ((CG_opt_level > 1) && Is_Target_Orochi() && + Is_Target_AVX() && Is_Target_FMA4()) { + Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops); + Build_OP(TOP_vfmaddsubpd, tmp25, tmp22, tmp19, tmp21, ops); + } else { + Build_OP(TOP_fmul128v64, tmp23, tmp22, tmp19, ops); + Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops); + Build_OP(TOP_faddsub128v64, tmp25, tmp23, tmp21, ops); + } Build_OP(TOP_shufpd, tmp26, tmp25, tmp25, Gen_Literal_TN(1, 1), ops); Build_OP(TOP_fdiv128v64, tmp27, tmp26, tmp24, ops); Build_OP(TOP_cvtpd2ps, tmp28, tmp27, ops); @@ -7245,9 +7284,15 @@ Build_OP(TOP_fmovddup, tmp1, op2, ops); Build_OP(TOP_shufpd, tmp2, op2, op2, Gen_Literal_TN(1, 1), ops); Build_OP(TOP_fmovddup, tmp3, tmp2, ops); - Build_OP(TOP_fmul128v64, tmp4, op0, tmp1, ops); - Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops); - Build_OP(TOP_faddsub128v64, result, tmp4, tmp5, ops); + if ((CG_opt_level > 1) && Is_Target_Orochi() && + Is_Target_AVX() && Is_Target_FMA4()) { + Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops); + Build_OP(TOP_vfmaddsubpd, result, op0, tmp1, tmp5, ops); + } else { + Build_OP(TOP_fmul128v64, tmp4, op0, tmp1, ops); + Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops); + Build_OP(TOP_faddsub128v64, result, tmp4, tmp5, ops); + } break; } case INTRN_V16C8CONJG: ------------------------------------------------------------------------------ 
EditLive Enterprise is the world's most technically advanced content authoring tool. Experience the power of Track Changes, Inline Image Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel