Author: mberg
Date: 2011-06-21 14:55:15 -0400 (Tue, 21 Jun 2011)
New Revision: 3660
Modified:
trunk/osprey/be/cg/oputil.cxx
trunk/osprey/be/cg/whirl2ops.cxx
trunk/osprey/be/cg/x8664/expand.cxx
Log:
Use FMA4 instructions when expanding complex arithmetic, and update the
legacy-SSE-to-VEX mappings for shift right (psrlq) and movddup.
Code review (CR) by Jian-Xin.
Modified: trunk/osprey/be/cg/oputil.cxx
===================================================================
--- trunk/osprey/be/cg/oputil.cxx 2011-06-21 04:40:48 UTC (rev 3659)
+++ trunk/osprey/be/cg/oputil.cxx 2011-06-21 18:55:15 UTC (rev 3660)
@@ -2172,7 +2172,7 @@
{TOP_movhlps, TOP_vmovhlps},
{TOP_movlhps, TOP_vmovlhps},
{TOP_psrldq, TOP_vpsrldq},
- {TOP_psrlq128v64, TOP_vpsrlq},
+ {TOP_psrlq128v64, TOP_vpsrlqi},
{TOP_pslldq, TOP_vpslldq},
{TOP_psllw, TOP_vpsllw},
{TOP_pslld, TOP_vpslld},
@@ -2495,6 +2495,18 @@
{TOP_phaddsx128v16, TOP_vphaddsx128v16},
{TOP_phaddsxx128v16, TOP_vphaddsxx128v16},
{TOP_phaddsxxx128v16, TOP_vphaddsxxx128v16},
+ {TOP_fmovddup, TOP_vmovddup},
+ {TOP_fmovddupx, TOP_vmovddupx},
+ {TOP_fmovddupxx, TOP_vmovddupxx},
+ {TOP_fmovddupxxx, TOP_vmovddupxxx},
+ {TOP_fmovshdup, TOP_vmovshdup},
+ {TOP_fmovshdupx, TOP_vmovshdupx},
+ {TOP_fmovshdupxx, TOP_vmovshdupxx},
+ {TOP_fmovshdupxxx, TOP_vmovshdupxxx},
+ {TOP_fmovsldup, TOP_vmovsldup},
+ {TOP_fmovsldupx, TOP_vmovsldupx},
+ {TOP_fmovsldupxx, TOP_vmovsldupxx},
+ {TOP_fmovsldupxxx, TOP_vmovsldupxxx},
};
void Init_LegacySSE_To_Vex_Group(void)
Modified: trunk/osprey/be/cg/whirl2ops.cxx
===================================================================
--- trunk/osprey/be/cg/whirl2ops.cxx 2011-06-21 04:40:48 UTC (rev 3659)
+++ trunk/osprey/be/cg/whirl2ops.cxx 2011-06-21 18:55:15 UTC (rev 3660)
@@ -4693,7 +4693,7 @@
opnd2 = Expand_Expr(add_wn, expr, NULL);
opnd1 = Expand_Expr(WN_kid1(mul_wn), mul_wn, NULL);
opnd0 = Expand_Expr(WN_kid0(mul_wn), mul_wn, NULL);
-
+
if(result == NULL)
result = Allocate_Result_TN(expr, NULL);
@@ -5348,12 +5348,22 @@
case OPR_ADD:
if ((CG_opt_level > 1) && Is_Target_Orochi() &&
Is_Target_AVX() && Is_Target_FMA4()) {
+ BOOL expr_is_complex = FALSE;
TYPE_ID rtype = OPCODE_rtype(opcode);
WN *mul_wn = NULL;
+ if ((rtype == MTYPE_V16C4) ||
+ (rtype == MTYPE_V16C8)) {
+ expr_is_complex = TRUE;
+ }
+
// Looking for a fm{a/s} candidate via FMA4 insns
- if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
+ if ( (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) &&
+ (expr_is_complex == FALSE) ) {
if ((WN_operator(mul_wn = WN_kid(expr, 0)) == OPR_MPY) &&
- (WN_opcode(mul_wn) != OPC_FQMPY) && (WN_opcode(mul_wn) != OPC_F10MPY) ) {
+ (WN_opcode(mul_wn) != OPC_V16C8MPY) &&
+ (WN_opcode(mul_wn) != OPC_V16C4MPY) &&
+ (WN_opcode(mul_wn) != OPC_FQMPY) &&
+ (WN_opcode(mul_wn) != OPC_F10MPY) ) {
rtype = OPCODE_rtype(WN_opcode (mul_wn));
if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
if (WN_operator(expr) == OPR_ADD) {
@@ -5363,7 +5373,10 @@
}
}
} else if ((WN_operator(mul_wn = WN_kid(expr, 1)) == OPR_MPY) &&
- (WN_opcode(mul_wn) != OPC_FQMPY) && (WN_opcode(mul_wn) != OPC_F10MPY)) {
+ (WN_opcode(mul_wn) != OPC_V16C8MPY) &&
+ (WN_opcode(mul_wn) != OPC_V16C4MPY) &&
+ (WN_opcode(mul_wn) != OPC_FQMPY) &&
+ (WN_opcode(mul_wn) != OPC_F10MPY)) {
rtype = OPCODE_rtype(WN_opcode (mul_wn));
if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
if (WN_operator(expr) == OPR_ADD) {
Modified: trunk/osprey/be/cg/x8664/expand.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/expand.cxx 2011-06-21 04:40:48 UTC (rev 3659)
+++ trunk/osprey/be/cg/x8664/expand.cxx 2011-06-21 18:55:15 UTC (rev 3660)
@@ -5630,12 +5630,19 @@
TN* tmp5 = Build_TN_Like(src1);
Build_OP(TOP_fmovsldup, tmp1, src2, ops);
- Build_OP(TOP_fmul128v32, tmp2, tmp1, src1, ops);
- Build_OP(TOP_fmovshdup,tmp3, src2, ops);
- Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops);
- Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops);
- Build_OP(TOP_faddsub128v32, result, tmp2, tmp5, ops);
-
+ if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+ Is_Target_AVX() && Is_Target_FMA4()) {
+ Build_OP(TOP_fmovshdup,tmp3, src2, ops);
+ Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops);
+ Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops);
+ Build_OP(TOP_vfmaddsubps, result, tmp1, src1, tmp5, ops);
+ } else {
+ Build_OP(TOP_fmul128v32, tmp2, tmp1, src1, ops);
+ Build_OP(TOP_fmovshdup,tmp3, src2, ops);
+ Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops);
+ Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops);
+ Build_OP(TOP_faddsub128v32, result, tmp2, tmp5, ops);
+ }
} else if (TN_size(src1) != TN_size(src2)){
TN* src1_t;
TN* src2_t;
@@ -5669,12 +5676,21 @@
TN* tmp6 = Build_TN_Like(src1);
Build_OP(TOP_fmovddup, tmp1, src2_t, ops);
- Build_OP(TOP_fmul128v64, tmp2, src1_t, tmp1, ops);
- Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops);
- Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops);
- Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
- Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops);
- Build_OP(TOP_faddsub128v64, result, tmp2, tmp6, ops);
+ if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+ Is_Target_AVX() && Is_Target_FMA4()) {
+ Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops);
+ Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops);
+ Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
+ Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops);
+ Build_OP(TOP_vfmaddsubpd, result, src1_t, tmp1, tmp6, ops);
+ } else {
+ Build_OP(TOP_fmul128v64, tmp2, src1_t, tmp1, ops);
+ Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops);
+ Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops);
+ Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
+ Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops);
+ Build_OP(TOP_faddsub128v64, result, tmp2, tmp6, ops);
+ }
}
return;
}
@@ -5720,12 +5736,21 @@
Build_OP(TOP_addsd, tmp4, tmp3, tmp1, ops);
Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
Build_OP(TOP_fmovddup, tmp6, src2, ops);
- Build_OP(TOP_fmul128v64, tmp8, tmp6, src1, ops);
- Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops);
- Expand_Neg(tmp12, tmp2, MTYPE_F8, ops);
- Build_OP(TOP_fmovddup, tmp13, tmp12, ops);
- Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops);
- Build_OP(TOP_faddsub128v64, tmp11, tmp8, tmp10, ops);
+ if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+ Is_Target_AVX() && Is_Target_FMA4()) {
+ Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops);
+ Expand_Neg(tmp12, tmp2, MTYPE_F8, ops);
+ Build_OP(TOP_fmovddup, tmp13, tmp12, ops);
+ Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops);
+ Build_OP(TOP_vfmaddsubpd, tmp11, tmp6, src1, tmp10, ops);
+ } else {
+ Build_OP(TOP_fmul128v64, tmp8, tmp6, src1, ops);
+ Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops);
+ Expand_Neg(tmp12, tmp2, MTYPE_F8, ops);
+ Build_OP(TOP_fmovddup, tmp13, tmp12, ops);
+ Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops);
+ Build_OP(TOP_faddsub128v64, tmp11, tmp8, tmp10, ops);
+ }
Build_OP(TOP_fdiv128v64, result, tmp11, tmp5, ops);
} else if (opcode == OPC_V16C8DIV) {
@@ -5792,11 +5817,19 @@
Build_OP(TOP_unpckhpd, tmp5, tmp2, tmp2, ops);
Build_OP(TOP_fmul128v64, tmp6, tmp5, tmp1, ops);
Build_OP(TOP_shufpd, tmp7, tmp1, tmp1, Gen_Literal_TN(1, 1), ops);
- Build_OP(TOP_fmul128v64, tmp8, tmp7, tmp4, ops);
- Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops);
- Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops);
- Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops);
- Build_OP(TOP_faddsub128v64, tmp12, tmp8, tmp6, ops);
+ if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+ Is_Target_AVX() && Is_Target_FMA4()) {
+ Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops);
+ Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops);
+ Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops);
+ Build_OP(TOP_vfmaddsubpd, tmp12, tmp7, tmp4, tmp6, ops);
+ } else {
+ Build_OP(TOP_fmul128v64, tmp8, tmp7, tmp4, ops);
+ Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops);
+ Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops);
+ Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops);
+ Build_OP(TOP_faddsub128v64, tmp12, tmp8, tmp6, ops);
+ }
Build_OP(TOP_shufpd, tmp13, tmp12, tmp12, Gen_Literal_TN(1, 1), ops);
Build_OP(TOP_fdiv128v64, tmp14, tmp13, tmp9, ops);
Build_OP(TOP_cvtpd2ps, tmp15, tmp14, ops);
@@ -5807,9 +5840,15 @@
Build_OP(TOP_unpckhpd, tmp20, tmp17, tmp17, ops);
Build_OP(TOP_fmul128v64, tmp21, tmp20, tmp11, ops);
Build_OP(TOP_shufpd, tmp22, tmp11, tmp11, Gen_Literal_TN(1, 1), ops);
- Build_OP(TOP_fmul128v64, tmp23, tmp22, tmp19, ops);
- Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops);
- Build_OP(TOP_faddsub128v64, tmp25, tmp23, tmp21, ops);
+ if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+ Is_Target_AVX() && Is_Target_FMA4()) {
+ Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops);
+ Build_OP(TOP_vfmaddsubpd, tmp25, tmp22, tmp19, tmp21, ops);
+ } else {
+ Build_OP(TOP_fmul128v64, tmp23, tmp22, tmp19, ops);
+ Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops);
+ Build_OP(TOP_faddsub128v64, tmp25, tmp23, tmp21, ops);
+ }
Build_OP(TOP_shufpd, tmp26, tmp25, tmp25, Gen_Literal_TN(1, 1), ops);
Build_OP(TOP_fdiv128v64, tmp27, tmp26, tmp24, ops);
Build_OP(TOP_cvtpd2ps, tmp28, tmp27, ops);
@@ -7245,9 +7284,15 @@
Build_OP(TOP_fmovddup, tmp1, op2, ops);
Build_OP(TOP_shufpd, tmp2, op2, op2, Gen_Literal_TN(1, 1), ops);
Build_OP(TOP_fmovddup, tmp3, tmp2, ops);
- Build_OP(TOP_fmul128v64, tmp4, op0, tmp1, ops);
- Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops);
- Build_OP(TOP_faddsub128v64, result, tmp4, tmp5, ops);
+ if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+ Is_Target_AVX() && Is_Target_FMA4()) {
+ Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops);
+ Build_OP(TOP_vfmaddsubpd, result, op0, tmp1, tmp5, ops);
+ } else {
+ Build_OP(TOP_fmul128v64, tmp4, op0, tmp1, ops);
+ Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops);
+ Build_OP(TOP_faddsub128v64, result, tmp4, tmp5, ops);
+ }
break;
}
case INTRN_V16C8CONJG:
------------------------------------------------------------------------------
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Open64-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/open64-devel