Author: mberg
Date: 2011-04-07 15:33:55 -0400 (Thu, 07 Apr 2011)
New Revision: 3539
Modified:
trunk/osprey/be/cg/cg.cxx
trunk/osprey/be/cg/cg_flags.cxx
trunk/osprey/be/cg/ebo.cxx
trunk/osprey/be/cg/x8664/cgtarget.cxx
trunk/osprey/be/cg/x8664/ebo_special.cxx
trunk/osprey/be/cg/x8664/exp_loadstore.cxx
trunk/osprey/be/opt/opt_estr.cxx
trunk/osprey/common/com/config_wopt.cxx
trunk/osprey/common/com/config_wopt.h
Log:
Addition of -WOPT:SIB=<on|off> flag and its functionality to support
Scaled-Index-Base address mode generation. Also extended pattern
matching for add/sub instructions to generate inc and dec
instructions.
Modified: trunk/osprey/be/cg/cg.cxx
===================================================================
--- trunk/osprey/be/cg/cg.cxx 2011-04-07 17:28:58 UTC (rev 3538)
+++ trunk/osprey/be/cg/cg.cxx 2011-04-07 19:33:55 UTC (rev 3539)
@@ -748,6 +748,7 @@
Set_Error_Phase( "Code Generation" );
Start_Timer( T_CodeGen_CU );
+
#ifdef TARG_X8664
// Cannot enable emit_unwind_info if Force_Frame_Pointer is not set
// Need this flag set for C++ exceptions and for -g
Modified: trunk/osprey/be/cg/cg_flags.cxx
===================================================================
--- trunk/osprey/be/cg/cg_flags.cxx 2011-04-07 17:28:58 UTC (rev 3538)
+++ trunk/osprey/be/cg/cg_flags.cxx 2011-04-07 19:33:55 UTC (rev 3539)
@@ -296,7 +296,7 @@
UINT64 CG_p2align_freq = 10000;
UINT32 CG_p2align_max_skip_bytes = 3;
UINT32 CG_movnti = 1000;
-BOOL CG_use_incdec = FALSE;
+BOOL CG_use_incdec = TRUE;
BOOL CG_use_xortozero = TRUE; // bug 8592
BOOL CG_use_xortozero_Set = FALSE;
BOOL CG_use_test = FALSE;
Modified: trunk/osprey/be/cg/ebo.cxx
===================================================================
--- trunk/osprey/be/cg/ebo.cxx 2011-04-07 17:28:58 UTC (rev 3538)
+++ trunk/osprey/be/cg/ebo.cxx 2011-04-07 19:33:55 UTC (rev 3539)
@@ -2970,6 +2970,12 @@
op_replaced = Constant_Operand1 (op, opnd_tn, opnd_tninfo);
}
}
+#ifdef TARG_X8664
+ if (!op_replaced && (OP_code(op) == TOP_inc32 || OP_code(op) ==
TOP_dec32
+ || OP_code(op) == TOP_inc64 || OP_code(op) ==
TOP_dec64 )
+ )
+ op_replaced = Constant_Operand1 (op, opnd_tn, opnd_tninfo);
+#endif
if (o1_idx >= 0) {
tn = opnd_tn[o1_idx];
if (!op_replaced &&
Modified: trunk/osprey/be/cg/x8664/cgtarget.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/cgtarget.cxx 2011-04-07 17:28:58 UTC (rev
3538)
+++ trunk/osprey/be/cg/x8664/cgtarget.cxx 2011-04-07 19:33:55 UTC (rev
3539)
@@ -166,6 +166,11 @@
case TOP_ldhpdx:
case TOP_ldhpdxx:
case TOP_ldhpd_n32:
+ // only source operand of cvtps2pd can be memory, so the ref is 8 bytes.
+ case TOP_cvtps2pd:
+ case TOP_cvtps2pd_x:
+ case TOP_cvtps2pd_xx:
+ case TOP_cvtps2pd_xxx:
return 8;
default:
return 16;
Modified: trunk/osprey/be/cg/x8664/ebo_special.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/ebo_special.cxx 2011-04-07 17:28:58 UTC (rev
3538)
+++ trunk/osprey/be/cg/x8664/ebo_special.cxx 2011-04-07 19:33:55 UTC (rev
3539)
@@ -148,7 +148,7 @@
static BOOL Convert_Imm_And( OP* op, TN *tnr, TN *tn, INT64 imm_val,
EBO_TN_INFO *tninfo );
static BOOL Convert_Imm_Mul( OP* op, TN *tnr, TN *tn, INT64 imm_val,
EBO_TN_INFO *tninfo );
static BOOL Convert_Imm_Or( OP* op, TN *tnr, TN *tn, INT64 imm_val,
EBO_TN_INFO *tninfo );
-static BOOL Convert_Imm_Add( OP* op, TN *tnr, TN *tn, INT64 imm_val,
EBO_TN_INFO *tninfo );
+static BOOL Convert_Imm_Add( OP* op, TN *tnr, TN *tn, INT64 imm_val,
EBO_TN_INFO *tninfo, BOOL simplify_iadd );
static BOOL Convert_Imm_Xor( OP* op, TN *tnr, TN *tn, INT64 imm_val,
EBO_TN_INFO *tninfo );
static BOOL Convert_Imm_Cmp( OP* op, TN *tnr, TN *tn, INT64 imm_val,
EBO_TN_INFO *tninfo );
@@ -1477,7 +1477,8 @@
/* Attempt to convert an add of 'tn' + 'imm_val' into an addi. Return
TRUE if we succeed, FALSE otherwise. */
static BOOL
-Convert_Imm_Add (OP *op, TN *tnr, TN *tn, INT64 imm_val, EBO_TN_INFO *tninfo)
+Convert_Imm_Add (OP *op, TN *tnr, TN *tn, INT64 imm_val,
+ EBO_TN_INFO *tninfo, BOOL simplify_iadd)
{
#if Is_True_On
if (!(EBO_Opt_Mask & EBO_CONVERT_IMM_ADD)) return FALSE;
@@ -1501,11 +1502,26 @@
new_op = Mk_OP(new_opcode, tnr, tn);
} else if (ISA_LC_Value_In_Class ( imm_val, LC_simm32)) {
- if ( OP_code(op) == TOP_addi32 || OP_code(op) == TOP_addi64 ||
- OP_code(op) == TOP_lea32 || OP_code(op) == TOP_lea64 )
+ // Use simplify_iadd to guard against inc/dec forms which
+ // come from addi-addi combinations.
+ if ( simplify_iadd ) {
+ if ( OP_code(op) == TOP_lea32 || OP_code(op) == TOP_lea64 ) {
+ return FALSE;
+ } else if ( OP_code(op) == TOP_addi32 || OP_code(op) == TOP_addi64 ) {
+ if ( ( imm_val != 1 ) && ( imm_val != -1 ) )
+ return FALSE;
+ else if ( Is_Target_32bit() )
+ return FALSE;
+ }
+ } else if ( OP_code(op) == TOP_addi32 || OP_code(op) == TOP_addi64 ||
+ OP_code(op) == TOP_lea32 || OP_code(op) == TOP_lea64 ) {
return FALSE;
+ }
new_opcode = is_64bit ? TOP_addi64 : TOP_addi32;
BOOL rflags_read = FALSE;
+ if ( simplify_iadd )
+ new_opcode = OP_code(op);
+
// If there is an instruction that is awaiting a rflags update then,
// do not convert the current op.
for( OP* next_op = OP_next(op); next_op != NULL;
@@ -1535,6 +1551,17 @@
TOP_is_change_rflags( OP_code(op) )))))
return FALSE;
+ if ( simplify_iadd ) {
+ bool valid_inc_dec = true;
+ if ( is_64bit && (OP_code(op) != TOP_addi64))
+ valid_inc_dec = false;
+ else if (!is_64bit && (OP_code(op) != TOP_addi32))
+ valid_inc_dec = false;
+
+ if (valid_inc_dec == false)
+ return FALSE;
+ }
+
if (new_opcode != TOP_inc32 && new_opcode != TOP_inc64 &&
new_opcode != TOP_dec32 && new_opcode != TOP_dec64)
new_op = Mk_OP(new_opcode, tnr, tn, Gen_Literal_TN(imm_val, 4));
@@ -1617,7 +1644,8 @@
opcode == TOP_add64 ||
opcode == TOP_lea32 ||
opcode == TOP_lea64)
- return Convert_Imm_Add(op, tnr, tn1, TN_value(tn0), opnd_tninfo[o1_idx]);
+ return Convert_Imm_Add(op, tnr, tn1, TN_value(tn0),
+ opnd_tninfo[o1_idx], false);
return FALSE;
}
@@ -1850,7 +1878,18 @@
return TRUE;
}
+BOOL OP_iadd_inc(OP* op)
+{
+ if (OP_iadd(op)) return TRUE;
+ TOP top = OP_code(op);
+ if (top == TOP_inc32 || top == TOP_inc64 ||
+ top == TOP_dec32 || top == TOP_dec64)
+ return TRUE;
+ return FALSE;
+}
+
+
/*
* Look at an exression that has a constant second operand and attempt to
* simplify the computations.
@@ -1886,6 +1925,11 @@
TN *tn0 = opnd_tn[o0_idx];
TN *tn1 = opnd_tn[o1_idx];
+ if (OP_code(op) == TOP_inc32 || OP_code(op) == TOP_inc64)
+ tn1 = Gen_Literal_TN(1, 4);
+ else if (OP_code(op) == TOP_dec32 || OP_code(op) == TOP_dec64)
+ tn1 = Gen_Literal_TN(-1, 4);
+
TN *tnr = OP_has_result(op) ? OP_result(op,0) : NULL;
/* Don't mess with symbols. */
@@ -1921,8 +1965,10 @@
opcode == TOP_add64 ||
opcode == TOP_lea32 ||
opcode == TOP_lea64 )
- return Convert_Imm_Add(op, tnr, tn0, imm_val, opnd_tninfo[o0_idx]);
+ return Convert_Imm_Add( op, tnr, tn0, imm_val,
+ opnd_tninfo[o0_idx], false );
+
if( OP_imul( op ) )
return Convert_Imm_Mul( op, tnr, tn0, imm_val, opnd_tninfo[o0_idx] );
@@ -1943,12 +1989,16 @@
TOP pred_opcode = OP_code(pred_op);
/* Look for a sequence of two addi that can be combined. */
- if (OP_iadd(op) && OP_iadd(pred_op))
+ if (OP_iadd_inc(op) && OP_iadd_inc(pred_op))
{
INT ptn0_idx = 0;
INT ptn1_idx = 1;
TN *ptn0 = OP_opnd(pred_op, ptn0_idx);
TN *ptn1 = OP_opnd(pred_op, ptn1_idx);
+ if (OP_code(pred_op) == TOP_inc32 || OP_code(pred_op) == TOP_inc64)
+ ptn1 = Gen_Literal_TN(1, 4);
+ else if (OP_code(pred_op) == TOP_dec32 || OP_code(pred_op) == TOP_dec64)
+ ptn1 = Gen_Literal_TN(-1, 4);
if (TN_is_constant(ptn1) && !TN_is_symbol(ptn1))
{
@@ -1958,7 +2008,7 @@
if (EBO_tn_available(bb, ptn0_tninfo))
{
const INT64 new_val = imm_val + TN_value(ptn1);
- if (Convert_Imm_Add(op, tnr, ptn0, new_val, ptn0_tninfo))
+ if (Convert_Imm_Add(op, tnr, ptn0, new_val, ptn0_tninfo, false))
{
if (EBO_Trace_Optimization)
fprintf(TFile,"\tcombine immediate adds\n");
@@ -1969,6 +2019,14 @@
}
}
+ if ( opcode == TOP_addi32 ||
+ opcode == TOP_addi64 ) {
+ if ( ( imm_val == 1 ) || ( imm_val == -1 ) ) {
+ return Convert_Imm_Add( op, tnr, tn0, imm_val,
+ opnd_tninfo[o0_idx], true );
+ }
+ }
+
return FALSE;
}
@@ -2691,6 +2749,7 @@
return TRUE;
}
+static inline TN* OP_opnd_use( OP* op, ISA_OPERAND_USE use );
BOOL Delete_Unwanted_Prefetches ( OP* op )
{
@@ -2703,7 +2762,9 @@
OP *incr = NULL;
OP *as_opnd = NULL;
OP *as_result = NULL;
+ OP *leaxx = NULL;
OP *load_store = NULL;
+ BOOL sib = FALSE;
BB* bb = OP_bb( op );
OP *next = BB_first_op( bb );
@@ -2714,7 +2775,9 @@
if(PF_GET_KEEP_ANYWAY(WN_prefetch_flag(mem_wn)))
return FALSE;
#endif
- if (OP_find_opnd_use( op, OU_base ) >= 0)
+ if (OP_find_opnd_use( op, OU_base ) >= 0 &&
+ // the prefetch instruction has passed a call of this function, so pass
it.
+ Get_Top_For_Addr_Mode(OP_code(op), BASE_MODE) == OP_code(op))
base = OP_opnd( op, OP_find_opnd_use( op, OU_base ));
else
return FALSE; // Can not analyze further; make safe assumption.
@@ -2729,21 +2792,43 @@
as_result = next;
else if (OP_opnd(next, 0) == base)
as_opnd = next;
- }
+ } else if ((OP_code(next) == TOP_leax32 || OP_code(next) == TOP_leax64)
+ && OP_result(next, 0) == base)
+ leaxx = next;
next = OP_next(next);
}
+ INT delta_base;
if (!incr) {
- if (!as_result && !as_opnd)
+ if (!as_result && !as_opnd && !leaxx)
return TRUE;
- else if (as_result)
- incr = as_result;
+ else if (leaxx)
+ { // further analyze the two terms for RPR
+ TN* term;
+ term = OP_opnd_use(leaxx, OU_index);
+
+ OP *w_incr;
+ for (w_incr = BB_first_op(bb); w_incr != NULL; w_incr =
OP_next(w_incr))
+ {
+ if (((OP_code(w_incr) == TOP_addi32 || OP_code(w_incr) ==
TOP_addi64)) &&
+ (OP_results(w_incr) != 0 && OP_result(w_incr, 0) == term &&
+ OP_opnd(w_incr, 0) == term))
+ break;
+ }
+ if (w_incr != NULL){
+ sib = TRUE;
+ delta_base = TN_value(OP_opnd(w_incr,1)) *
(TN_value(OP_opnd_use(leaxx,OU_scale)));
+ } else
+ return TRUE;
+ } else if (as_result)
+ incr = as_result;
else
incr = as_opnd;
}
- INT delta_base = TN_value(OP_opnd(incr, 1));
+ if (!sib)
+ delta_base = TN_value(OP_opnd(incr, 1));
next = BB_first_op( bb );
while (next && !load_store) {
@@ -4110,12 +4195,21 @@
break;
case TOP_addi32:
+ case TOP_inc32:
+ case TOP_dec32:
if( Is_Target_64bit() )
return FALSE;
// fall thru
case TOP_addi64:
+ case TOP_inc64:
+ case TOP_dec64:
a.base = OP_opnd( addr_op, 0 );
- a.offset = OP_opnd( addr_op, 1 );
+ if (top == TOP_inc32 || top == TOP_inc64)
+ a.offset = Gen_Literal_TN(1, 4);
+ else if (top == TOP_dec32 || top == TOP_dec64)
+ a.offset = Gen_Literal_TN(-1, 4);
+ else
+ a.offset = OP_opnd( addr_op, 1 );
break;
case TOP_mov32:
Modified: trunk/osprey/be/cg/x8664/exp_loadstore.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/exp_loadstore.cxx 2011-04-07 17:28:58 UTC (rev
3538)
+++ trunk/osprey/be/cg/x8664/exp_loadstore.cxx 2011-04-07 19:33:55 UTC (rev
3539)
@@ -192,7 +192,7 @@
if ( rclass == ISA_REGISTER_CLASS_mmx )
return base != NULL ? TOP_ld64_2m : TOP_ld64_2m_n32;
else if ( rclass == ISA_REGISTER_CLASS_float )
- return base != NULL ? TOP_ld64_2sse : TOP_ld64_2sse_n32;
+ return base != NULL ? TOP_ldlps : TOP_ldlps_n32;
else
return base != NULL ? TOP_ld64 : TOP_ld64_off;
case MTYPE_M8I1:
Modified: trunk/osprey/be/opt/opt_estr.cxx
===================================================================
--- trunk/osprey/be/opt/opt_estr.cxx 2011-04-07 17:28:58 UTC (rev 3538)
+++ trunk/osprey/be/opt/opt_estr.cxx 2011-04-07 19:33:55 UTC (rev 3539)
@@ -1002,7 +1002,19 @@
return FALSE;
}
+static
+BOOL Is_address_const(OPERATOR opr, const CODEREP *expr,
+ const CODEREP *expr_sib, const CODEREP *expr_parent)
+{
+ if (!WOPT_Enable_SIB) return FALSE;
+ if (opr == OPR_MPY && expr->Kind() == CK_CONST &&
+ (expr->Const_val() == 2 ||
+ expr->Const_val() == 4 ||
+ expr->Const_val() == 8 ))
+ return TRUE;
+ return FALSE;
+}
//======================================================================
// Determine if this occurrence is a strength-reduction candidate
@@ -1102,7 +1114,8 @@
opr == OPR_ADD || opr == OPR_SUB))
{
if (Is_cvt_linear(use_opnd0) &&
- Is_implicit_cvt_linear(cr->Dtyp(), use_opnd0)) {
+ Is_implicit_cvt_linear(cr->Dtyp(), use_opnd0) &&
+ !Is_address_const(opr, use_opnd1, use_opnd0, cr)) {
return TRUE;
}
}
@@ -1115,7 +1128,8 @@
opr == OPR_ADD || opr == OPR_SUB))
{
if (Is_cvt_linear(use_opnd1) &&
- Is_implicit_cvt_linear(cr->Dtyp(), use_opnd1)) {
+ Is_implicit_cvt_linear(cr->Dtyp(), use_opnd1) &&
+ !Is_address_const(opr, use_opnd0, use_opnd1, cr)) {
return TRUE;
}
}
@@ -1187,7 +1201,8 @@
opr == OPR_ADD || opr == OPR_SUB))
{
if (Is_cvt_linear(use_opnd0) &&
- Is_implicit_cvt_linear(cr->Dtyp(), use_opnd0)) {
+ Is_implicit_cvt_linear(cr->Dtyp(), use_opnd0) &&
+ !Is_address_const(opr, use_opnd1, use_opnd0, cr)) {
return TRUE;
}
}
@@ -1200,7 +1215,8 @@
opr == OPR_ADD || opr == OPR_SUB))
{
if (Is_cvt_linear(use_opnd1) &&
- Is_implicit_cvt_linear(cr->Dtyp(), use_opnd1)) {
+ Is_implicit_cvt_linear(cr->Dtyp(), use_opnd1) &&
+ !Is_address_const(opr, use_opnd0, use_opnd1, cr)) {
return TRUE;
}
}
Modified: trunk/osprey/common/com/config_wopt.cxx
===================================================================
--- trunk/osprey/common/com/config_wopt.cxx 2011-04-07 17:28:58 UTC (rev
3538)
+++ trunk/osprey/common/com/config_wopt.cxx 2011-04-07 19:33:55 UTC (rev
3539)
@@ -207,6 +207,7 @@
BOOL WOPT_Enable_MP_varref = TRUE;
const BOOL WOPT_Enable_MP_Const_Prop = TRUE;
BOOL WOPT_Enable_New_SR = TRUE;
+BOOL WOPT_Enable_SIB = FALSE;
INT32 WOPT_Enable_New_SR_Limit = 0;
BOOL WOPT_Enable_Output_Copy = TRUE;
INT32 WOPT_Enable_Ocopy_Lookupstmt = 2;
@@ -587,6 +588,8 @@
#endif
{ OVK_BOOL, OV_VISIBLE, TRUE, "new_sr", "",
0, 0, 0, &WOPT_Enable_New_SR, NULL },
+ { OVK_BOOL, OV_VISIBLE, FALSE, "sib", "sib",
+ 0, 0, 0, &WOPT_Enable_SIB, NULL },
{ OVK_INT32, OV_VISIBLE, TRUE, "new_sr_limit", "new_sr_limit",
INT32_MAX, 0, INT32_MAX, &WOPT_Enable_New_SR_Limit, NULL },
{ OVK_BOOL, OV_VISIBLE, TRUE, "ocopy", "ocopy",
Modified: trunk/osprey/common/com/config_wopt.h
===================================================================
--- trunk/osprey/common/com/config_wopt.h 2011-04-07 17:28:58 UTC (rev
3538)
+++ trunk/osprey/common/com/config_wopt.h 2011-04-07 19:33:55 UTC (rev
3539)
@@ -214,6 +214,7 @@
extern const BOOL WOPT_Enable_MP_Const_Prop; /* perform const prop into MP
region */
extern BOOL WOPT_Enable_New_SR; /* new strength-reduction */
extern BOOL WOPT_Enable_New_SR_Limit;
+extern BOOL WOPT_Enable_SIB; /* abandon some SR candidate for x86
SIB */
extern BOOL WOPT_Enable_Output_Copy; /* output copy propagation */
extern BOOL WOPT_Enable_Ocopy_Lookupstmt;
extern BOOL WOPT_Enable_Parm; /* insert OPTPARM over parms */
------------------------------------------------------------------------------
Xperia(TM) PLAY
It's a major breakthrough. An authentic gaming
smartphone on the nation's most reliable network.
And it wants your games.
http://p.sf.net/sfu/verizon-sfdev
_______________________________________________
Open64-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/open64-devel