Author: xan
Date: 2011-03-23 13:49:46 -0400 (Wed, 23 Mar 2011)
New Revision: 3525
Modified:
trunk/osprey/be/opt/opt_dse.cxx
Log:
Add the dead store elimination enhancement back, since some
performance degradation was observed without it,
and also fix bug 707 by checking the full alias set of each mu-node aux-id.
This check-in has been reviewed by Sun Chan and approved by Suneel Jain.
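
For readers outside the DSE pass, a minimal sketch of the store pattern this
enhancement targets (illustrative only; the function and names below are
hypothetical and not part of the patch):

  /* Hypothetical example, not from the patch: the first store to *p is
   * dead when a later store overwrites the same location with no
   * intervening use, so DSE may delete it.  An intervening read through
   * a possibly-aliased pointer q must keep the first store live; that
   * is the case the mu-node alias-set walk added for bug 707 guards.
   */
  void example(int *p, int *q)
  {
    *p = 1;          /* dead if nothing reads *p before the next store */
    /* int t = *q;      if q may alias p, this use keeps "*p = 1" live */
    *p = 2;          /* overwrites the previous value */
  }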
Modified: trunk/osprey/be/opt/opt_dse.cxx
===================================================================
--- trunk/osprey/be/opt/opt_dse.cxx 2011-03-22 23:32:46 UTC (rev 3524)
+++ trunk/osprey/be/opt/opt_dse.cxx 2011-03-23 17:49:46 UTC (rev 3525)
@@ -87,6 +87,10 @@
#include "opt_ssa.h"
#include "opt_mu_chi.h"
#include "opt_util.h"
+#include <vector>
+using std::vector;
+#include "opt_alias_rule.h"
+#include "idx_32_set.h"
// ====================================================================
// ====================================================================
@@ -104,6 +108,8 @@
#ifdef TARG_SL
vector <WN *> *_injury_aux_intrnop;
#endif
+ vector <WN *> *_last_store_vec;
+ vector <IDX_32_SET *> * _alias_aux_vec;
//
// access methods
@@ -131,7 +137,7 @@
void Set_Required_VSE( VER_STAB_ENTRY *, BOOL , WN * ) const;
void Set_Required_PHI( VER_STAB_ENTRY *vse, WN *ref_wn ) const;
void Set_Required_MU( MU_NODE *mu, BOOL real_use ) const;
- void Set_Required_CHI( CHI_NODE *chi ) const;
+ void Set_Required_CHI( CHI_NODE *chi, BOOL *chi_is_live ) const;
void Set_Required_WN( WN *wn ) const;
void Add_EH_exposed_use(WN *call) const;
#ifdef KEY
@@ -148,6 +154,25 @@
void Repair_Injured_AuxIntrnOP(void) const ;
void Append_Injured_AuxIntrnOp(WN *wn) const {
_injury_aux_intrnop->insert(_injury_aux_intrnop->begin(), wn);};
#endif
+ WN* Last_store(AUX_ID aid) const
+ { return (*_last_store_vec)[aid]; }
+ void Set_last_store( AUX_ID aid, WN *store) const
+ { (*_last_store_vec)[aid] = store; }
+ BOOL Aliased_aux( AUX_ID id1, AUX_ID id2) const
+ {
+ if((*_alias_aux_vec)[id1]->MemberP(id2))
+ return TRUE;
+ else
+ return FALSE;
+ }
+ //OSP_468, remove all Set_Required_Imp_VSE()
+ //void Set_Required_Imp_VSE( VER_ID vid, BOOL real_use) const;
+ BOOL Mem_WN_equiv_rec(WN *wn1, WN *wn2) const;
+ BOOL Mem_WN_equiv(WN *wn1, WN* wn2) const;
+ BOOL Same_memloc( WN* store1, WN* store2) const;
+ VER_ID Prop_vsym_new_result( VER_ID vid ) const;
+ void Propagate_vsym_wn( WN *wn ) const;
+ void Propagate_vsym_bb( BB_NODE *bb ) const;
public:
@@ -161,6 +186,27 @@
_injury_aux_intrnop = CXX_NEW(vector<WN *>, pool);
#endif
+ //init _last_store_vec
+ INT asym_count = Opt_stab()->Lastidx() + 1;
+ _last_store_vec = CXX_NEW(vector<WN *>, pool);
+ _last_store_vec->insert(_last_store_vec->end(), asym_count, (WN*)NULL);
+
+ //init _alias_aux_vec
+ _alias_aux_vec = CXX_NEW(vector<IDX_32_SET*>, pool);
+ for( INT aid = 0; aid < asym_count; aid++) {
+ IDX_32_SET *alias_set = CXX_NEW( IDX_32_SET(asym_count, pool, OPTS_FALSE), pool);
+ _alias_aux_vec->push_back(alias_set);
+ }
+ for(INT aid = 1; aid<asym_count; aid++) {
+ (*_alias_aux_vec)[aid]->Union1D(aid);
+ POINTS_TO *apt = Opt_stab()->Points_to(aid);
+ for(INT nid = aid +1; nid<asym_count; nid++) {
+ if(Opt_stab()->Rule()->Aliased_Memop( Opt_stab()->Points_to(nid), apt)) {
+ (*_alias_aux_vec)[aid]->Union1D(nid);
+ (*_alias_aux_vec)[nid]->Union1D(aid);
+ }
+ }
+ }
}
~DSE( void )
@@ -400,8 +446,8 @@
// we only need to propagate this usage if the symbol
// was not already marked as having a use
if ( vse->Any_use() ) return;
- vse->Set_Any_use();
-
+
+ BOOL vse_live=TRUE;
// now recursively follow this symbol's use-def chain
switch ( vse->Type() ) {
case WHIRL_STMT:
@@ -415,7 +461,7 @@
Set_Required_PHI( vse, ref_wn );
break;
case CHI_STMT:
- Set_Required_CHI( vse->Chi() );
+ Set_Required_CHI( vse->Chi(), &vse_live );
break;
case ENTRY_STMT:
// no definition, value is just live-in to the region
@@ -430,8 +476,12 @@
}
if ( Tracing() ) {
- fprintf( TFile, "<dse> Required VSE: var:%d version:%d\n",
+ if(vse_live)
+ fprintf( TFile, "<dse> Required VSE: var:%d version:%d\n",
vse->Aux_id(), vse->Version() );
+ else
+ fprintf( TFile, "<dse> Not Required VSE: var:%d version:%d\n",
+ vse->Aux_id(), vse->Version() );
}
}
@@ -456,6 +506,7 @@
for ( INT32 opndnum = 0; opndnum < phi->Size(); opndnum++ ) {
VER_ID phi_opnd = phi->Opnd(opndnum);
VER_STAB_ENTRY *sym = Opt_stab()->Ver_stab_entry(phi_opnd);
+ Set_last_store( sym->Aux_id(), NULL);
Set_Required_VSE( sym, FALSE, ref_wn );
}
}
@@ -469,6 +520,21 @@
DSE::Set_Required_MU( MU_NODE *mu, BOOL real_use ) const
{
VER_STAB_ENTRY *ver = Opt_stab()->Ver_stab_entry(mu->Opnd());
+ Set_last_store(ver->Aux_id(), NULL);
+
+ // there may be implied mu-nodes, so we need to traverse the alias set
+ const BS *alias_set = Opt_stab()->Rule()->Alias_Set_Indirect( Opt_stab());
+ for (AUX_ID idx = BS_Choose( alias_set );
+ idx != (AUX_ID) BS_CHOOSE_FAILURE;
+ idx = BS_Choose_Next ( alias_set, idx )) {
+
+ // Volatiles do not appear in any mu or chi
+ if (!Opt_stab()->Aux_stab_entry(idx)->Is_volatile() ||
+ Opt_stab()->Aux_stab_entry(idx)->Is_virtual() ){
+ Set_last_store(idx, NULL);
+ }
+ }
+
Set_Required_VSE( ver, real_use, NULL );
}
@@ -487,13 +553,110 @@
opt_stab->Ver_stab_entry(WN_ver(rhs))->Aux_id());
}
+BOOL
+DSE::Mem_WN_equiv_rec(WN *wn1, WN *wn2) const
+{
+ if (!wn1 || !wn2) return FALSE;
+ if (!Mem_WN_equiv(wn1,wn2)) {
+ return FALSE;
+ }
+ for (INT i=0; i<WN_kid_count(wn1); i++) {
+ if (!Mem_WN_equiv_rec(WN_kid(wn1,i),WN_kid(wn2,i))) {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+BOOL
+DSE::Mem_WN_equiv(WN *wn1, WN *wn2) const
+{
+ if (!WN_Equiv(wn1, wn2))
+ return FALSE;
+
+ OPERATOR opr = WN_operator(wn1);
+ if(OPERATOR_has_field_id(opr)) {
+ if(WN_field_id(wn1) != WN_field_id(wn2))
+ return FALSE;
+ }
+
+ if(opr == OPR_STBITS || opr == OPR_ISTBITS || opr == OPR_LDBITS || opr == OPR_ILDBITS) {
+ if( WN_bit_offset(wn1) != WN_bit_offset(wn2))
+ return FALSE;
+ if( WN_bit_size(wn1) != WN_bit_size(wn2))
+ return FALSE;
+ }
+
+ if (WN_has_mu(wn1, Cfg()->Rgn_level())) {
+ MU_NODE *mu1 = Opt_stab()->Get_occ(wn1)->Mem_mu_node();
+ MU_NODE *mu2 = Opt_stab()->Get_occ(wn2)->Mem_mu_node();
+ if(mu1 != NULL && mu2 != NULL && mu1->Opnd() != mu2->Opnd() )
+ return FALSE;
+ if((mu1==NULL) != (mu2 == NULL))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+BOOL
+DSE::Same_memloc( WN* store1, WN* store2) const
+{
+ FmtAssert(OPERATOR_is_scalar_istore(WN_operator(store1)) ||
+ OPERATOR_is_scalar_store (WN_operator(store1)) ||
+ WN_operator(store1) == OPR_MSTORE,
+ ("DSE::Same_memloc: store1 is not istore"));
+ FmtAssert(OPERATOR_is_scalar_istore(WN_operator(store2)) ||
+ OPERATOR_is_scalar_store (WN_operator(store2)) ||
+ WN_operator(store2) == OPR_MSTORE,
+ ("DSE::Same_memloc: store2 is not istore"));
+
+ OCC_TAB_ENTRY *occ1 = Opt_stab()->Get_occ(store1);
+ OCC_TAB_ENTRY *occ2 = Opt_stab()->Get_occ(store2);
+ FmtAssert(occ1 != NULL && occ2 != NULL, ("DSE::Same_memloc: occ == NULL"));
+ POINTS_TO *pt1 = occ1->Points_to();
+ POINTS_TO *pt2 = occ2->Points_to();
+ FmtAssert(pt1 != NULL && pt2 != NULL, ("DSE::Same_memloc: points_to == NULL"));
+
+ if (Opt_stab()->Rule()->Same_location(store1, store2, pt1, pt2)) {
+#if defined(TARG_NVISA)
+ //for dynamic arrays, we need to be more conservative
+ INT i;
+ TY_IDX ty;
+ ty = ST_type(pt1->Base());
+ if (TY_kind(ty) == KIND_ARRAY) {
+ for (i = 0; i < TY_AR_ndims(ty); i++) {
+ if ( !TY_AR_const_lbnd(ty, i) ||
+ !TY_AR_const_ubnd(ty, i) )
+ return FALSE;
+ }
+ }
+
+ ty = ST_type(pt2->Base());
+ if (TY_kind(ty) == KIND_ARRAY) {
+ for (i = 0; i < TY_AR_ndims(ty); i++) {
+ if ( !TY_AR_const_lbnd(ty, i) ||
+ !TY_AR_const_ubnd(ty, i) )
+ return FALSE;
+ }
+ }
+#endif
+ return TRUE;
+ }
+ else if (Mem_WN_equiv(store1, store2)) {
+ if (WN_kid_count(store1)>1 && Mem_WN_equiv_rec(WN_kid1(store1), WN_kid1(store2)))
+ return TRUE;
+ }
+ return FALSE;
+}
+
// ====================================================================
// Recursively set the flags that say this chi statement and the
// variables it references are used.
// ====================================================================
void
-DSE::Set_Required_CHI( CHI_NODE *chi ) const
+DSE::Set_Required_CHI( CHI_NODE *chi, BOOL *chi_is_live ) const
{
AUX_ID vaux = chi->Aux_id();
BOOL real_use = FALSE;
@@ -517,8 +680,37 @@
Opt_stab()->Ver_stab_entry(chi->Result())->Set_Real_use();
}
- Set_Required_WN(chiwn);
+ *chi_is_live = TRUE;
+ if (OPERATOR_is_scalar_istore(WN_operator(chiwn)) ||
+ OPERATOR_is_scalar_store(WN_operator(chiwn)) ||
+ WN_operator(chiwn) == OPR_MSTORE) {
+ WN *last_st = Last_store(vaux);
+ if( last_st != NULL && chiwn != last_st ) {
+ if( Same_memloc(chiwn, last_st) ) {
+ *chi_is_live = FALSE;
+ if ( Tracing() ) {
+ fprintf ( TFile, "DSE::Set_Required_CHI, current chiwn is not set live:\n" );
+ fdump_tree_no_st(TFile, chiwn);
+ }
+ }
+ }
+ if(*chi_is_live)
+ Set_last_store(vaux, chiwn);
+ }
+ if(*chi_is_live) {
+ VER_STAB_ENTRY *vsym = Opt_stab()->Ver_stab_entry(chi->Result());
+ vsym->Set_Any_use();
+ Set_Required_WN(chiwn);
+ }
+
+ if (OPERATOR_is_scalar_istore(WN_operator(chiwn)) ||
+ OPERATOR_is_scalar_store(WN_operator(chiwn)) ||
+ WN_operator(chiwn) == OPR_MSTORE) {
+ if(*chi_is_live)
+ Set_last_store(vaux, chiwn);
+ }
+
// The following breaks the use-def chain. The definition
// of the chi operand can become dse-dead. It violates assertions
// in opt_verify.cxx. We can change the verifier to check this
@@ -578,6 +770,11 @@
if ( WN_has_ver(wn) ) {
VER_STAB_ENTRY *sym = Opt_stab()->Ver_stab_entry(WN_ver(wn));
+ if(OPERATOR_is_scalar_load( WN_operator(wn) )) {
+ //OSP_468
+ //Set_Required_Imp_VSE(WN_ver(wn), TRUE);
+ Set_last_store(sym->Aux_id(), NULL);
+ }
Set_Required_VSE( sym, TRUE, wn );
}
@@ -718,6 +915,130 @@
return FALSE;
}
+
+VER_ID
+DSE::Prop_vsym_new_result( VER_ID vid ) const
+{
+ VER_STAB_ENTRY *vse = Opt_stab()->Ver_stab_entry(vid);
+ if ( vse->Type() == PHI_STMT ) {
+ // we assume that the result is correct
+ return vse->Phi()->Result();
+ }
+ else if ( vse->Type() == CHI_STMT ) {
+ // is this statement live?
+ if (vse->Chi()->Live()) {
+ return vse->Chi()->Result();
+ }
+ else {
+ return Prop_vsym_new_result(vse->Chi()->Opnd());
+ }
+ }
+ else {
+ return vid;
+ }
+}
+
+void
+DSE::Propagate_vsym_wn( WN *wn ) const
+{
+ if ( WN_has_ver(wn) ) {
+ WN_set_ver(wn, Prop_vsym_new_result(WN_ver(wn)));
+ }
+
+ if ( WN_has_mu(wn, Cfg()->Rgn_level()) ) {
+ // process the mu operand as use
+ OCC_TAB_ENTRY *occ = Opt_stab()->Get_occ(wn);
+ MU_NODE *mu = occ->Mem_mu_node();
+ mu->Set_opnd( Prop_vsym_new_result(mu->Opnd()) );
+ }
+ return;
+}
+
+// ====================================================================
+// After we've deleted some globals that had chis with a virtual
+// variable in them, we need to go back and fix up the use-def chain
+// to be correct.
+//
+// This involves "skipping" over dead statements and updating the
+// references. The algorithm does update the dead statements as well
+// however, so each reference only has to look at its definition to see
+// what the propagated value is.
+//
+// Do this in dominator-tree order, so it must be called for the first
+// time with the entry-bb that dominates all blocks.
+// ====================================================================
+
+void
+DSE::Propagate_vsym_bb( BB_NODE *bb ) const
+{
+ // propagate into the phi-nodes, if any
+ PHI_LIST_ITER phi_iter;
+ PHI_NODE *phi;
+ FOR_ALL_ELEM ( phi, phi_iter, Init(bb->Phi_list()) ) {
+ if (phi->Live() ) {
+ for ( INT pkid = 0; pkid < phi->Size(); pkid++ ) {
+ phi->Set_opnd(pkid, Prop_vsym_new_result(phi->Opnd(pkid)));
+ }
+ }
+ }
+
+ // handle the block's statements, INCLUDING non-live ones
+ STMT_ITER stmt_iter;
+ WN *stmt;
+ FOR_ALL_ELEM(stmt, stmt_iter, Init(bb->Firststmt(),bb->Laststmt())) {
+ // handle mu references only on live statements
+ if ( !Is_deleted_statement(stmt) ) {
+ // handle the statement's mu list
+ if ( WN_has_mu(stmt, Cfg()->Rgn_level()) ) {
+ MU_LIST *mu_list = _opt_stab->Get_stmt_mu_list(stmt);
+ if (mu_list) {
+ MU_LIST_ITER mu_iter;
+ MU_NODE *mu;
+ FOR_ALL_NODE( mu, mu_iter, Init(mu_list)) {
+ mu->Set_opnd( Prop_vsym_new_result(mu->Opnd()) );
+ }
+ }
+ }
+
+ for ( INT32 kidnum = 0; kidnum < WN_kid_count(stmt); kidnum++ ) {
+ Propagate_vsym_wn( WN_kid(stmt, kidnum) );
+ }
+
+ // need to handle all chi statements, dead or not
+ // handle the statement's chi list
+ if ( WN_has_chi(stmt, Cfg()->Rgn_level())) {
+ CHI_LIST *chi_list = _opt_stab->Get_generic_chi_list(stmt);
+ if (chi_list) {
+ CHI_LIST_ITER chi_iter;
+ CHI_NODE *chi;
+ FOR_ALL_NODE( chi, chi_iter, Init(chi_list)) {
+ // propagate into the chi node's operand
+ //
+ // only do this for a non-dead chi; otherwise, we can have
+ // live-range overlap when we resurrect this dead chi:
+ // resurrecting only updates along the def chain, not the
+ // use chain.
+ // not updating the dead chi here is ok since all the uses
+ // below this should be dead (or we have an overlapped
+ // live range already).
+ //
+ if (chi->Live())
+ chi->Set_opnd( Prop_vsym_new_result( chi->Opnd() ));
+ }
+ }
+ }
+ }
+ }
+
+ // do copy propagation for this block's dominated nodes
+ BB_NODE *dom_bb;
+ BB_LIST_ITER dom_bb_iter;
+ FOR_ALL_ELEM(dom_bb, dom_bb_iter, Init(bb->Dom_bbs())) {
+ Propagate_vsym_bb(dom_bb);
+ }
+}
+
+
#if defined(TARG_SL)
/* result = intrnsic_slave_op( master, ar2,ar3...)
* the master is the result of master intrnsic op
@@ -909,6 +1230,8 @@
}
}
+ Propagate_vsym_bb( Cfg()->Entry_bb() );
+
if ( Tracing() ) {
fprintf ( TFile, "SSA::Dead_store_elim (after dse)\n" );
FOR_ALL_NODE( ssa_id, ver_stab_iter, Init() ) {