Author: pallavimathew
Date: 2011-07-06 15:11:17 -0400 (Wed, 06 Jul 2011)
New Revision: 3681

Modified:
   trunk/osprey/be/cg/cgexp_internals.h
   trunk/osprey/be/cg/whirl2ops.cxx
   trunk/osprey/be/cg/x8664/expand.cxx
   trunk/osprey/be/com/x8664/betarget.cxx
   trunk/osprey/be/lno/simd.cxx
   trunk/osprey/common/com/config_lno.cxx
   trunk/osprey/common/com/config_lno.h
   trunk/osprey/common/com/opcode_gen_core.cxx
   trunk/osprey/common/com/opcode_gen_core.h
   trunk/osprey/common/com/wn_util.cxx
   trunk/osprey/common/com/wn_util.h
Log:
This patch introduces a framework for vectorizing IF-statements of the
following forms:
  - if (x != 0) { single_istore_statement } else { empty_body }
  - if (x != 0) { if (y != 0) { single_istore_statement } else { empty_body } }
    else { empty_body }
This optimization is turned on by default and can be controlled with
-LNO:simd_vect_if={on/off}.

Sample program:
long array[2000000];
void sample() {
  long i;
  for(i = 0; i < 2000000; i++) {
    if (array[i])
        array[i] ^= i;
  }
  return;
}

Such an if-statement is vectorized by first vectorizing its subexpressions.
The result of the vectorized if-condition is computed by 'pcmpeqq'
(V16I8V16I8EQ) and is used to select between the result of executing the
statement in the if-body and leaving the array element unchanged. This
selection is performed by the 'pblendvb' (V16I1V16I1SELECT) operation. Both of
these are SSE4.1 instructions.
This patch extends the supported types of
 - OPR_SELECT to include V16I1 (to support generation of pblendvb operation).
 - OPR_EQ to include V16I8 (to support generation of pcmpeqq operation).

This patch also recognizes and handles vectorization of invariants rooted at 
OPR_ADD, OPR_SUB and OPR_MPY.

C.R. by Fred Chow, Mei Ye and Jian-Xin Lai.



Modified: trunk/osprey/be/cg/cgexp_internals.h
===================================================================
--- trunk/osprey/be/cg/cgexp_internals.h        2011-07-05 23:09:12 UTC (rev 
3680)
+++ trunk/osprey/be/cg/cgexp_internals.h        2011-07-06 19:11:17 UTC (rev 
3681)
@@ -129,6 +129,7 @@
 extern void Expand_Max (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops);
 extern void Expand_MinMax (TN *dest, TN *dest2, TN *src1, TN *src2, TYPE_ID 
mtype, OPS *ops);
 extern void Expand_Select (TN *dest_tn, TN *cond_tn, TN *true_tn, TN 
*false_tn, TYPE_ID mtype, BOOL float_cond, OPS *ops);
+extern void Expand_Select_To_Blend (TYPE_ID mtype, TN* result, TN* op0, TN* 
op1, TN* op2, OPS *ops);
 extern void Expand_Flop (OPCODE opcode, TN *result, TN *src1, TN *src2, TN 
*src3, OPS *ops);
 
 #ifdef TARG_X8664

Modified: trunk/osprey/be/cg/whirl2ops.cxx
===================================================================
--- trunk/osprey/be/cg/whirl2ops.cxx    2011-07-05 23:09:12 UTC (rev 3680)
+++ trunk/osprey/be/cg/whirl2ops.cxx    2011-07-06 19:11:17 UTC (rev 3681)
@@ -3166,9 +3166,18 @@
   WN   *compare;
   VARIANT variant;
 
+  if (opcode == OPC_V16I1V16I1SELECT) {
+    TN* op1 = Expand_Expr(WN_kid0(select), select, NULL);
+    TN* op2 = Expand_Expr(WN_kid1(select), select, NULL);
+    TN* op3 = Expand_Expr(WN_kid2(select), select, NULL);
 
+    if (result == NULL) 
+      result = Allocate_Result_TN (select, NULL);
+  
+    Expand_Select(result, op1, op2, op3, MTYPE_V16I1, FALSE, &New_OPs); 
//FALSE passed as dummy arg
+    return result;
+  }
 
-
  /*
   *  Expand the true/false before the condition
   */

Modified: trunk/osprey/be/cg/x8664/expand.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/expand.cxx 2011-07-05 23:09:12 UTC (rev 3680)
+++ trunk/osprey/be/cg/x8664/expand.cxx 2011-07-06 19:11:17 UTC (rev 3681)
@@ -4233,6 +4233,11 @@
   BOOL float_cond,
   OPS *ops)
 {
+  if (mtype == MTYPE_V16I1) {
+    Expand_Select_To_Blend(mtype, dest_tn, cond_tn, true_tn, false_tn, ops);
+    return;
+  }
+
   Is_True( TN_register_class(cond_tn) == ISA_REGISTER_CLASS_integer,
           ("Handle this case in Expand_Select") );
   const BOOL non_sse2_fp = MTYPE_is_F10(mtype) ||
@@ -4314,7 +4319,54 @@
   }
 }
   
+//Vector-type SELECTs are expanded to a *blend* operation.
+//For now we only handle vector type V16I1.
 void
+Expand_Select_To_Blend (TYPE_ID mtype, TN* result, TN* op0, TN* op1, TN* op2, 
OPS *ops)
+{
+  FmtAssert(mtype == MTYPE_V16I1, ("Non-vector type passed to 
Expand_Select_To_Blend"));
+  TN* xmm0;
+  if( Trace_Exp ) {
+    fprintf(TFile, "expand %s: ", mtype == MTYPE_V16I1? 
OPCODE_name(OPC_V16I1V16I1SELECT): "***Unsupported opcode***");
+    if (result) Print_TN(result,FALSE);
+    fprintf(TFile, " :- ");
+    if (op0) Print_TN(op0,FALSE);
+    fprintf(TFile, " ");
+    if (op1) Print_TN(op1,FALSE);
+    fprintf(TFile, " ");
+    if (op2) Print_TN(op2,FALSE);
+    fprintf(TFile, " ");
+    fprintf(TFile, "\n");
+  }
+
+  if (!Is_Target_AVX()) {
+    //pblendvb (non-AVX) uses the 'xmm0' register as an implicit argument containing the mask.
+    //To build a TN dedicated to reg xmm0, pass value "1" to Build_Dedicated_TN
+    //instead of "XMM0(enum value of 17)". This avoids an out-of-bounds access
+    //to the array 'v16_ded_tns', which is of size 17. Need to file this bug.
+    xmm0 = Build_Dedicated_TN(ISA_REGISTER_CLASS_float,1,16);
+    Exp_COPY(xmm0, op2, ops);
+    Set_TN_is_global_reg(xmm0);
+  }
+  switch(mtype) {
+  case MTYPE_V16I1:
+    if (Is_Target_Orochi() && Is_Target_AVX())
+      Build_OP(TOP_blendv128v8, result, op0, op1, op2, ops);
+    else
+      Build_OP(TOP_blendv128v8, result, op0, xmm0, op1, ops);
+    break;
+  default:
+    FmtAssert(FALSE,
+              ("Expand_Select_To_Blend: Unsupported mtype (%d)", mtype));
+  }
+
+  if (Trace_Exp) {
+    //Print_OPS appears to be printing extra characters at end of string  
"into  ||| ..."
+    fprintf(TFile, " into "); Print_OPS (ops);
+  }
+}
+
+void
 Expand_Min (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
 {
   Is_True( !TN_has_value( src1 ), ("Expand_Min: src1 has value") );

Modified: trunk/osprey/be/com/x8664/betarget.cxx
===================================================================
--- trunk/osprey/be/com/x8664/betarget.cxx      2011-07-05 23:09:12 UTC (rev 
3680)
+++ trunk/osprey/be/com/x8664/betarget.cxx      2011-07-06 19:11:17 UTC (rev 
3681)
@@ -135,6 +135,11 @@
   case OPR_TRAP:
     FmtAssert( FALSE, ("UNIMPLEMENTED") );
 
+  case OPR_EQ:
+    if(rtype == MTYPE_V16I8)
+      return TOP_cmpeq128v64;
+    else
+      return TOP_UNDEFINED;
   default:
     return TOP_UNDEFINED;
   }

Modified: trunk/osprey/be/lno/simd.cxx
===================================================================
--- trunk/osprey/be/lno/simd.cxx        2011-07-05 23:09:12 UTC (rev 3680)
+++ trunk/osprey/be/lno/simd.cxx        2011-07-06 19:11:17 UTC (rev 3681)
@@ -94,8 +94,13 @@
 #include "small_trips.h"           // for Remove_Unity_Trip_Loop
 
 #define ABS(a) ((a<0)?-(a):(a))
+#define BINARY_OP(opr) (opr == OPR_ADD || opr == OPR_SUB || opr == OPR_MPY || 
opr == OPR_SHL)
 
+static void WN_collect_iloads_nr(std::list<WN *> *wn_list, WN *wn);
 BOOL debug;
+BOOL under_if = FALSE;  // sub-expression of an if-statement,
+                        // also indicates if if-vectorization is turned on.
+BOOL nested_if = FALSE; // sub-expression of a nested-if-statement
 
 extern WN *Split_Using_Preg(WN* stmt, WN* simd_op,
                    ARRAY_DIRECTED_GRAPH16* dep_graph,
@@ -120,6 +125,7 @@
 static REDUCTION_MANAGER *curr_simd_red_manager;       
 
 static void Simd_Mark_Code (WN* wn);
+static TYPE_ID Simd_Get_Vector_Type(WN *istore);
 
 static INT Last_Vectorizable_Loop_Id = 0;
 SIMD_VECTOR_CONF Simd_vect_conf;
@@ -263,8 +269,12 @@
   case OPR_LT: case OPR_GT: case OPR_LE: case OPR_GE:
     if (MTYPE_is_float(desc) && MTYPE_is_integral(rtype))
       return TRUE;
-    else
+    else {
+      if (under_if && 
+         (opr == OPR_NE && MTYPE_is_integral(desc) && 
MTYPE_is_integral(rtype)))
+       return TRUE;
       return FALSE;
+    }
   case OPR_TRUNC:
     if (rtype == MTYPE_I4 && desc == MTYPE_F4)
       return TRUE;
@@ -357,6 +367,157 @@
   }  
 }
 
+
+/*Determine whether an if-statement meets the criteria for vectorization.
+  The if-statement must have one of the following forms:
+  if (x != 0) { single_istore_statement } else {empty_body}
+  if (x != 0) { if (y != 0) {single_istore_statement} else {empty_body}} else {empty_body}
+  The check on whether the sub-expressions of the if-statement are vectorizable
+  is done, as for other statements, by Gather_Vectorizable_Ops_Helper.
+  To extend the vectorization of if-statements to include triple-nested 
+  statements or more, consider using a bitmask in place of under_if/nested_if.
+*/
+BOOL Is_Vectorizable_If(WN* wn, BOOL inner) {
+  BOOL safe;
+  
+  FmtAssert(WN_operator(wn) == OPR_IF, ("Not an IF statement\n"));
+  WN *if_test = WN_if_test(wn);
+  WN *if_then = WN_then(wn);
+  WN *if_else = WN_else(wn);
+
+  FmtAssert(WN_operator(if_else) == OPR_BLOCK, ("Else part of IF is not a 
block"));
+  if (WN_first(if_else) != NULL)
+    //non-empty else-clause
+    return FALSE;
+
+  // if-condition should be of the form (expr != 0)
+  if (WN_operator(if_test) != OPR_NE)
+    return FALSE;
+  if (!((WN_operator(WN_kid1(if_test)) == OPR_INTCONST) && 
+       WN_const_val(WN_kid1(if_test)) == 0))
+    return FALSE;
+
+  FmtAssert(WN_operator(if_then) == OPR_BLOCK, ("Then part of IF is not a 
block"));
+  if (WN_first(if_then) != WN_last(if_then))
+    //multiple statements in the body
+    return FALSE; 
+
+  //Nested-if: if (cond) {if (inner-cond) {...}}
+  if (WN_operator(WN_first(if_then)) == OPR_IF) {
+    if (inner)
+      //triply nested-if.
+      return FALSE;
+    if (!Is_Vectorizable_If(WN_first(if_then), TRUE))
+      return FALSE;
+    //Handle nested-ifs only if the vector lengths of cond and inner-cond 
match,
+    //since these two conditions will be fused for vectorization.
+    WN *nested_if_test = WN_if_test(WN_first(if_then));
+    int vlength_cond = Simd_Get_Vector_Type(if_test);
+    int vlength_inner_cond = Simd_Get_Vector_Type(nested_if_test);
+    if (vlength_cond != vlength_inner_cond)
+      return FALSE;
+    //Vector type must be V16I8 in order to use the SSE4.1 instructions pcmpeqq and pblendvb
+    if (vlength_cond != MTYPE_V16I8)
+      return FALSE;  
+    // The nested-if-condition must not have side effects, since its 
evaluation is speculative.
+    if (WN_has_side_effects(nested_if_test))
+       return FALSE;
+
+    //Check if iloads in nested-if-condition are safe to speculate.
+    std::list<WN *> outer_loads, inner_loads;
+    //Collect iloads in the outer and inner if-conditions.
+    WN_collect_iloads(&outer_loads, if_test); 
+    //Collect iloads non-recursively since a match for the outer-most iload 
implies a match for any inner iloads.
+    WN_collect_iloads_nr(&inner_loads, nested_if_test);
+    std::list<WN *>::iterator outer_ld_iter, inner_ld_iter;
+    for (inner_ld_iter = inner_loads.begin(); inner_ld_iter != 
inner_loads.end(); ++inner_ld_iter) {
+      WN *inner_iload = *inner_ld_iter;
+      safe = FALSE;
+      //For a speculative iload in the inner if-condition to be safe, 
+      //it must also be present in the outer if-condition.
+      //The if-condition contains a single condition, so no need to check for 
short-circuit.
+      for (outer_ld_iter = outer_loads.begin(); outer_ld_iter != 
outer_loads.end(); ++outer_ld_iter) {
+       WN *outer_iload = *outer_ld_iter;
+       if (Tree_Equiv(inner_iload, outer_iload)) {
+         safe = TRUE;
+         break;
+       }
+      }
+      if (!safe)
+       return FALSE;
+    }
+  }
+  else {
+    //the single statement in the if-body should be an istore.
+    if (WN_operator(WN_first(if_then)) != OPR_ISTORE)
+      return FALSE;
+    //the RHS of the istore must not have side-effects because after 
vectorization, 
+    //the RHS is always evaluated regardless of the if-condition.
+    if (WN_has_side_effects(WN_kid0(WN_first(if_then))))
+       return FALSE;
+
+    //Check if iloads in if-body are safe to speculate.
+    WN *outer_if_test = if_test;
+    if(inner) {
+      // This is a nested-if. Check for safe speculation must be done against 
outermost if-condition.
+      WN *loop = Enclosing_Do_Loop(if_test);
+      outer_if_test = WN_if_test(WN_first((WN_do_body(loop)))); 
+    }
+    std::list<WN *> outer_loads, then_loads;
+    //Collect iloads in the outer if-condition.
+    WN_collect_iloads(&outer_loads, outer_if_test); 
+    //Collect iloads from RHS of the istore in the if-body.
+    WN_collect_iloads_nr(&then_loads, WN_kid0(WN_first(if_then))); 
+    std::list<WN *>::iterator outer_ld_iter, then_ld_iter;
+    for (then_ld_iter = then_loads.begin(); then_ld_iter != then_loads.end(); 
++then_ld_iter) {
+      WN *then_iload = *then_ld_iter;
+      safe = FALSE;
+      //For a speculative iload in the if-body to be safe, 
+      //it must also be present in the outer if-condition.
+      for (outer_ld_iter = outer_loads.begin(); outer_ld_iter != 
outer_loads.end(); ++outer_ld_iter) {
+       WN *outer_iload = *outer_ld_iter;
+       if (Tree_Equiv(then_iload, outer_iload)) {
+         safe = TRUE;
+         break;
+       }
+      }
+      if (!safe)
+       return FALSE;
+    }
+    //Check whether the istore in the if-body is safe to speculate.
+    WN *istore = WN_first(if_then);
+    //For the speculative istore (in the if-body) to be safe, 
+    //there must be an iload from the same address present in the outer 
if-condition.
+    safe = FALSE;
+    for (outer_ld_iter = outer_loads.begin(); outer_ld_iter != 
outer_loads.end(); ++outer_ld_iter) {
+      WN *outer_iload = *outer_ld_iter;
+      if (Tree_Equiv(WN_kid1(istore), WN_kid0(outer_iload))) {
+       if ((WN_offset(istore) == WN_offset(outer_iload)) &&
+           (WN_ty(istore) == WN_load_addr_ty(outer_iload)) &&
+           (WN_desc(istore) == WN_desc(outer_iload))) {
+         safe = TRUE;
+         break;
+       }
+      }
+    }
+    if (!safe)
+      return FALSE;
+  }
+  //Vector type of the condition and of the body of the if-statement must match
+  int vlength_cond = Simd_Get_Vector_Type(if_test);
+  int vlength_body = Simd_Get_Vector_Type(WN_first(if_then));
+  if (vlength_cond != vlength_body)
+    return FALSE;
+  if (vlength_cond != MTYPE_V16I8)
+    return FALSE;
+    
+  WN *if_parent =LWN_Get_Parent(wn);
+  if (WN_operator(if_parent) != OPR_BLOCK)
+    return FALSE;
+
+  return TRUE;
+}
+
 extern WN *find_loop_var_in_simple_ub(WN* loop); // defined in 
vintr_fission.cxx
 
 typedef enum {
@@ -376,9 +537,9 @@
     opr=WN_operator(wn);
   }
 
-  // Recognize an invariant expression rooted at OPR_SHL.
+  // Recognize 2 operand invariant expressions.
   // Should eventually be generalized to any 2 operand operation.
-  if (opr == OPR_SHL) {
+  if (BINARY_OP(opr)) {
     if ((simd_operand_kind(WN_kid0(wn), loop) == Invariant) &&
         (simd_operand_kind(WN_kid1(wn), loop) == Invariant))
       return Invariant;
@@ -394,7 +555,6 @@
     if (symbol1==symbol2)
       return Complex;
     DEF_LIST* def_list=Du_Mgr->Ud_Get_Def(wn);
-    WN* loop_stmt=def_list->Loop_stmt();
     WN* body=WN_do_body(loop);
     DEF_LIST_ITER d_iter(def_list);
     for (DU_NODE* dnode=d_iter.First(); !d_iter.Is_Empty();
@@ -958,7 +1118,8 @@
       return FALSE;
   }
 
-  if (OPCODE_is_compare(WN_opcode(wn)) && WN_operator(parent) != OPR_SELECT)
+  if (OPCODE_is_compare(WN_opcode(wn)) && (WN_operator(parent) != OPR_SELECT)
+      && (!under_if && WN_operator(parent) != OPR_IF))
     return FALSE;
 
   //Bug 10148: don't vectoorize F8RECIP if it is MPY's child
@@ -1029,6 +1190,7 @@
     return FALSE;
      
   if (WN_operator(parent) != OPR_ISTORE && WN_operator(parent) != OPR_STID &&
+      !(under_if && WN_operator(parent) == OPR_IF) && 
       !is_vectorizable_op(WN_operator(parent), 
                          WN_rtype(parent), WN_desc(parent)))
     return FALSE;
@@ -1534,22 +1696,40 @@
 {
   SIMD_KIND smallest_kind = INVALID;
 
+  //Should not check under_if in conjunction with OPR_IF here
+  //since Find_Simd_Kind is not called in the context of a single
+  //expression but a stack of vectorizable nodes.
+
   for (INT i=0; i<vec_simd_ops->Elements(); i++){
     WN* simd_op=vec_simd_ops->Top_nth(i);
 
     WN* istore=LWN_Get_Parent(simd_op);
     // bug 2336 - trace up the correct type
     while(istore && !OPCODE_is_store(WN_opcode(istore)) && 
+         (WN_operator(istore) != OPR_IF) && 
          WN_operator(istore) != OPR_DO_LOOP)
       istore = LWN_Get_Parent(istore);
-    FmtAssert(istore || WN_operator(istore) == OPR_DO_LOOP, ("NYI"));  
-
+    FmtAssert(!istore || 
+             WN_operator(istore) == OPR_DO_LOOP ||
+             (WN_operator(istore) == OPR_IF) || 
+             OPCODE_is_store(WN_opcode(istore)), ("NYI"));     
     TYPE_ID type;
     if (WN_desc(istore) == MTYPE_V) 
       type = WN_rtype(istore);
     else 
       type = WN_desc(istore);
 
+    if (WN_operator(istore) == OPR_IF) {
+    //simd_op is part of the if-condition. 
+      type = WN_rtype(simd_op);
+      if (WN_operator(simd_op) == OPR_NE) {
+       //We're assuming that OPR_NE is the root of the condition
+       //and none of its sub-expr will contain OPR_NE. 
+       //Further work: We need to ensure that this is indeed the case.
+       type = WN_desc(simd_op);
+      }
+    }
+
     switch(type) {
     case MTYPE_C4:
       if (smallest_kind > V16C4)
@@ -1646,19 +1826,40 @@
   TYPE_ID rtype = WN_rtype(wn);
   TYPE_ID desc = WN_desc(wn);
   
-  // Recognize invariant sub-expression rooted at OPR_SHL and do not
-  // push it onto the stack of vectorizable operations. 
+  // Recognize 2 operand invariant sub-expression
+  // and do not push it onto the stack of vectorizable operations. 
   // Should eventually be generalized to prevent any 2 operand invariant
   // from being vectorized.
-  if (opr == OPR_SHL && 
-      simd_operand_kind(WN_kid0(wn), LWN_Get_Parent(WN_do_body(loop))) == 
Invariant &&
-      simd_operand_kind(WN_kid1(wn), LWN_Get_Parent(WN_do_body(loop))) == 
Invariant)
-    if (is_vectorizable_op(WN_operator(wn), WN_rtype(wn), WN_desc(wn)))
-      return TRUE;
-  if (opr == OPR_IF || opr == OPR_REGION){
+  WN* body_parent = LWN_Get_Parent(WN_do_body(loop));
+  if (BINARY_OP(opr) &&
+      simd_operand_kind(WN_kid0(wn), body_parent) == Invariant &&
+      simd_operand_kind(WN_kid1(wn), body_parent) == Invariant)
+    if (is_vectorizable_op(WN_operator(wn), WN_rtype(wn), WN_desc(wn))) {
+      WN* parent = LWN_Get_Parent(wn);
+      // Invariant children of a store need to be vectorized as they will not 
be replicated.
+      if (parent && !OPCODE_is_store(WN_opcode(parent)))
+       return TRUE;
+    }
+  if (opr == OPR_REGION){
     Report_Non_Vectorizable_Op(wn);
     return FALSE;
   }
+  if (WN_operator(wn) == OPR_IF) {
+    if (!Simd_vect_conf.Is_SSE41() || !LNO_Simd_Vect_If) {
+      Report_Non_Vectorizable_Op(wn);
+      return FALSE;
+    }
+    if(!Is_Vectorizable_If(wn, FALSE)) { 
+      //ok to always pass FALSE. 
+      //Is_Vectorizable_If will correctly pass TRUE for inner-if on recursive 
call.
+      return FALSE;
+    }
+    if (!under_if)
+      under_if = TRUE;
+    else
+      nested_if = TRUE;
+  }
+
   if (is_vectorizable_op(opr, rtype, desc)){
     if ((opr != OPR_INTRINSIC_OP && 
         Is_Well_Formed_Simd(wn, loop)) ||
@@ -1872,6 +2073,13 @@
     }    
   }
 
+  if (WN_operator(wn) == OPR_IF) {
+    // Done with processing if statement
+    if (!nested_if)
+      under_if = FALSE;
+    else
+      nested_if = FALSE;
+  }
   return TRUE;
 }
 
@@ -3198,8 +3406,9 @@
     while((stmt1=LWN_Get_Parent(stmt)) != body){
        stmt = stmt1;
        if (WN_opcode(stmt)==OPC_BLOCK){
-        under_scf=TRUE;
-        break;
+         if (!(LNO_Simd_Vect_If && Simd_vect_conf.Is_SSE41()) || 
(WN_opcode(LWN_Get_Parent(stmt)) != OPC_IF))
+           under_scf=TRUE;
+         break;
       }
     }     
     if (under_scf)
@@ -3207,11 +3416,14 @@
     TYPE_ID rtype = WN_rtype(simd_op);
     TYPE_ID desc = WN_desc(simd_op);
     // CHANGED
+    if ((LNO_Simd_Vect_If && Simd_vect_conf.Is_SSE41()) && 
(WN_operator(LWN_Get_Parent(simd_op)) == OPR_IF))
+      under_if = TRUE;
     FmtAssert(is_vectorizable_op(WN_operator(simd_op), rtype, desc),
               ("Handle this piece"));
     if (!is_vectorizable_op(WN_operator(simd_op), rtype, desc))
       continue; //will never happen due to the above assert
-    
+    if ((LNO_Simd_Vect_If && Simd_vect_conf.Is_SSE41()) && 
(WN_operator(LWN_Get_Parent(simd_op)) == OPR_IF) && (under_if))
+      under_if = FALSE;
     for (INT kid_no=0; kid_no<WN_kid_count(simd_op); kid_no++){
       WN* tmp=WN_kid(simd_op,kid_no);
       SIMD_OPERAND_KIND kind=simd_operand_kind(tmp,LWN_Get_Parent(body));
@@ -4349,12 +4561,23 @@
             WN_operator(stmt) != OPR_DO_LOOP &&
             // Bug 5225 - trace up should stop at a CVT or a TRUNC.
             WN_operator(stmt) != OPR_CVT &&
+           WN_operator(stmt) != OPR_IF &&
             WN_operator(stmt) != OPR_TRUNC) {
         stmt = LWN_Get_Parent(stmt);
       }
       if (!stmt || WN_operator(stmt) == OPR_DO_LOOP)
         type = WN_rtype(istore); //use parent's desc
-      else type = WN_desc(stmt); //use store's desc
+      else {
+       if(WN_operator(stmt) == OPR_IF)
+         // istore is (part of) the if-condition since if-body will have 
ISTORE as parent stmt.
+         // This returns desc of OPR_NE (root of if-condition). 
+         if (WN_operator(istore) == OPR_IF)
+           type = WN_desc(WN_kid0(istore));
+         else
+           type = WN_desc(istore);
+       else
+         type = WN_desc(stmt); //use store's desc
+      }
     } else type = WN_desc(istore);//parent is a store
     switch(type) {
       case MTYPE_V16C8: case MTYPE_C8:
@@ -4385,6 +4608,8 @@
       case MTYPE_U8:
         vmtype = MTYPE_V16I8;
         break;
+    default:
+      DevWarn("Unexpected type in Simd_Get_Vector_Type");
     }
   return vmtype;
 }
@@ -4433,6 +4658,14 @@
    if (WN_operator(simd_op) == OPR_CVT || WN_operator(simd_op) == OPR_TRUNC)
      type = WN_rtype(const_wn);
 
+   if ((LNO_Simd_Vect_If && Simd_vect_conf.Is_SSE41()) && (WN_operator(istore) 
== OPR_IF)) {
+     FmtAssert(WN_operator(simd_op) == OPR_NE, ("Condition of OPC_IF must be 
rooted at OPR_NE"));
+     //Match the vector type of parent simd_op.
+     //We know that this constant is a zero and so its vectorized version 
+     //can be made to match the size of the vectorized parent.
+     type = WN_desc(simd_op); 
+   }
+
    if (WN_operator(simd_op) == OPR_PARM &&
           WN_operator(istore) == OPR_INTRINSIC_OP &&
           WN_intrinsic(istore) == INTRN_SUBSU2) {
@@ -4492,8 +4725,13 @@
   else
       type = WN_desc(istore);
 
-  if (WN_operator(simd_op) == OPR_CVT || WN_operator(simd_op) == OPR_TRUNC)
-     type = desc;
+  OPERATOR opr = WN_operator(inv_wn);   
+  if (WN_operator(simd_op) == OPR_CVT || WN_operator(simd_op) == OPR_TRUNC) {
+    if (WN_operator(simd_op) == OPR_CVT && (BINARY_OP(opr)))
+       type = WN_rtype(inv_wn);
+    else
+      type = desc;
+  }
 
    switch (type) {
      case MTYPE_V16C8: case MTYPE_C8:
@@ -4511,12 +4749,12 @@
           break;
      case MTYPE_V16F4: case MTYPE_F4:
           inv_wn =
-            LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16F4, desc),
+            LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16F4, 
MTYPE_F4),
                            inv_wn);
           break;
      case MTYPE_V16F8: case MTYPE_F8:
           inv_wn =
-            LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16F8, desc),
+            LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16F8, 
MTYPE_F8),
                            inv_wn);
           break;
      case MTYPE_V16I1: case MTYPE_U1: case MTYPE_I1:
@@ -5183,6 +5421,207 @@
    CXX_DELETE(equivalence_class, &LNO_local_pool);
 }
 
+/*
+   Vectorize an if-statement. 
+   Vectorization of if-statement uses the following SSE4.1 operations:
+   1. pcmpeqq: evaluates pairs of if-conditions.
+   2. pblendvb: represents the vectorized if-body by selecting the value 
+      to store depending on the result of the pcmpeqq.
+   
+   By the time a vectorizable if statement is passed to 
+   Simd_Vectorize_If, its sub-expressions have already been vectorized.
+
+  Original           Input to                 Output of 
+                     Simd_Vectorize_If        Simd_Vectorize_If
+  if                 if                             vexpr
+      expr               vexpr                      v0
+      0                  v0                       veq
+    ne                 vne                        vkid0
+  then        ===>   then             ===>        kid1
+      kid0               vkid0                  vblend   
+      kid1               kid1                   kid1
+    istore             vistore                vistore
+*/
+static void Simd_Vectorize_If(WN *simd_op) {
+  WN *if_test, *blend_mask, *then_body, *then_istore, *blend_source, 
+    *blend_dest_array, *blend_dest, *blend_parent, *blend, *loop_block,
+    *inner_if_test, *blend_mask_kid0, *blend_mask_kid1;
+  BOOL nested = FALSE;
+  BOOL special = FALSE;
+
+  //step 1: verify that form of the if-statement is as expected.
+  if_test = WN_if_test(simd_op); 
+  then_body = WN_then(simd_op);
+  FmtAssert(WN_operator(if_test) == OPR_NE, 
+           ("Unexpected condition in IF-expression\n"));
+  FmtAssert(WN_rtype(if_test) == MTYPE_V16I8, 
+           ("Unexpected vector type of if-condition"));
+  FmtAssert(WN_operator(then_body) == OPR_BLOCK, 
+           ("body of if must be a block\n "));
+  then_istore = WN_first(then_body);
+
+  //step 2: check for nested-if statement and if so access the istore in the 
inner body.
+  if (WN_operator(then_istore) == OPR_IF) {
+    nested = TRUE;
+    inner_if_test = WN_if_test(then_istore);
+    FmtAssert(WN_operator(WN_then(then_istore)) == OPR_BLOCK, 
+             ("body of if must be a block\n "));
+    then_istore = WN_first(WN_then(then_istore));
+    FmtAssert(WN_operator(inner_if_test) == OPR_NE, 
+             ("Unexpected condition in nested-IF-expression\n"));
+  }
+  FmtAssert(WN_operator(then_istore) == OPR_ISTORE, 
+           ("if-body must be an istore\n "));
+
+  if (!nested) {
+    //step 3: construct blend mask from the if-condition by
+    //replacing OPR_NE with OPR_EQ (generates pcmpeqq).
+    blend_mask = LWN_Copy_Tree(if_test);
+    LWN_Copy_Def_Use(if_test, blend_mask, Du_Mgr);
+    FmtAssert(WN_rtype(blend_mask) == MTYPE_V16I8, ("Unexpected rtype for 
vectorized NE"));
+    WN_set_operator(blend_mask, OPR_EQ); 
+    WN_desc(blend_mask) == MTYPE_V16I8;
+  }
+  else {
+    // Nested-if:
+    WN *testval1, *testval2, *testval2kid1, *testval1kid1, *shlkid1, *shlkid2, 
+       *mask1, *mask2, *bitmask, *bitmask_copy;
+
+    testval1 = WN_kid0(if_test); // represents vexpr1 
+    testval2 = WN_kid0(inner_if_test); //represents vexpr2 
+
+    /*
+      Step 4: Check if nested-if conditions are of a special form (shown below 
in original form):
+      if (x & (1 << c1))  // extract single bit
+        if (x & (1 << c2))  // extract another single bit
+          s1; 
+    */
+    if (WN_Equiv(testval1, testval2) && WN_operator(testval1) == OPR_BAND) {
+      if (Tree_Equiv(WN_kid0(testval1), WN_kid0(testval2))) {
+       testval1kid1 = WN_kid1(testval1);
+       testval2kid1 = WN_kid1(testval2);
+       // the sub-expression (1 << c1) is an invariant, so its vectorized form 
will have OPR_REPLICATE
+       if (WN_Equiv(testval1kid1, testval2kid1) && WN_operator(testval1kid1) 
== OPR_REPLICATE) {
+         shlkid1 = WN_kid0(testval1kid1);
+         shlkid2 = WN_kid0(testval2kid1);
+         if (WN_Equiv(shlkid1, shlkid2) && WN_operator(shlkid1) == OPR_SHL) {
+           if (WN_Equiv(WN_kid0(shlkid1), WN_kid0(shlkid2))) {
+             if (WN_operator(WN_kid0(shlkid1)) == OPR_INTCONST && 
+                 WN_const_val(WN_kid0(shlkid1)) == 1) {
+               special = TRUE;
+             }
+           }
+         }
+       }
+      }
+    }
+    if (special) {
+      /* Step 5: Construct blend mask for special if-condition by 
+         converting it to the following equivalent (shown below in original 
form):
+        if ((x & (1<<c1 |1<<c2)) == (1<<c1 | 1<<c2)) // extract the two bits 
simultaneously
+        and then vectorize.
+      */
+      blend_mask = LWN_Copy_Tree(if_test);
+      LWN_Copy_Def_Use(if_test, blend_mask, Du_Mgr);
+
+      //bitmask = por(testval1kid1, testval2kid1);
+      mask1 = LWN_Copy_Tree(testval1kid1);
+      LWN_Copy_Def_Use(testval1kid1, mask1, Du_Mgr);
+      mask2 = LWN_Copy_Tree(testval2kid1);
+      LWN_Copy_Def_Use(testval2kid1, mask2, Du_Mgr);
+      //bitmask = (1<<c1|1<<c2, 1<<c1|1<<c2)
+      bitmask = WN_CreateExp2(OPR_BIOR, WN_rtype(mask1), MTYPE_V, 
+                             mask1, mask2);
+
+      // make extract = pand(WN_kid0(testval1, bitmask));
+      // extract = ((x[i] & (1<<c1|1<<c2)), (x[i+1] & (1<<c1|1<<c2)))
+      WN_kid1(WN_kid0(blend_mask)) = bitmask;
+      // blend_mask = pcmpeqq(extract, bitmask);
+      bitmask_copy = LWN_Copy_Tree(bitmask);
+      LWN_Copy_Def_Use(bitmask, bitmask_copy, Du_Mgr);
+      WN_kid1(blend_mask) = bitmask_copy;
+      WN_set_operator(blend_mask, OPR_EQ); 
+    }
+    else {
+      //Step 6: Construct blend mask for non-special nested if-conditions by
+      //fusing the two conditions with a por.
+      /*
+                          Input to               Output of 
+        Original          Simd_Vectorize_If      Simd_Vectorize_If
+        if                if            
+            expr1             vexpr1                     vexpr1
+            0                 v0                         v0  
+          ne                vne                        veq 
+        then         ==>  then             ==>           vexpr2
+          if                if                           v0
+              expr2             vexpr2                 veq 
+              0                 v0                   vor
+            ne                vne                    vkid0
+          then              then                     kid1
+              kid0              vkid0              vblend
+              kid1              kid1               kid1
+            istore            vistore            vistore
+      */
+
+      blend_mask_kid0 = LWN_Copy_Tree(if_test);
+      LWN_Copy_Def_Use(if_test, blend_mask_kid0, Du_Mgr);
+      WN_set_operator(blend_mask_kid0, OPR_EQ); 
+      
+      blend_mask_kid1 = LWN_Copy_Tree(inner_if_test);
+      LWN_Copy_Def_Use(inner_if_test, blend_mask_kid1, Du_Mgr);
+      WN_set_operator(blend_mask_kid1, OPR_EQ); 
+
+      blend_mask = WN_CreateExp2(OPR_BIOR, WN_rtype(blend_mask_kid1), MTYPE_V, 
+                                blend_mask_kid0, blend_mask_kid1);
+    }
+  }
+
+  //Step 7: The blend source is the RHS (kid0, the value being stored) of the istore statement.
+  blend_source = WN_kid0(then_istore); 
+
+  //Step 8: Construct the blend destination as an iload from the LHS address (kid1) of the istore statement.
+  blend_dest_array = LWN_Copy_Tree(WN_kid1(then_istore)); 
+  LWN_Copy_Def_Use(WN_kid1(then_istore), blend_dest_array, Du_Mgr); 
+  blend_dest = WN_Iload(WN_desc(then_istore), WN_offset(then_istore), 
+                       Make_Pointer_Type(MTYPE_To_TY(WN_desc(then_istore))), 
+                       blend_dest_array, WN_field_id(then_istore));  
+
+  blend_parent = then_istore;
+  if(special) {
+    blend = WN_CreateExp3(OPR_SELECT, MTYPE_V16I1, MTYPE_V16I1,
+                         blend_dest, blend_source, blend_mask);   
+  }
+  else {
+    blend = WN_CreateExp3(OPR_SELECT, MTYPE_V16I1, MTYPE_V16I1,
+                         blend_source, blend_dest, blend_mask);   
+  }
+
+  //Step 9: Make the blend the value (kid0) that the istore stores.
+  WN_kid0(then_istore) = blend;
+  LWN_Set_Parent(blend, blend_parent);
+  LWN_Parentize(blend);
+
+  loop_block = LWN_Get_Parent(simd_op);
+  LWN_Set_Parent(blend_parent, loop_block);
+
+  //Step 10: Replace the if-statement with the istore expression containing 
the blend-tree
+  if (simd_op == WN_first(loop_block))
+    WN_first(loop_block) = blend_parent;
+  else {
+    WN_next(WN_prev(simd_op)) = blend_parent;
+    WN_prev(blend_parent) = WN_prev(simd_op);
+  }
+  if (simd_op == WN_last(loop_block)) {
+    WN_last(loop_block) = blend_parent;
+    WN_next(blend_parent) = NULL;
+  }
+  else {
+    WN_next(blend_parent) = WN_next(simd_op);
+    WN_prev(WN_next(simd_op)) = blend_parent;
+  }
+  return;
+}
+
 static void Simd_Vectorize_SimdOp_And_Kids(WN *simd_op, TYPE_ID vmtype, BOOL 
*invarkid)
 {
 
@@ -5244,6 +5683,18 @@
   WN *istore = LWN_Get_Parent(simd_op);
   if(WN_operator(istore) == OPR_SHUFFLE)
      istore =  LWN_Get_Parent(istore); //up one level
+
+  if(WN_operator(istore) == OPR_IF) {
+    if (WN_operator(LWN_Get_Parent(istore)) == OPR_BLOCK &&
+       WN_operator(LWN_Get_Parent(LWN_Get_Parent(istore))) == OPR_IF) {
+      // This is the inner if of a nested-if. Do nothing here as 
+      // Simd_Vectorize_SimdOp_And_Kids on the parent will vectorize this inner if.
+      // LWN_Get_Parent(LWN_Get_Parent(istore)) is the parent if.
+    }
+    else
+      Simd_Vectorize_If(istore);
+  }
+
   if (WN_operator(istore) != OPR_STID && WN_operator(istore) != OPR_CVT &&
         WN_operator(istore) != OPR_TRUNC &&
         !OPCODE_is_compare(WN_opcode(istore))) {
@@ -6373,3 +6824,22 @@
 // Bug 3617 : Num_Vec() from ACCESS_ARRAY may not be in synch with
 // WN_num_dim(array) dues to delinearization. If we were to access different
 // kids in array, WN_num_dim(array) is the reliable source to find #kids.
+ 
+// Append each OPR_ILOAD node found in the WHIRL tree rooted at 'wn' to
+// '*wn_list'. Does not descend into the kids of an ILOAD it collects.
+static void WN_collect_iloads_nr(std::list<WN *> *wn_list, WN *wn)
+{ 
+  if (!wn_list || !wn) return;  // no list to fill or no tree to walk
+
+  if (OPCODE_operator(WN_opcode(wn))==OPR_ILOAD)
+    wn_list->push_back(wn);  // record the iload; its subtree is skipped
+  else
+  {
+    int i;
+    for (i = 0; i < WN_kid_count(wn); i++)
+    { 
+      WN *kid = WN_kid(wn,i);
+      WN_collect_iloads_nr(wn_list,kid);
+    } 
+  } 
+} 

Modified: trunk/osprey/common/com/config_lno.cxx
===================================================================
--- trunk/osprey/common/com/config_lno.cxx      2011-07-05 23:09:12 UTC (rev 
3680)
+++ trunk/osprey/common/com/config_lno.cxx      2011-07-06 19:11:17 UTC (rev 
3681)
@@ -179,11 +179,11 @@
 #ifdef TARG_X8664
   0,           /* Fission */
   TRUE,                /* Serial_distribute */
-  0,           /* Iter_threshold */
+  1,           /* Iter_threshold */
 #else
   1,           /* Fission */
   FALSE,       /* Serial_distribute */
-  0,           /* Iter_threshold */
+  1,           /* Iter_threshold */
 #endif
   0,           /* Fission_inner_register_limit */
   TRUE,                /* Forward_substitution */
@@ -271,6 +271,7 @@
   TRUE,         /* Simd_Reduction */
   TRUE,         /* Simd_Avoid_Fusion */
   FALSE,        /* Simd_Rm_Unity_Remainder */  
+  TRUE,         /* Simd_Vect_If */
   TRUE,         /* Run_hoistif */
   TRUE,                /* Ignore_Feedback */
   TRUE,         /* Run_unswitch */
@@ -401,11 +402,11 @@
 #ifdef TARG_X8664
   0,           /* Fission */
   TRUE,                /* Serial_distribute */
-  0,           /* Iter_threshold */
+  1,           /* Iter_threshold */
 #else
   1,           /* Fission */
   FALSE,       /* Serial_distribute */
-  0,           /* Iter_threshold */
+  1,           /* Iter_threshold */
 #endif
   0,           /* Fission_inner_register_limit */
   TRUE,                /* Forward_substitution */
@@ -492,7 +493,8 @@
   FALSE,       /* Simd_Verbose */
   TRUE,         /* Simd_Reduction */
   TRUE,         /* Simd_Avoid_Fusion */
-  FALSE,         /* Simd_Rm_Unity_Remainder*/
+  FALSE,        /* Simd_Rm_Unity_Remainder*/
+  TRUE,         /* Simd_Vect_If */
   TRUE,         /* Run_hoistif */
   TRUE,                /* Ignore_Feedback */
   TRUE,         /* Run_unswitch */
@@ -879,6 +881,7 @@
   LNOPT_BOOL ( "simd_reduction",       "simd_red",     Simd_Reduction ),
   LNOPT_BOOL ( "simd_avoid_fusion",    NULL,   Simd_Avoid_Fusion ),
   LNOPT_BOOL ( "simd_rm_unity_remainder", NULL,        
Simd_Rm_Unity_Remainder),
+  LNOPT_BOOL ( "simd_vect_if",         NULL,   Simd_Vect_If ),
   LNOPT_BOOL ( "hoistif",              NULL,   Run_hoistif ),
   LNOPT_BOOL ( "ignore_feedback",      NULL,   Ignore_Feedback ),
   LNOPT_BOOL ( "unswitch",             NULL,   Run_unswitch ),
@@ -1224,5 +1227,12 @@
                        Mhd_Options.L[i].TLB_Miss_Penalty;
     }
   }
+
+  /* Value of 1 for LNO_Iter_threshold is interpreted as default in which case 
+     the flag is set based on target. Otherwise use user-specified value.
+   */
+  if(LNO_Iter_threshold == 1) {
+    LNO_Iter_threshold = (Is_Target_SSE41())? 8 : 0;
+  }
 }
 

Modified: trunk/osprey/common/com/config_lno.h
===================================================================
--- trunk/osprey/common/com/config_lno.h        2011-07-05 23:09:12 UTC (rev 
3680)
+++ trunk/osprey/common/com/config_lno.h        2011-07-06 19:11:17 UTC (rev 
3681)
@@ -323,6 +323,7 @@
   BOOL           Simd_Reduction;
   BOOL           Simd_Avoid_Fusion;
   BOOL    Simd_Rm_Unity_Remainder;
+  BOOL    Simd_Vect_If;
   BOOL    Run_hoistif;
   BOOL    Ignore_Feedback;
   BOOL    Run_unswitch;
@@ -575,6 +576,7 @@
 #define LNO_Simd_Reduction             Current_LNO->Simd_Reduction
 #define LNO_Simd_Avoid_Fusion          Current_LNO->Simd_Avoid_Fusion
 #define LNO_Simd_Rm_Unity_Remainder    Current_LNO->Simd_Rm_Unity_Remainder
+#define LNO_Simd_Vect_If                Current_LNO->Simd_Vect_If
 #define LNO_Run_hoistif                 Current_LNO->Run_hoistif
 #define LNO_Ignore_Feedback             Current_LNO->Ignore_Feedback
 #define LNO_Run_Unswitch                Current_LNO->Run_unswitch

Modified: trunk/osprey/common/com/opcode_gen_core.cxx
===================================================================
--- trunk/osprey/common/com/opcode_gen_core.cxx 2011-07-05 23:09:12 UTC (rev 
3680)
+++ trunk/osprey/common/com/opcode_gen_core.cxx 2011-07-06 19:11:17 UTC (rev 
3681)
@@ -3270,9 +3270,13 @@
         break;
 
       case OPR_SELECT:
-        // [RTYPE] : b,f,i,p,z [DESC] : V,B
+        // [RTYPE] : b,f,i,p,z,V16 [DESC] : V,B,V16
         valid = Is_MTYPE_b_f_i_p_z [rtype] && 
                (desc == MTYPE_V || desc == MTYPE_B);
+#ifdef TARG_X8664
+       // add more valid vector types later.
+       valid = valid || (rtype == MTYPE_V16I1 && desc == MTYPE_V16I1);
+#endif
         break;
 
       case OPR_TAS:
@@ -3631,7 +3635,7 @@
       break;
 
     case OPR_SELECT:
-      // [RTYPE] : b,f,i,p,z [DESC] : V,b
+      // [RTYPE] : b,f,i,p,z,V16 [DESC] : V,b,V16
       sprintf (buffer, "OPC_%s%s%s", MTYPE_name(rtype), 
        desc == MTYPE_V ? "" : MTYPE_name(desc), &OPERATOR_info [opr]._name 
[4]);
       break;

Modified: trunk/osprey/common/com/opcode_gen_core.h
===================================================================
--- trunk/osprey/common/com/opcode_gen_core.h   2011-07-05 23:09:12 UTC (rev 
3680)
+++ trunk/osprey/common/com/opcode_gen_core.h   2011-07-06 19:11:17 UTC (rev 
3681)
@@ -1034,6 +1034,11 @@
   OPC_U8I16EQ            = OPR_EQ + RTYPE(MTYPE_U8) + DESC(MTYPE_I16),
   OPC_U8U16EQ            = OPR_EQ + RTYPE(MTYPE_U8) + DESC(MTYPE_U16),
 #endif /* TARG_X8664 */
+#ifdef TARG_X8664
+  // Note that the result of the comparison of individual elements (for OPC_V16I8V16I8EQ)
+  // should be either 0 or the bit mask of all 1's (NOT 0 or 1).
+  OPC_V16I8V16I8EQ       = OPR_EQ + RTYPE(MTYPE_V16I8) + DESC(MTYPE_V16I8),
+#endif
   OPC_EVAL               = OPR_EVAL + RTYPE(MTYPE_V) + DESC(MTYPE_V),
   OPC_EXC_SCOPE_BEGIN    = OPR_EXC_SCOPE_BEGIN + RTYPE(MTYPE_V) + 
DESC(MTYPE_V),
   OPC_EXC_SCOPE_END      = OPR_EXC_SCOPE_END + RTYPE(MTYPE_V) + DESC(MTYPE_V),
@@ -2351,6 +2356,7 @@
 #else
   OPC_V16F4SELECT       = OPR_SELECT + RTYPE(MTYPE_V16F4) + DESC(MTYPE_V),
   OPC_V16F8SELECT       = OPR_SELECT + RTYPE(MTYPE_V16F8) + DESC(MTYPE_V),
+  OPC_V16I1V16I1SELECT  = OPR_SELECT + RTYPE(MTYPE_V16I1) + DESC(MTYPE_V16I1),
 #endif /* TARG_X8664 */
   OPC_BBSELECT           = OPR_SELECT + RTYPE(MTYPE_B) + DESC(MTYPE_B),
   OPC_I4BSELECT          = OPR_SELECT + RTYPE(MTYPE_I4) + DESC(MTYPE_B),

Modified: trunk/osprey/common/com/wn_util.cxx
===================================================================
--- trunk/osprey/common/com/wn_util.cxx 2011-07-05 23:09:12 UTC (rev 3680)
+++ trunk/osprey/common/com/wn_util.cxx 2011-07-06 19:11:17 UTC (rev 3681)
@@ -1983,3 +1983,23 @@
   }
 }
 
+/***********************************************************************
+ * Append every OPR_ILOAD node in the WHIRL tree rooted at 'wn' to
+ * '*wn_list', each node before its kids. Kids of an ILOAD are visited
+ * too, so indirect loads nested under another ILOAD are also collected.
+ ***********************************************************************/
+void 
+WN_collect_iloads(std::list<WN*>* wn_list, WN* wn)
+{ 
+  if (!wn_list || !wn) return;  // no list to fill or no tree to walk
+
+  if (WN_operator(wn) == OPR_ILOAD)
+    wn_list->push_back(wn);
+
+  for (int i = 0; i < WN_kid_count(wn); i++)
+  { 
+    WN *kid = WN_kid(wn, i);
+    WN_collect_iloads(wn_list, kid);
+  } 
+} 
+

Modified: trunk/osprey/common/com/wn_util.h
===================================================================
--- trunk/osprey/common/com/wn_util.h   2011-07-05 23:09:12 UTC (rev 3680)
+++ trunk/osprey/common/com/wn_util.h   2011-07-06 19:11:17 UTC (rev 3681)
@@ -481,6 +481,7 @@
 /* Needed for the STL vector class used below
  */
 #include "vector"
+#include "list"
 #include "mempool_allocator.h"
 
 typedef mempool_allocator<WN*> VEC_POOL_ALLOCATOR;
@@ -494,6 +495,8 @@
                                           WN_MAP parent_map,
                                           BOOL make_compiler_generated);
 
+extern void WN_collect_iloads(std::list<WN*>*, WN*);
+
 #endif /* __cplusplus */
 
 #endif /* wn_util_INCLUDED */


------------------------------------------------------------------------------
All of the data generated in your IT infrastructure is seriously valuable.
Why? It contains a definitive record of application performance, security 
threats, fraudulent activity, and more. Splunk takes this data and makes 
sense of it. IT sense. And common sense.
http://p.sf.net/sfu/splunk-d2d-c2
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to