Author: pallavimathew
Date: 2011-06-10 12:59:05 -0400 (Fri, 10 Jun 2011)
New Revision: 3643

Modified:
   trunk/osprey/be/lno/cond.cxx
   trunk/osprey/be/lno/cond.h
   trunk/osprey/be/lno/lnoutils.h
   trunk/osprey/be/lno/model.cxx
   trunk/osprey/be/lno/model.h
   trunk/osprey/be/lno/simd.cxx
   trunk/osprey/common/com/config_lno.cxx
   trunk/osprey/common/com/config_lno.h
Log:
This change is to move the vectorizable loop to the innermost position. 
This optimization is disabled by default and can be enabled via 
-LNO:loop_model_simd=on. 

Below is a sample program that shows the effect of using 
-LNO:loop_model_simd=on. 
With this flag, simd checks whether moving an outerloop to innermost position 
will 
enable more vectorization and provides this information to loop interchange 
which 
does the actual interchange. In the example below, the middle loop is moved to 
the 
innermost position enabling generation of more vectorized operations. 

$cat testcase.f
      SUBROUTINE SAMPLE(A,X,Y,Z,N1,N2,N3)
      IMPLICIT DOUBLE PRECISION(A-H,O-Z)
 
      DIMENSION X(100,N1),Y(100,N1),Z(100),A(100)
      I3 = 0
      DO 30 I2 = 1,N3
         DO 20 I1= 1,N2
            I3 = I3 + 1
            Z(I3) = (7.0*(A(I2) - A(I1)))
            B = Z(I3)
            DO 10 I = 1,N1
               Y(I3,I) = B * X(I3,I)
   10       CONTINUE
   20    CONTINUE
   30 CONTINUE
      END

Sample compilations:
$ openf90 testcase.f -O3 -LNO:simd_verbose=on
(testcase.f:7) Vectorization is not likely to be beneficial (try -LNO:simd=2 to 
vectorize it). Loop was not vectorized.
(testcase.f:11) Non-contiguous array "X" reference exists. Loop was not 
vectorized.
(testcase.f:11) Non-contiguous array "X" reference exists. Loop was not 
vectorized.
 
$ openf90 testcase.f -O3 -LNO:simd_verbose=on -LNO:simd=2
(testcase.f:7) LOOP WAS VECTORIZED.
(testcase.f:11) Non-contiguous array "X" reference exists. Loop was not 
vectorized.
(testcase.f:11) Non-contiguous array "X" reference exists. Loop was not 
vectorized.
 
$ openf90 testcase.f -O3 -LNO:simd_verbose=on -LNO:loop_model_simd=on 
Vectorizable outer loop at line:7 is moved to innermost position
(testcase.f:7) LOOP WAS VECTORIZED.
(testcase.f:7) LOOP WAS VECTORIZED.
(testcase.f:7) LOOP WAS VECTORIZED.

C.R. Mei Ye.


Modified: trunk/osprey/be/lno/cond.cxx
===================================================================
--- trunk/osprey/be/lno/cond.cxx        2011-06-08 06:06:24 UTC (rev 3642)
+++ trunk/osprey/be/lno/cond.cxx        2011-06-10 16:59:05 UTC (rev 3643)
@@ -2050,7 +2050,7 @@
   LWN_Set_Parent(loop_copy, LWN_Get_Parent(wn));
   LWN_Parentize(loop_copy);
 #ifdef TARG_X8664
-  BOOL Has_Dependencies = !Is_Vectorizable_Loop(loop_copy);
+  BOOL Has_Dependencies = !Is_Vectorizable_Inner_Loop(loop_copy);
 #else
   BOOL Has_Dependencies = TRUE;
 #endif

Modified: trunk/osprey/be/lno/cond.h
===================================================================
--- trunk/osprey/be/lno/cond.h  2011-06-08 06:06:24 UTC (rev 3642)
+++ trunk/osprey/be/lno/cond.h  2011-06-10 16:59:05 UTC (rev 3643)
@@ -279,7 +279,4 @@
 extern BOOL Redundant_Condition(COND_BOUNDS_INFO* info, WN* wn_cond,
   WN* wn_if);
 
-#ifdef TARG_X8664
-extern BOOL Is_Vectorizable_Loop(WN* loop);
 #endif
-#endif

Modified: trunk/osprey/be/lno/lnoutils.h
===================================================================
--- trunk/osprey/be/lno/lnoutils.h      2011-06-08 06:06:24 UTC (rev 3642)
+++ trunk/osprey/be/lno/lnoutils.h      2011-06-10 16:59:05 UTC (rev 3643)
@@ -909,4 +909,9 @@
                                   BOOL *inductive_use, BOOL *indirect_use);
 #endif
 
+#ifdef TARG_X8664
+extern BOOL Is_Vectorizable_Inner_Loop(WN* loop);
+extern BOOL Is_Vectorizable_Outer_Loop(WN* loop);
+#endif
+
 #endif // LNOUTILS_DECLARE

Modified: trunk/osprey/be/lno/model.cxx
===================================================================
--- trunk/osprey/be/lno/model.cxx       2011-06-08 06:06:24 UTC (rev 3642)
+++ trunk/osprey/be/lno/model.cxx       2011-06-10 16:59:05 UTC (rev 3643)
@@ -455,6 +455,80 @@
 
 extern INT Debug_Cache_Model; 
 
+
+#ifndef TARG_X8664
+void
+LOOP_MODEL::Shift_Vectorizable_Innermost (void) {
+}
+
+#else
+void
+LOOP_MODEL::Shift_Vectorizable_Innermost (void) {
+    
+  if (!LNO_Interchange)
+    return;
+
+  Is_True (_wn && _can_be_inner, ("Internal inconsistency"));
+
+  INT wndepth = Do_Loop_Depth (_wn);
+
+  // step 1: anchor the vectorizable innermost loop in the innermost position
+  //
+  
+  //bug 2456, bug 5724 and bug 9143
+  //if an inner loop is vectorizable and it is beneficial to do so, then
+  //we should keep this loop innermost (i.e. the innermost loop can not
+  //be changed
+  if (Is_Vectorizable_Inner_Loop(_wn) && 
+      Is_Vectorization_Beneficial(WN_do_body(_wn))) {
+
+    _required_permutation[_inner_loop] = _inner_loop; //set inner's position
+    for (INT i = 0; i <= wndepth; i++) {
+      //only inner can be inner - don't change out
+      _can_be_inner[i] = (i == _inner_loop); 
+    }
+    return;
+  }
+    
+  // step 2: shift vectrorizable outer loop into the innermost position
+  //
+
+  if (LNO_Loop_Model_Simd) {
+    DOLOOP_STACK stack (&Model_Local_Pool);
+    Build_Doloop_Stack (_wn, &stack);
+    
+    // Work from second innermost to outer, examing if the loop can be 
+    // vectorized.
+    //
+    #define MAX_TRY 3
+    for (INT i = 1; i < MIN (wndepth, MAX_TRY); i++) {
+      WN* loop = stack.Top_nth (i);
+      if (!_can_be_inner [wndepth - i])
+        break;
+
+      if (Is_Vectorizable_Outer_Loop(loop) && 
+          Is_Vectorization_Beneficial(WN_do_body(_wn))) {
+
+        const char* fmt = 
+        "Vectorizable outer loop at line:%d is moved to innermost position\n";
+
+        if (LNO_Simd_Verbose) {
+            printf (fmt, Srcpos_To_Line (WN_Get_Linenum (loop)));
+        } 
+    
+        if (Get_Trace(TP_LNOPT, TT_LNO_MODEL)) {
+            fprintf (TFile, fmt, Srcpos_To_Line (WN_Get_Linenum (loop)));
+        }
+
+        for (INT j = 0; j <= wndepth; j++) {
+          _can_be_inner[j] = (j == (wndepth - i)); 
+        }
+      }
+    }
+  }
+}
+#endif
+
 void 
 LOOP_MODEL::Model(WN* wn, 
                   BOOL* can_be_inner, 
@@ -536,18 +610,9 @@
   if (LNO_Interchange == FALSE) {
     for (INT j = 0; j <= wndepth; j++)
       _required_permutation[j] = j;
+  } else {
+    Shift_Vectorizable_Innermost ();
   }
-#ifdef TARG_X8664
-  //bug 2456, bug 5724 and bug 9143
-  //if an inner loop is vectorizable and it is beneficial to do so, then
-  //we should keep this loop innermost (i.e. the innermost loop can not
-  //be changed
- else  if(Is_Vectorizable_Loop(wn) && 
Is_Vectorization_Beneficial(WN_do_body(wn))){
-    _required_permutation[_inner_loop] = _inner_loop; //set inner's position
-   for (INT j = 0; j <= wndepth; j++)
-      _can_be_inner[j] = (j == _inner_loop); //only inner can be inner - don't 
change out
-  }
-#endif
 
   INT loop_count = 0; 
   while (tmp) {

Modified: trunk/osprey/be/lno/model.h
===================================================================
--- trunk/osprey/be/lno/model.h 2011-06-08 06:06:24 UTC (rev 3642)
+++ trunk/osprey/be/lno/model.h 2011-06-10 16:59:05 UTC (rev 3643)
@@ -506,7 +506,6 @@
 
 extern BOOL Is_Bad_Array(WN* wn_ref, INT nloops); 
 #ifdef TARG_X8664
-extern BOOL Is_Vectorizable_Loop(WN* loop);
 extern BOOL Is_Vectorization_Beneficial(WN* loop);
 
 // bug 5880
@@ -610,6 +609,11 @@
   enum MODEL_LIMIT {MODEL_LIMIT_UNSET, MODEL_LIMIT_IDEAL, MODEL_LIMIT_RES,
                        MODEL_LIMIT_LAT};
   MODEL_LIMIT _model_limit;
+
+  // Try to move the vectorizable outer loop into the innermost position
+  //
+  void Shift_Vectorizable_Innermost (void);
+
 public:
   friend class REGISTER_MODEL;
   INT Num_Fp_Regs() const {return _num_fp_regs;}

Modified: trunk/osprey/be/lno/simd.cxx
===================================================================
--- trunk/osprey/be/lno/simd.cxx        2011-06-08 06:06:24 UTC (rev 3642)
+++ trunk/osprey/be/lno/simd.cxx        2011-06-10 16:59:05 UTC (rev 3643)
@@ -135,6 +135,32 @@
 
 static WN_MAP unroll_map; 
 
+// Some functions in this module are called in different contexts:
+//
+//   - SC_SIMD: The functions are called during SIMD phase. The loop
+//        being vectorized should be in the innermost possition. 
+//
+//   - SC_LOOP_MODELING: The functions are called during the loop modeling time
+//       (i.e. LOOP_MODEL::Model() is called). Some functions in this module 
will 
+//       be called to see if a loop, possiblly outer loop, is legal and 
beneficial
+//       to be vectorized when the loop shifts to innermost possition.
+// 
+//       Since the loop is not necessarily in the innermost possition at the 
time
+//       the functions are called. These functions must envision the situations
+//       when the loop is moved to innermost position.
+//
+//   - SC_OTHER: other misc situations. We conservatively assume that the loop
+//       being vectorized is already in the innermost position.
+//
+typedef enum {
+    SC_INVALID = 0,
+    SC_SIMD = 1,
+    SC_LOOP_MODELING = 2, 
+    SC_OTHER = 3,
+} SIMD_CONTEXT;
+
+static SIMD_CONTEXT simd_context = SC_INVALID;
+
 // Return TRUE iff there are too few iterations to generate a single 
 // vectorized iteration.
 //
@@ -475,6 +501,11 @@
 
   int store_granular_size = (LNO_Iter_threshold) ? 8 : 4;
 
+  if (LNO_Loop_Model_Simd) {
+    // vector-length should be >= 2
+    store_granular_size = Simd_vect_conf.Get_Vect_Byte_Size () / 2;
+  }
+
   if (LNO_Run_Simd == 0)
     return FALSE;
   else if (LNO_Run_Simd == 2)
@@ -632,9 +663,143 @@
  return TRUE;
 }
 
-static BOOL Unit_Stride_Reference(
+//  Returns TRUE if all uses in the expression 'wn' are invariant 
+//  w.r.t the <loop> when it shifts to the innermost position.
+//
+//  This function assume that it is legal to move the <loop> in the innermost 
pos
+//
+//  The <innermost> is the current innermost loop. If <loop> is already in 
+//  the innermost position, <loop> should be equal to <innermost>.
+//
+static BOOL Is_Loop_Invariant_Helper (WN* wn, WN* loop, WN* innermost);
+static BOOL Is_Loop_Invariant (WN* wn, WN* loop, WN* innermost)
+{
+  if (loop == innermost) {
+    return Is_Loop_Invariant_Exp (wn, loop); 
+  }
+
+  return Is_Loop_Invariant_Helper (wn, loop, innermost);
+}
+
+// Helper function of Is_Loop_Invariant_Helper(). 
+// Returns true if:
+//    - the wn is inside innermost loop, or 
+//    - it is init and fini expression of the loop being examined. 
+// Returns false if: 
+//     - the wn is in imperfect part in the loop nests. Does not matter it is 
imperfect part which loop. 
+//     - not inside the loop at all
+// Consider a general case:
+//  wn0   
+//  loop1 (i = init; i <= fini; i++) {
+//       wn1 
+//       loop 2{ 
+//          wn 2
+//           loop 3 {
+//              wn3
+//           }
+//      }
+//  }
+//  - Is_WN_Inside_Loop(wn3, loop1, loop3/*innermost*/) returns true
+//  - Is_WN_Inside_Loop(wn2, loop1, loop3/*innermost*/) returns false 
+//  - Is_WN_Inside_Loop(wn1, loop1, loop3/*innermost*/) returns false 
+//  - Is_WN_Inside_Loop(wn0, loop1, loop3/*innermost*/) returns false 
+//  - Is_WN_Inside_loop(<wn-"init"/wn-"fini", loop1, loop3) return TRUE since 
it is part of loop construct. 
+static BOOL Is_WN_Inside_Loop (WN* wn, WN* loop, WN* innermost)
+{
+    Is_True (WN_operator (loop) == OPR_DO_LOOP && 
+             WN_operator (innermost) == OPR_DO_LOOP, ("invalid input"));
+
+    for (WN* ancestor = wn; ancestor; ancestor = LWN_Get_Parent(ancestor)) {
+        if (ancestor == WN_do_body (innermost))
+         // <wn> is inside <innermost> loop.
+         return TRUE;
+
+        if ((ancestor == WN_index(loop)) || (ancestor == WN_start(loop)) || 
+           (ancestor == WN_end(loop)) || (ancestor == WN_step(loop)))
+         // <wn> is inside one of the components of <loop> other than its body.
+         return TRUE;
+
+       if (ancestor == loop) 
+         // <wn> is in the body of <loop> either in its imperfect part or 
+         // as part of another loop nested inside <loop>.
+         return FALSE;
+    }
+    //<wn> is outside <loop>
+    return FALSE;
+}
+
+static BOOL Is_Loop_Invariant_Helper (WN* wn, WN* loop, WN* innermost)
+{
+    ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph; 
+    if (!dg) return FALSE;
+
+    OPERATOR opr = WN_operator(wn); 
+
+    if (OPERATOR_is_call (opr)) {
+        // we should not come across a call.
+        return FALSE;
+    }
+
+    if (opr == OPR_ILOAD) {
+        VINDEX16 v = dg->Get_Vertex(wn); 
+        if (v == 0) 
+            return FALSE; 
+
+        EINDEX16 e = 0; 
+        for (e = dg->Get_In_Edge(v); e; e = dg->Get_Next_In_Edge(e)) { 
+            VINDEX16 v_source = dg->Get_Source(e);
+            WN* wn_source = dg->Get_Wn(v_source);
+            if (Is_WN_Inside_Loop (wn_source, loop, innermost))
+                   return FALSE; 
+        }
+
+        for (INT kid = 0; kid < WN_kid_count(wn); kid++) {
+            if (!Is_Loop_Invariant_Helper (WN_kid(wn, kid), loop, innermost))
+                   return FALSE; 
+        }
+        return TRUE; 
+    } else if (opr == OPR_INTRINSIC_OP || opr == OPR_PURE_CALL_OP) { 
+        for (INT i = 0; i < WN_kid_count(wn); i++) {
+            WN* wn_parm_node = WN_kid(wn, i);
+            if (WN_Parm_By_Reference(wn_parm_node))
+                return FALSE; 
+
+            WN* wn_parameter = WN_kid0(wn_parm_node); 
+            if (!Is_Loop_Invariant_Helper (wn_parameter, loop, innermost))
+                return FALSE; 
+        }
+        return TRUE;  
+    } else if (opr == OPR_LDID) {
+        DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(wn);
+       if (!def_list || def_list->Incomplete())
+         return FALSE;
+        DEF_LIST_ITER iter(def_list);
+        const DU_NODE* node = NULL;
+        for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
+            WN* def = node->Wn();
+            if (Is_WN_Inside_Loop (def, loop, innermost))
+                return FALSE;
+        }
+        return TRUE; 
+    } else {
+        if (!Statically_Safe_Node(wn)) 
+            return FALSE; 
+
+        for (INT kid = 0; kid < WN_kid_count(wn); kid++)
+            if (!Is_Loop_Invariant_Helper (WN_kid(wn, kid), loop, innermost))
+                return FALSE;
+    }
+    return TRUE; 
+}
+
+
+// Helper function of Unit_Stride_Reference. Do *NOT* call this function 
+// directly.
+//
+static BOOL Unit_Stride_Reference_Helper(
                 WN *wn, 
                 WN *loop, 
+                WN *innermost,
                 BOOL in_simd)
 {
 
@@ -643,7 +808,7 @@
     if (WN_opcode(wn) == OPC_BLOCK){
       WN* kid = WN_first (wn);
       while (kid) {
-        if(!Unit_Stride_Reference(kid, loop, in_simd))
+        if(!Unit_Stride_Reference_Helper(kid, loop, innermost, in_simd))
            return FALSE;
         kid = WN_next(kid);
     } // end while
@@ -651,7 +816,7 @@
     }// endif
 
     if(WN_operator(wn) == OPR_ARRAY && 
-       (in_simd || !Is_Loop_Invariant_Exp(wn, loop))){
+       (in_simd || !Is_Loop_Invariant(wn, loop, innermost))){
       
       ACCESS_ARRAY* aa = (ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map, wn);
       ACCESS_VECTOR* av;
@@ -693,13 +858,33 @@
       }// end if array
 
     for (UINT kidno = 0; kidno < WN_kid_count(wn); kidno ++) {
-       if(!Unit_Stride_Reference(WN_kid(wn, kidno), loop, in_simd))
+       if(!Unit_Stride_Reference_Helper(WN_kid(wn, kidno), 
+                                        loop, innermost, in_simd)) {
           return FALSE;
+       }
      }
 
    return TRUE;
 }
 
+static BOOL Unit_Stride_Reference (
+                WN *wn, 
+                WN *loop, 
+                BOOL in_simd)
+{
+   FmtAssert (simd_context == SC_SIMD && in_simd && Do_Loop_Is_Inner (loop) ||
+              simd_context == SC_OTHER && Do_Loop_Is_Inner (loop) ||
+              simd_context == SC_LOOP_MODELING, 
+              ("impossible"));
+    
+    WN* innermost = loop;
+    if (!Do_Loop_Is_Inner (loop)) {
+       innermost = SNL_Innermost_Do (loop);
+    }
+
+    return Unit_Stride_Reference_Helper (wn, loop, innermost, in_simd);
+}
+
 static void Report_Non_Vectorizable_Op(WN *wn)
 {
   if(non_vect_op) return; //already reported by sub-ops
@@ -1440,7 +1625,11 @@
   }  
 }
 
+
 BOOL Gather_Vectorizable_Ops(
+  WN* wn, SCALAR_REF_STACK* simd_ops, MEM_POOL *pool, WN *loop);
+
+BOOL Gather_Vectorizable_Ops_Helper(
   WN* wn, SCALAR_REF_STACK* simd_ops, MEM_POOL *pool, WN *loop)
 {
   if (WN_opcode(wn) == OPC_BLOCK){
@@ -1502,9 +1691,11 @@
 
   for (INT kidno=0; kidno<WN_kid_count(wn); kidno++){
     WN* kid = WN_kid(wn,kidno);
-    if (!Gather_Vectorizable_Ops(kid,simd_ops,pool,loop))
+    if (!Gather_Vectorizable_Ops(kid,simd_ops,pool,loop)) {
+      Report_Non_Vectorizable_Op(kid);
       return FALSE;
-   }
+    }
+  }
 
   // Bug 3011 - If 'wn' is a reduction statement then it should not be used
   // more than once (except in reductions on the same variable) inside this 
@@ -1521,11 +1712,14 @@
     // 'wn' is a reduction statement.
     // If there is more than one use for this definition inside this loop 
     // then do not vectorize.
-    if (!Du_Mgr)
+    if (!Du_Mgr) {
+      Report_Non_Vectorizable_Op(wn);
       return FALSE;
+    }
     USE_LIST* use_list=Du_Mgr->Du_Get_Use(wn);
-    if (!use_list)
+    if (!use_list) {
       return FALSE;
+    }
     WN *body = WN_do_body(loop);
     USE_LIST_ITER uiter(use_list);
     INT num_reuse = 0;
@@ -1557,11 +1751,16 @@
   // involved in a reduction.
   if (WN_operator(wn) == OPR_STID && curr_simd_red_manager &&
       curr_simd_red_manager->Which_Reduction(wn) == RED_NONE) {
-    if (!Du_Mgr)
+    if (!Du_Mgr) {
+      Report_Non_Vectorizable_Op(wn);
       return FALSE;
+    }
     USE_LIST* use_list=Du_Mgr->Du_Get_Use(wn);
-    if (!use_list || use_list->Incomplete()) //bug 12536 - conservative if
-      return FALSE;                          //            incomplete
+    if (!use_list || use_list->Incomplete()) { 
+      //bug 12536 - conservative if incomplete
+      Report_Non_Vectorizable_Op(wn);
+      return FALSE;                            
+    }
     USE_LIST_ITER uiter(use_list);
     for (DU_NODE* u = uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
       WN* use=u->Wn();
@@ -1576,9 +1775,10 @@
    //TODO: investigate whether these pragmas can be removed before simd
       if(stmt && WN_operator(stmt)==OPR_XPRAGMA && 
             WN_pragma(stmt) == WN_PRAGMA_COPYIN_BOUND && 
-            WN_kid0(stmt) == use)
+            WN_kid0(stmt) == use) {
+        Report_Non_Vectorizable_Op(wn);
         return FALSE;
-     
+      } 
 
       WN* loop_stmt = WN_first(body);
       for (; loop_stmt; loop_stmt = WN_next(loop_stmt)) {
@@ -1595,11 +1795,15 @@
   // Bug 3875 - Also, the STID should not be used to compute address from a
   // ARRAY node.
   if (WN_operator(wn) == OPR_STID) {
-    if (!Du_Mgr)
+    if (!Du_Mgr) {
+      Report_Non_Vectorizable_Op(wn);
       return FALSE;
+    }
     USE_LIST* use_list=Du_Mgr->Du_Get_Use(wn);
-    if (!use_list)
+    if (!use_list) {
+      Report_Non_Vectorizable_Op(wn);
       return FALSE;
+    }
     USE_LIST_ITER uiter(use_list);
     for (DU_NODE* u = uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
       WN* use=u->Wn();
@@ -1622,11 +1826,15 @@
   // why a loop. 
   if (WN_operator(wn) == OPR_STID && curr_simd_red_manager &&
       curr_simd_red_manager->Which_Reduction(wn) == RED_NONE) {
-    if (!Du_Mgr)
+    if (!Du_Mgr) {
+      Report_Non_Vectorizable_Op(wn);
       return FALSE;
+    }
     USE_LIST* use_list=Du_Mgr->Du_Get_Use(wn);
-    if (!use_list)
+    if (!use_list) {
+      Report_Non_Vectorizable_Op(wn);
       return FALSE;
+    }
     USE_LIST_ITER uiter(use_list);
     BOOL used_in_loop = FALSE;
     for (DU_NODE* u = uiter.First(); !uiter.Is_Empty() && !used_in_loop; 
@@ -1667,6 +1875,15 @@
   return TRUE;
 }
 
+BOOL Gather_Vectorizable_Ops(
+  WN* wn, SCALAR_REF_STACK* simd_ops, MEM_POOL *pool, WN *loop) {
+  if (!Gather_Vectorizable_Ops_Helper (wn, simd_ops, pool, loop)) {
+    Report_Non_Vectorizable_Op (wn);
+    return FALSE;
+  }
+  return TRUE;
+}
+
 //-----------------------------------------------------------------------
 // NAME: Find_Nodes
 // FUNCTION: Find all of the nodes in the tree rooted at 'wn_tree' with the
@@ -2178,75 +2395,84 @@
     return Contain_Vectorizable_Intrinsic(body);
 }
 
-//copy from vloop to ploop
-static void Simd_Copy_Def_Use_For_Loop_Stmt(WN* vloop, WN *ploop)
+/*
+ * Copy the use-def relations from a given loop body to its copy wn node 
+ */
+static void Simd_Copy_Def_Use_For_Loop_Body(WN* vbody, WN *pbody, SYMBOL index)
 {
+    WN *vstmt, *pstmt;
 
-    SYMBOL index(WN_index(vloop));
+    Is_True((WN_opcode(vbody) == OPC_BLOCK && WN_opcode(pbody) == OPC_BLOCK),
+            ("This function only works for DO_LOOP's loop body"));
 
-    WN *vbody = WN_do_body(vloop);
-    WN *pbody = WN_do_body(ploop);
-    WN *vstmt, *pstmt;
     for (vstmt=WN_first(vbody), pstmt=WN_first(pbody);
-         vstmt != NULL && pstmt != NULL;
-         vstmt=WN_next(vstmt), pstmt=WN_next(pstmt))
-      Copy_Def_Use(vstmt, pstmt, index, FALSE/*synch*/);
+            vstmt != NULL && pstmt != NULL;
+            vstmt=WN_next(vstmt), pstmt=WN_next(pstmt))
+        Copy_Def_Use(vstmt, pstmt, index, FALSE/*synch*/);
 
     for (vstmt=WN_first(vbody), pstmt=WN_first(pbody);
-         vstmt != NULL && pstmt != NULL;
-         vstmt=WN_next(vstmt), pstmt=WN_next(pstmt))
+            vstmt != NULL && pstmt != NULL;
+            vstmt=WN_next(vstmt), pstmt=WN_next(pstmt))
     {
-       if (WN_operator(vstmt) != OPR_PRAGMA &&
-           WN_operator(pstmt) != OPR_PRAGMA )
-         LWN_Copy_Def_Use(WN_kid0(vstmt),WN_kid0(pstmt), Du_Mgr);
+        if (WN_operator(vstmt) != OPR_PRAGMA &&
+                WN_operator(pstmt) != OPR_PRAGMA )
+            LWN_Copy_Def_Use(WN_kid0(vstmt),WN_kid0(pstmt), Du_Mgr);
     }
 
     for (vstmt=WN_first(vbody), pstmt=WN_first(pbody);
-         vstmt != NULL && pstmt != NULL;
-         vstmt=WN_next(vstmt), pstmt=WN_next(pstmt)){
+            vstmt != NULL && pstmt != NULL;
+            vstmt=WN_next(vstmt), pstmt=WN_next(pstmt)){
 
-       if (WN_operator(vstmt) == OPR_STID) {
-         USE_LIST* use_list=Du_Mgr->Du_Get_Use(vstmt);
-        USE_LIST_ITER uiter(use_list);
-        DOLOOP_STACK sym_stack(&LNO_local_pool);
-        SYMBOL symbol(vstmt);
-        Find_Nodes(OPR_LDID, symbol, WN_do_body(ploop),&sym_stack);
-        for (INT j = 0; j < sym_stack.Elements(); j++) {
-          WN* wn_use =  sym_stack.Bottom_nth(j);
-          DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(wn_use);
-          def_list->Set_loop_stmt(ploop);
+        if (WN_operator(vstmt) == OPR_STID) {
+            USE_LIST* use_list=Du_Mgr->Du_Get_Use(vstmt);
+            USE_LIST_ITER uiter(use_list);
+            DOLOOP_STACK sym_stack(&LNO_local_pool);
+            SYMBOL symbol(vstmt);
+            Find_Nodes(OPR_LDID, symbol, pbody,&sym_stack);
+            for (INT j = 0; j < sym_stack.Elements(); j++) {
+                WN* wn_use =  sym_stack.Bottom_nth(j);
+                DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(wn_use);
+                def_list->Set_loop_stmt(LWN_Get_Parent(pbody));
+            }
+            if (use_list->Incomplete()) {
+                Du_Mgr->Create_Use_List(pstmt);
+                Du_Mgr->Du_Get_Use(pstmt)->Set_Incomplete();
+                continue;
+            }
+            for (DU_NODE* u=uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
+                WN* use = u->Wn();
+                Du_Mgr->Add_Def_Use(pstmt, use);
+            }
         }
-        if (use_list->Incomplete()) {
-          Du_Mgr->Create_Use_List(pstmt);
-          Du_Mgr->Du_Get_Use(pstmt)->Set_Incomplete();
-          continue;
-        }
-        for (DU_NODE* u=uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
-          WN* use = u->Wn();
-          Du_Mgr->Add_Def_Use(pstmt, use);
-        }
-      }
     }
 }
+//copy from vloop to ploop
+static void Simd_Copy_Def_Use_For_Loop_Stmt(WN* vloop, WN *ploop)
+{
 
+    SYMBOL index(WN_index(vloop));
 
-extern BOOL Is_Vectorizable_Loop (WN* innerloop) 
+    WN *vbody = WN_do_body(vloop);
+    WN *pbody = WN_do_body(ploop);
+    Simd_Copy_Def_Use_For_Loop_Body(vbody, pbody, index);
+}
+
+static BOOL SIMD_Is_Vectorizable_Loop (WN* vect_loop, WN* body) 
 {
   if (LNO_Run_Simd == 0)
     return FALSE;
 
-  if (Loop_Has_Asm(innerloop))
+  if (Loop_Has_Asm(vect_loop))
     return FALSE;
 
-  if (WN_opcode(innerloop) != OPC_DO_LOOP ||
-      !Do_Loop_Is_Good(innerloop) ||
-      Do_Loop_Has_Calls(innerloop) ||
-      Do_Loop_Has_Gotos(innerloop) ||
-      Do_Loop_Is_Mp(innerloop) ||
-      !Do_Loop_Is_Inner(innerloop))
+  if (WN_opcode(vect_loop) != OPC_DO_LOOP ||
+      !Do_Loop_Is_Good(vect_loop) ||
+      Do_Loop_Has_Calls(vect_loop) ||
+      Do_Loop_Has_Gotos(vect_loop) ||
+      Do_Loop_Is_Mp(vect_loop)) {
     return FALSE;
+  }
 
-  WN* body = WN_do_body(innerloop);
   WN* stmt;
   MEM_POOL SIMD_tmp_pool;
   MEM_POOL_Initialize(&SIMD_tmp_pool,"SIMD_tmp_pool",FALSE);
@@ -2257,11 +2483,19 @@
            &SIMD_tmp_pool);
 
   BOOL save_simp_state = WN_Simplifier_Enable(FALSE);
-  Simd_Mark_Code(WN_do_body(innerloop)); 
+  BOOL innermost_loop  = TRUE;
+
+  // In the case of outer loop vectorization, body is not the loop body of 
vect_loop.
+  if (simd_context == SC_LOOP_MODELING 
+          && LWN_Get_Parent(body) != vect_loop) {
+      innermost_loop = FALSE;
+  }
+
+  Simd_Mark_Code(WN_do_body(vect_loop)); 
   WN_Simplifier_Enable(save_simp_state);
 
   if (LNO_Simd_Reduction) {
-    WN* func_nd = LWN_Get_Parent(innerloop);
+    WN* func_nd = LWN_Get_Parent(vect_loop);
     while(func_nd && WN_opcode(func_nd) != OPC_FUNC_ENTRY)
       func_nd = LWN_Get_Parent(func_nd);
     simd_red_manager = CXX_NEW 
@@ -2273,7 +2507,7 @@
   Induction_Seen = FALSE;
   BOOL _stop = FALSE;
   for (stmt=WN_first(body); stmt; stmt=WN_next(stmt))
-    if (!Gather_Vectorizable_Ops(stmt, simd_ops,&SIMD_tmp_pool, innerloop)){
+    if (!Gather_Vectorizable_Ops(stmt, simd_ops,&SIMD_tmp_pool, vect_loop)){
         _stop = TRUE;
         break;
     }
@@ -2284,7 +2518,7 @@
    //Bug 6963: Loop invariant array reference(loads) is permitted at this time
    // Simd will move these kind of array references out of the loop.
    BOOL move_invar = (!Get_Trace(TP_LNOPT, TT_LNO_GUARD) && LNO_Minvar);
-   if(!_stop && !Unit_Stride_Reference(body, innerloop, !move_invar))
+   if(!_stop && !Unit_Stride_Reference(body, vect_loop, !move_invar))
      _stop = TRUE;
 
   if(_stop){
@@ -2294,25 +2528,49 @@
   }
   
   // Dependence Analysis
-  WN* loop_copy = LWN_Copy_Tree(innerloop, TRUE, LNO_Info_Map);
-  DO_LOOP_INFO* dli=Get_Do_Loop_Info(innerloop);
+  WN* loop_copy = LWN_Copy_Tree(vect_loop, TRUE, LNO_Info_Map);
+
+  if (!innermost_loop) {
+      WN_do_body(loop_copy)= LWN_Copy_Tree(body, TRUE, LNO_Info_Map);
+      LWN_Set_Parent(WN_do_body(loop_copy), loop_copy);
+  }
+
+  DO_LOOP_INFO* dli=Get_Do_Loop_Info(vect_loop);
   DO_LOOP_INFO* new_loop_info =
     CXX_NEW(DO_LOOP_INFO(dli,&LNO_default_pool), &LNO_default_pool);
   Set_Do_Loop_Info(loop_copy, new_loop_info);
   adg=Array_Dependence_Graph;
-  if (!adg->Add_Deps_To_Copy_Block(innerloop, loop_copy, TRUE)) {
-    LNO_Erase_Dg_From_Here_In(loop_copy, adg);
-    MEM_POOL_Pop(&SIMD_tmp_pool);
-    MEM_POOL_Delete(&SIMD_tmp_pool);
-    return FALSE;
+
+  if (innermost_loop) {
+      if (!adg->Add_Deps_To_Copy_Block(vect_loop, loop_copy, TRUE)) {
+          LNO_Erase_Dg_From_Here_In(loop_copy, adg);
+          MEM_POOL_Pop(&SIMD_tmp_pool);
+          MEM_POOL_Delete(&SIMD_tmp_pool);
+          return FALSE;
+      }
+  } else {
+      if (!adg->Add_Deps_To_Copy_Block(WN_start(vect_loop), 
WN_start(loop_copy), TRUE)
+              || !adg->Add_Deps_To_Copy_Block(WN_end(vect_loop), 
WN_end(loop_copy), TRUE)
+              || !adg->Add_Deps_To_Copy_Block(WN_step(vect_loop), 
WN_step(loop_copy), TRUE)
+              || !adg->Add_Deps_To_Copy_Block(body, WN_do_body(loop_copy), 
TRUE)) {
+          LNO_Erase_Dg_From_Here_In(loop_copy, adg);
+          MEM_POOL_Pop(&SIMD_tmp_pool);
+          MEM_POOL_Delete(&SIMD_tmp_pool);
+          return FALSE;
+      }
   }
 
-  Copy_Def_Use(WN_start(innerloop), WN_start(loop_copy),
-                WN_index(innerloop), FALSE /* synch */);
-  Copy_Def_Use(WN_end(innerloop), WN_end(loop_copy),
-                WN_index(innerloop), FALSE /* synch */);
-  Simd_Copy_Def_Use_For_Loop_Stmt(innerloop, loop_copy);
 
+  Copy_Def_Use(WN_start(vect_loop), WN_start(loop_copy),
+                WN_index(vect_loop), FALSE /* synch */);
+  Copy_Def_Use(WN_end(vect_loop), WN_end(loop_copy),
+                WN_index(vect_loop), FALSE /* synch */);
+  if (innermost_loop) {
+      Simd_Copy_Def_Use_For_Loop_Stmt(vect_loop, loop_copy);
+  } else {
+      Simd_Copy_Def_Use_For_Loop_Body(body, WN_do_body(loop_copy), 
SYMBOL(WN_index(vect_loop)));
+  }
+
   MEM_POOL_Initialize(&SIMD_default_pool,"SIMD_default_pool",FALSE);
   MEM_POOL_Push(&SIMD_default_pool);
 
@@ -2330,8 +2588,59 @@
   return !Has_Dependencies;
 }
 
+extern BOOL Is_Vectorizable_Inner_Loop (WN* innerloop) {
+
+    SIMD_CONTEXT sc_save = simd_context; 
+    simd_context = SC_OTHER; 
+
+    BOOL res;
+    if (!Do_Loop_Is_Inner (innerloop)) {
+        // Not applicable
+        //
+        res =FALSE;
+    } else {
+        res = SIMD_Is_Vectorizable_Loop (innerloop, WN_do_body (innerloop));
+    }
+
+    simd_context = sc_save;    
+    return res;
+}
+
+// return TRUE iff it is legal to vectorize <loop> if it were in the 
+//  innermost position.
+//
+// NOTE: It is up to the caller to determine if it is legal to move <loop>
+//   into innermost position
+//
+extern BOOL Is_Vectorizable_Outer_Loop (WN* loop) {
+
+    Is_True (WN_operator (loop) == OPR_DO_LOOP, ("invalid input"));
+
+    if (Do_Loop_Is_Inner (loop)) {
+        // not applicable 
+        return FALSE;
+    }
+
+    WN* innermost = SNL_Innermost_Do (loop);
+    if (!Do_Loop_Is_Inner (innermost)) {
+        // there is a loop inside the SNL-sense innermost loop, give up.
+        // 
+        return FALSE;
+    }
+
+    SIMD_CONTEXT sc_save = simd_context;
+    simd_context = SC_LOOP_MODELING;
+
+    BOOL res = SIMD_Is_Vectorizable_Loop (loop, WN_do_body (innermost));
+    
+    simd_context = sc_save;
+    return res;
+}
+
 extern void Mark_Auto_Vectorizable_Loops (WN* wn)
 {
+  simd_context = SC_OTHER;
+
   OPCODE opc=WN_opcode(wn);
 
   if (!OPCODE_is_scf(opc)) 
@@ -2339,7 +2648,7 @@
   else if (opc==OPC_DO_LOOP) {
     if (Do_Loop_Is_Good(wn) && Do_Loop_Is_Inner(wn) && !Do_Loop_Has_Calls(wn)
        && !Do_Loop_Is_Mp(wn) && !Do_Loop_Has_Gotos(wn)) {
-      if (Is_Vectorizable_Loop(wn)) {
+      if (SIMD_Is_Vectorizable_Loop (wn, WN_do_body (wn))) {
        DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn, FALSE);
        dli->Vectorizable = TRUE;
       }        
@@ -2355,6 +2664,8 @@
     for (UINT kidno=0; kidno<WN_kid_count(wn); kidno++) {
       Mark_Auto_Vectorizable_Loops(WN_kid(wn,kidno));
     }
+
+    simd_context = SC_INVALID;
 }
 
 /* To facilitate vectorization, convert all 
@@ -5098,7 +5409,7 @@
              WN_operator(start) == OPR_STID, 
              ("criteria isn't met"));
 
-    OPCODE ld_opc = OPCODE_make_op (OPR_LDID, WN_desc(start), WN_desc(start));
+    OPCODE ld_opc = OPCODE_make_op (OPR_LDID, Promote_Type(WN_desc(start)), 
WN_desc(start));
     WN* ld_idx = LWN_CreateLdid (ld_opc, WN_start(vect_loop));
 
     Delete_Def_Use (WN_kid0(start_r));
@@ -5998,6 +6309,8 @@
   MEM_POOL_Initialize(&SIMD_default_pool,"SIMD_default_pool",FALSE);
   MEM_POOL_Push(&SIMD_default_pool);
 
+  simd_context = SC_SIMD;
+
   adg=Array_Dependence_Graph;
 
   debug = Get_Trace(TP_LNOPT, TT_LNO_DEBUG_SIMD);
@@ -6038,8 +6351,11 @@
   }
   if (LNO_Simd_Reduction && simd_red_manager)
     CXX_DELETE(simd_red_manager,&SIMD_default_pool);
+  simd_context = SC_INVALID;
+
   MEM_POOL_Pop(&SIMD_default_pool);
   MEM_POOL_Delete(&SIMD_default_pool);
+
 }
 
 // IPA does not pad common blocks that participate in I/O. The base address

Modified: trunk/osprey/common/com/config_lno.cxx
===================================================================
--- trunk/osprey/common/com/config_lno.cxx      2011-06-08 06:06:24 UTC (rev 
3642)
+++ trunk/osprey/common/com/config_lno.cxx      2011-06-10 16:59:05 UTC (rev 
3643)
@@ -200,6 +200,7 @@
   FALSE, FALSE,        /* Run_lego */
   TRUE,                /* Run_lego_localizer */
   TRUE,                /* Loop_finalization */
+  FALSE,       /* Loop_model_simd*/
   8,           /* Max_do_loop_depth_strict */
   FALSE,       /* Mem_sim */
   TRUE,                /* Minvar */
@@ -421,6 +422,7 @@
   FALSE, FALSE,        /* Run_lego */
   TRUE,                /* Run_lego_localizer */
   TRUE,                /* Loop_finalization */
+  FALSE,       /* Loop_model_simd*/
   8,           /* Max_do_loop_depth_strict */
   FALSE,       /* Mem_sim */
   TRUE,                /* Minvar */
@@ -905,6 +907,7 @@
   LNOPT_BOOL ( "ifminmax",              NULL,   IfMinMax ), 
   LNOPT_BOOL ( "call_info",             NULL,   Run_call_info ), 
   LNOPT_BOOL ( "loop_finalize",        NULL,   Loop_finalization),
+  LNOPT_BOOL ( "loop_model_simd",      NULL,   Loop_model_simd),
   LNOPT_BOOL ( "shackle",              NULL,   Shackle),
   LNOPT_BOOL ( "cross_loop",           NULL,   Cross_loop),
   LNOPT_BOOL ( "ipa",                   NULL,   IPA_Enabled),

Modified: trunk/osprey/common/com/config_lno.h
===================================================================
--- trunk/osprey/common/com/config_lno.h        2011-06-08 06:06:24 UTC (rev 
3642)
+++ trunk/osprey/common/com/config_lno.h        2011-06-10 16:59:05 UTC (rev 
3643)
@@ -261,6 +261,7 @@
   BOOL Run_lego_set;
   BOOL Run_lego_localizer;
   BOOL Loop_finalization;
+  BOOL  Loop_model_simd; 
   UINT32 Max_do_loop_depth_strict;
   BOOL Mem_sim;
   BOOL Minvar;
@@ -509,6 +510,7 @@
 #define LNO_Run_Lego_Set               Current_LNO->Run_lego_set
 #define LNO_Run_Lego_Localizer         Current_LNO->Run_lego_localizer
 #define LNO_Loop_Finalization          Current_LNO->Loop_finalization
+#define LNO_Loop_Model_Simd            Current_LNO->Loop_model_simd
 #define LNO_Max_Do_Loop_Depth_Strict   Current_LNO->Max_do_loop_depth_strict
 #define LNO_Mem_Sim                    Current_LNO->Mem_sim
 #define LNO_Minvar                     Current_LNO->Minvar


------------------------------------------------------------------------------
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to