Some comments:

1. It's better to replace the new value -1 by a macro or enum.
2. The description for movnti defined in osprey/be/cg/cgdriver.cxx
should also be changed.
3. This change looks very strange:
@@ -3332,6 +3342,10 @@
   if( size < cache_size )
     return;
+  }
+  else
+    ; // CG_movnti == -1:  generate non-temporal stores unconditionall
+
Why not remove the whole else part and only keep the comments?
4, This change may introduce indent issue:
@@ -3295,9 +3300,13 @@
{
   if(CG_movnti==0) return;
+  BB* body = LOOP_DESCR_loophead(loop);
+  OP* op = NULL;
+
+  if (CG_movnti != -1)
+  {
...
+  }
+  else

2012/3/9 Lai, Michael <michael....@amd.com>:
> Can a gatekeeper review the following change?
>
>
>
> Added non-temporal store generation support for single basic block while
> loops.  Extended the option -CG:movnti to include -1 as a means for the user
> to instruct the compiler to emit non-temporal stores unconditionally.
> (Behavior of existing functionality is not affected.)  Some TOP code cleanup
> for non-temporal store.
>
>
>
>
>
> Index: osprey/be/cg/x8664/cgtarget.cxx
>
> ===================================================================
>
> --- osprey/be/cg/x8664/cgtarget.cxx (revision 3875)
>
> +++ osprey/be/cg/x8664/cgtarget.cxx (working copy)
>
> @@ -378,9 +378,6 @@
>
>        case TOP_vstsd:
>
>        case TOP_vstsd_n32:
>
>        case TOP_vstsdx:
>
> -      case TOP_vstntsd:
>
> -      case TOP_vstntsdx:
>
> -      case TOP_vstntsdxx:
>
>        case TOP_vstorelpd:
>
>        case TOP_vdivxxxsd:
>
>        case TOP_vfaddxxxsd:
>
> @@ -3233,12 +3230,20 @@
>
>      case TOP_stsd:      return TOP_stntsd; break;
>
>      case TOP_stsdx:     return TOP_stntsdx; break;
>
>      case TOP_stsdxx:    return TOP_stntsdxx; break;
>
> -    case TOP_vstss:      return TOP_vstntss; break;
>
> -    case TOP_vstssx:     return TOP_vstntssx; break;
>
> -    case TOP_vstssxx:    return TOP_vstntssxx; break;
>
> -    case TOP_vstsd:      return TOP_vstntsd; break;
>
> -    case TOP_vstsdx:     return TOP_vstntsdx; break;
>
> -    case TOP_vstsdxx:    return TOP_vstntsdxx; break;
>
> +
>
> +    /* following cases asserted by CG_movnti == -1 */
>
> +    case TOP_stups:     return TOP_stntps; break;
>
> +    case TOP_stupsx:    return TOP_stntpsx; break;
>
> +    case TOP_stupsxx:   return TOP_stntpsxx; break;
>
> +    case TOP_vstups:    return TOP_vstntps; break;
>
> +    case TOP_vstupsx:   return TOP_vstntpsx; break;
>
> +    case TOP_vstupsxx:  return TOP_vstntpsxx; break;
>
> +    case TOP_stupd:     return TOP_stntpd; break;
>
> +    case TOP_stupdx:    return TOP_stntpdx; break;
>
> +    case TOP_stupdxx:   return TOP_stntpdxx; break;
>
> +    case TOP_vstupd:    return TOP_vstntpd; break;
>
> +    case TOP_vstupdx:   return TOP_vstntpdx; break;
>
> +    case TOP_vstupdxx:  return TOP_vstntpdxx; break;
>
>      }
>
>     FmtAssert(FALSE,("Non-Temporal Store: not supported!"));
>
>     return TOP_UNDEFINED;
>
> @@ -3295,9 +3300,13 @@
>
> {
>
>    if(CG_movnti==0) return;
>
>
>
> +  BB* body = LOOP_DESCR_loophead(loop);
>
> +  OP* op = NULL;
>
> +
>
> +  if (CG_movnti != -1)
>
> +  {
>
>    UINT32 trip_count = 0;
>
>    TN* trip_count_tn = CG_LOOP_Trip_Count(loop);
>
> -  BB* body = LOOP_DESCR_loophead(loop);
>
>
>
>    if( trip_count_tn != NULL &&
>
>        TN_is_constant(trip_count_tn) ){
>
> @@ -3305,12 +3314,13 @@
>
>
>
>    } else {
>
>      const ANNOTATION* annot = ANNOT_Get(BB_annotations(body),
> ANNOT_LOOPINFO);
>
> -    const LOOPINFO* info = ANNOT_loopinfo(annot);
>
> -
>
> -    trip_count = WN_loop_trip_est(LOOPINFO_wn(info));
>
> +    const LOOPINFO* info = NULL;
>
> +    if (annot != NULL)
>
> +      info = ANNOT_loopinfo(annot);
>
> +    if (info != NULL)
>
> +      trip_count = WN_loop_trip_est(LOOPINFO_wn(info));
>
>    }
>
>
>
> -  OP* op = NULL;
>
>    INT64 size = 0;
>
>
>
>    Working_Set.Clear();
>
> @@ -3332,6 +3342,10 @@
>
>
>
>    if( size < cache_size )
>
>      return;
>
> +  }
>
> +  else
>
> +    ; // CG_movnti == -1:  generate non-temporal stores unconditionally
>
> +
>
>    FOR_ALL_BB_OPs_FWD( body, op ){
>
>      if( OP_prefetch( op ) ){
>
>        /* Get rid of any prefetchw operation, because it "loads the
> prefetched
>
> @@ -3396,6 +3410,17 @@
>
>           new_top = Movnti_Top(OP_code(op));
>
>           break;
>
>         }
>
> +    case TOP_stups:
>
> +    case TOP_stupsx:
>
> +    case TOP_stupsxx:
>
> +    case TOP_vstups:
>
> +    case TOP_vstupsx:
>
> +    case TOP_vstupsxx:
>
> +       {
>
> +         if (CG_movnti == -1)
>
> +           new_top = Movnti_Top(OP_code(op));
>
> +         break;
>
> +       }
>
>      //SSE2 support
>
>      case TOP_stapd:
>
>      case TOP_stapdx:
>
> @@ -3420,24 +3445,30 @@
>
>     new_top = Movnti_Top(OP_code(op));
>
>           break;
>
>         }
>
> -    //SSE4a support
>
> +    //SSE4a/SSE41 support
>
>      case TOP_stss:
>
>      case TOP_stssx:
>
>      case TOP_stssxx:
>
>      case TOP_stsd:
>
>      case TOP_stsdx:
>
>      case TOP_stsdxx:
>
> -    case TOP_vstss:
>
> -    case TOP_vstssx:
>
> -    case TOP_vstssxx:
>
> -    case TOP_vstsd:
>
> -    case TOP_vstsdx:
>
> -    case TOP_vstsdxx:
>
>         {
>
> -         if(Is_Target_SSE4a())
>
> +         if(Is_Target_SSE4a() || Is_Target_SSE41())
>
>              new_top = Movnti_Top(OP_code(op));
>
>           break;
>
>         }
>
> +    case TOP_stupd:
>
> +    case TOP_stupdx:
>
> +    case TOP_stupdxx:
>
> +    case TOP_vstupd:
>
> +    case TOP_vstupdx:
>
> +    case TOP_vstupdxx:
>
> +       {
>
> +         if (CG_movnti == -1 &&
>
> +             (Is_Target_SSE4a() || Is_Target_SSE41()))
>
> +           new_top = Movnti_Top(OP_code(op));
>
> +         break;
>
> +       }
>
>      }//end switch
>
>
>
>     if( new_top != TOP_UNDEFINED )
>
> Index: osprey/be/cg/cg_flags.cxx
>
> ===================================================================
>
> --- osprey/be/cg/cg_flags.cxx (revision 3875)
>
> +++ osprey/be/cg/cg_flags.cxx (working copy)
>
> @@ -291,7 +291,7 @@
>
> BOOL CG_use_short_form = FALSE;
>
> UINT64 CG_p2align_freq = 10000;
>
> UINT32 CG_p2align_max_skip_bytes = 3;
>
> -UINT32 CG_movnti = 1000;
>
> +INT32 CG_movnti = 1000;
>
> BOOL CG_use_incdec = TRUE;
>
> BOOL CG_use_xortozero = TRUE; // bug 8592
>
> BOOL CG_use_xortozero_Set = FALSE;
>
> Index: osprey/be/cg/cgdriver.cxx
>
> ===================================================================
>
> --- osprey/be/cg/cgdriver.cxx (revision 3875)
>
> +++ osprey/be/cg/cgdriver.cxx (working copy)
>
> @@ -1237,7 +1237,7 @@
>
>    { OVK_BOOL, OV_INTERNAL, TRUE, "idivbyconst_opt", "",
>
>      0, 0, 0, &CG_idivbyconst_opt, NULL },
>
>    { OVK_UINT32, OV_INTERNAL, TRUE, "movnti", "",
>
> -    120, 0, UINT32_MAX>>1, &CG_movnti, NULL,
>
> +    120, -1, UINT32_MAX>>1, &CG_movnti, NULL,
>
>      "Use x86-64's movnti instead of mov when writing memory blocks of this
> size or larger (in KB)" },
>
>    { OVK_BOOL, OV_INTERNAL, TRUE, "cloop", "",
>
>      0, 0, 0, &CG_LOOP_cloop, NULL },
>
> Index: osprey/be/cg/cg_flags.h
>
> ===================================================================
>
> --- osprey/be/cg/cg_flags.h (revision 3875)
>
> +++ osprey/be/cg/cg_flags.h (working copy)
>
> @@ -881,7 +881,7 @@
>
> extern BOOL CG_compute_to;
>
> extern UINT64 CG_p2align_freq;
>
> extern UINT32 CG_p2align_max_skip_bytes;
>
> -extern UINT32 CG_movnti;
>
> +extern INT32 CG_movnti;
>
> extern BOOL CG_use_xortozero;
>
> extern BOOL CG_use_xortozero_Set;
>
> extern BOOL CG_use_incdec;
>
> Index: osprey/be/cg/cg_loop.cxx
>
> ===================================================================
>
> --- osprey/be/cg/cg_loop.cxx (revision 3875)
>
> +++ osprey/be/cg/cg_loop.cxx (working copy)
>
> @@ -7442,6 +7442,9 @@
>
>   CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_WHILELOOP_UNROLL ***");
>
>
>
>        cg_loop.Build_CG_LOOP_Info(TRUE);
>
> +#ifdef TARG_X8664
>
> +      CGTARG_LOOP_Optimize(loop);
>
> +#endif
>
>        cg_loop.Determine_Unroll_Factor();
>
>        Unroll_Dowhile_Loop(loop, cg_loop.Unroll_factor());
>
>        cg_loop.Recompute_Liveness();
>
>
> ------------------------------------------------------------------------------
> Virtualization & Cloud Management Using Capacity Planning
> Cloud computing makes use of virtualization - but cloud computing
> also focuses on allowing computing to be delivered as a service.
> http://www.accelacomm.com/jaw/sfnl/114/51521223/
> _______________________________________________
> Open64-devel mailing list
> Open64-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/open64-devel
>



-- 
Regards,
Lai Jian-Xin

------------------------------------------------------------------------------
Virtualization & Cloud Management Using Capacity Planning
Cloud computing makes use of virtualization - but cloud computing 
also focuses on allowing computing to be delivered as a service.
http://www.accelacomm.com/jaw/sfnl/114/51521223/
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to