Looks good to me. Thank you very much.

2012/3/11 Lai, Michael <michael....@amd.com>:
> I applied all your comments, re-tested, re-packaged and re-attached.  Please 
> review.
>
> Thanks,
> Michael
>
> -----Original Message-----
> From: Lai, Michael
> Sent: Friday, March 09, 2012 9:11 AM
> To: 'Jian-Xin Lai'
> Cc: open64-devel@lists.sourceforge.net
> Subject: RE: [Open64-devel] code review for non-temporal store generation [CG]
>
> Thanks for the comments, Jian-Xin.  I will act on your suggestions.
> [BTW, the "strangeness" as you pointed out and the indentation were a result 
> of my trying to retain the original code as much as possible.  Since that 
> created confusion, I will go ahead and make the changes as you suggested.]
>
> Thanks!
> Michael
>
> -----Original Message-----
> From: Jian-Xin Lai [mailto:laij...@gmail.com]
> Sent: Friday, March 09, 2012 7:25 AM
> To: Lai, Michael
> Cc: open64-devel@lists.sourceforge.net
> Subject: Re: [Open64-devel] code review for non-temporal store generation [CG]
>
> Some comments:
>
> 1. It's better to replace the new value -1 by a macro or enum.
> 2. The description for movnti defined in osprey/be/cg/cgdriver.cxx should 
> also be changed.
> 3. This change looks very strange:
> @@ -3332,6 +3342,10 @@
>   if( size < cache_size )
>     return;
> +  }
> +  else
> +    ; // CG_movnti == -1:  generate non-temporal stores unconditionall
> +
> Why not remove the whole else part and only keep the comments?
> 4, This change may introduce indent issue:
> @@ -3295,9 +3300,13 @@
> {
>   if(CG_movnti==0) return;
> +  BB* body = LOOP_DESCR_loophead(loop);
> +  OP* op = NULL;
> +
> +  if (CG_movnti != -1)
> +  {
> ...
> +  }
> +  else
>
> 2012/3/9 Lai, Michael <michael....@amd.com>:
>> Can a gatekeeper review the following change?
>>
>>
>>
>> Added non-temporal store generation support for single basic block
>> while loops.  Extended the option -CG:movnti to include -1 as a means
>> for the user to instruct the compiler to emit non-temporal stores 
>> unconditionally.
>> (Behavior of existing functionality is not affected.)  Some TOP code
>> cleanup for non-temporal store.
>>
>>
>>
>>
>>
>> Index: osprey/be/cg/x8664/cgtarget.cxx
>>
>> ===================================================================
>>
>> --- osprey/be/cg/x8664/cgtarget.cxx (revision 3875)
>>
>> +++ osprey/be/cg/x8664/cgtarget.cxx (working copy)
>>
>> @@ -378,9 +378,6 @@
>>
>>        case TOP_vstsd:
>>
>>        case TOP_vstsd_n32:
>>
>>        case TOP_vstsdx:
>>
>> -      case TOP_vstntsd:
>>
>> -      case TOP_vstntsdx:
>>
>> -      case TOP_vstntsdxx:
>>
>>        case TOP_vstorelpd:
>>
>>        case TOP_vdivxxxsd:
>>
>>        case TOP_vfaddxxxsd:
>>
>> @@ -3233,12 +3230,20 @@
>>
>>      case TOP_stsd:      return TOP_stntsd; break;
>>
>>      case TOP_stsdx:     return TOP_stntsdx; break;
>>
>>      case TOP_stsdxx:    return TOP_stntsdxx; break;
>>
>> -    case TOP_vstss:      return TOP_vstntss; break;
>>
>> -    case TOP_vstssx:     return TOP_vstntssx; break;
>>
>> -    case TOP_vstssxx:    return TOP_vstntssxx; break;
>>
>> -    case TOP_vstsd:      return TOP_vstntsd; break;
>>
>> -    case TOP_vstsdx:     return TOP_vstntsdx; break;
>>
>> -    case TOP_vstsdxx:    return TOP_vstntsdxx; break;
>>
>> +
>>
>> +    /* following cases asserted by CG_movnti == -1 */
>>
>> +    case TOP_stups:     return TOP_stntps; break;
>>
>> +    case TOP_stupsx:    return TOP_stntpsx; break;
>>
>> +    case TOP_stupsxx:   return TOP_stntpsxx; break;
>>
>> +    case TOP_vstups:    return TOP_vstntps; break;
>>
>> +    case TOP_vstupsx:   return TOP_vstntpsx; break;
>>
>> +    case TOP_vstupsxx:  return TOP_vstntpsxx; break;
>>
>> +    case TOP_stupd:     return TOP_stntpd; break;
>>
>> +    case TOP_stupdx:    return TOP_stntpdx; break;
>>
>> +    case TOP_stupdxx:   return TOP_stntpdxx; break;
>>
>> +    case TOP_vstupd:    return TOP_vstntpd; break;
>>
>> +    case TOP_vstupdx:   return TOP_vstntpdx; break;
>>
>> +    case TOP_vstupdxx:  return TOP_vstntpdxx; break;
>>
>>      }
>>
>>     FmtAssert(FALSE,("Non-Temporal Store: not supported!"));
>>
>>     return TOP_UNDEFINED;
>>
>> @@ -3295,9 +3300,13 @@
>>
>> {
>>
>>    if(CG_movnti==0) return;
>>
>>
>>
>> +  BB* body = LOOP_DESCR_loophead(loop);
>>
>> +  OP* op = NULL;
>>
>> +
>>
>> +  if (CG_movnti != -1)
>>
>> +  {
>>
>>    UINT32 trip_count = 0;
>>
>>    TN* trip_count_tn = CG_LOOP_Trip_Count(loop);
>>
>> -  BB* body = LOOP_DESCR_loophead(loop);
>>
>>
>>
>>    if( trip_count_tn != NULL &&
>>
>>        TN_is_constant(trip_count_tn) ){
>>
>> @@ -3305,12 +3314,13 @@
>>
>>
>>
>>    } else {
>>
>>      const ANNOTATION* annot = ANNOT_Get(BB_annotations(body),
>> ANNOT_LOOPINFO);
>>
>> -    const LOOPINFO* info = ANNOT_loopinfo(annot);
>>
>> -
>>
>> -    trip_count = WN_loop_trip_est(LOOPINFO_wn(info));
>>
>> +    const LOOPINFO* info = NULL;
>>
>> +    if (annot != NULL)
>>
>> +      info = ANNOT_loopinfo(annot);
>>
>> +    if (info != NULL)
>>
>> +      trip_count = WN_loop_trip_est(LOOPINFO_wn(info));
>>
>>    }
>>
>>
>>
>> -  OP* op = NULL;
>>
>>    INT64 size = 0;
>>
>>
>>
>>    Working_Set.Clear();
>>
>> @@ -3332,6 +3342,10 @@
>>
>>
>>
>>    if( size < cache_size )
>>
>>      return;
>>
>> +  }
>>
>> +  else
>>
>> +    ; // CG_movnti == -1:  generate non-temporal stores
>> +unconditionally
>>
>> +
>>
>>    FOR_ALL_BB_OPs_FWD( body, op ){
>>
>>      if( OP_prefetch( op ) ){
>>
>>        /* Get rid of any prefetchw operation, because it "loads the
>> prefetched
>>
>> @@ -3396,6 +3410,17 @@
>>
>>           new_top = Movnti_Top(OP_code(op));
>>
>>           break;
>>
>>         }
>>
>> +    case TOP_stups:
>>
>> +    case TOP_stupsx:
>>
>> +    case TOP_stupsxx:
>>
>> +    case TOP_vstups:
>>
>> +    case TOP_vstupsx:
>>
>> +    case TOP_vstupsxx:
>>
>> +       {
>>
>> +         if (CG_movnti == -1)
>>
>> +           new_top = Movnti_Top(OP_code(op));
>>
>> +         break;
>>
>> +       }
>>
>>      //SSE2 support
>>
>>      case TOP_stapd:
>>
>>      case TOP_stapdx:
>>
>> @@ -3420,24 +3445,30 @@
>>
>>     new_top = Movnti_Top(OP_code(op));
>>
>>           break;
>>
>>         }
>>
>> -    //SSE4a support
>>
>> +    //SSE4a/SSE41 support
>>
>>      case TOP_stss:
>>
>>      case TOP_stssx:
>>
>>      case TOP_stssxx:
>>
>>      case TOP_stsd:
>>
>>      case TOP_stsdx:
>>
>>      case TOP_stsdxx:
>>
>> -    case TOP_vstss:
>>
>> -    case TOP_vstssx:
>>
>> -    case TOP_vstssxx:
>>
>> -    case TOP_vstsd:
>>
>> -    case TOP_vstsdx:
>>
>> -    case TOP_vstsdxx:
>>
>>         {
>>
>> -         if(Is_Target_SSE4a())
>>
>> +         if(Is_Target_SSE4a() || Is_Target_SSE41())
>>
>>              new_top = Movnti_Top(OP_code(op));
>>
>>           break;
>>
>>         }
>>
>> +    case TOP_stupd:
>>
>> +    case TOP_stupdx:
>>
>> +    case TOP_stupdxx:
>>
>> +    case TOP_vstupd:
>>
>> +    case TOP_vstupdx:
>>
>> +    case TOP_vstupdxx:
>>
>> +       {
>>
>> +         if (CG_movnti == -1 &&
>>
>> +             (Is_Target_SSE4a() || Is_Target_SSE41()))
>>
>> +           new_top = Movnti_Top(OP_code(op));
>>
>> +         break;
>>
>> +       }
>>
>>      }//end switch
>>
>>
>>
>>     if( new_top != TOP_UNDEFINED )
>>
>> Index: osprey/be/cg/cg_flags.cxx
>>
>> ===================================================================
>>
>> --- osprey/be/cg/cg_flags.cxx (revision 3875)
>>
>> +++ osprey/be/cg/cg_flags.cxx (working copy)
>>
>> @@ -291,7 +291,7 @@
>>
>> BOOL CG_use_short_form = FALSE;
>>
>> UINT64 CG_p2align_freq = 10000;
>>
>> UINT32 CG_p2align_max_skip_bytes = 3;
>>
>> -UINT32 CG_movnti = 1000;
>>
>> +INT32 CG_movnti = 1000;
>>
>> BOOL CG_use_incdec = TRUE;
>>
>> BOOL CG_use_xortozero = TRUE; // bug 8592
>>
>> BOOL CG_use_xortozero_Set = FALSE;
>>
>> Index: osprey/be/cg/cgdriver.cxx
>>
>> ===================================================================
>>
>> --- osprey/be/cg/cgdriver.cxx (revision 3875)
>>
>> +++ osprey/be/cg/cgdriver.cxx (working copy)
>>
>> @@ -1237,7 +1237,7 @@
>>
>>    { OVK_BOOL, OV_INTERNAL, TRUE, "idivbyconst_opt", "",
>>
>>      0, 0, 0, &CG_idivbyconst_opt, NULL },
>>
>>    { OVK_UINT32, OV_INTERNAL, TRUE, "movnti", "",
>>
>> -    120, 0, UINT32_MAX>>1, &CG_movnti, NULL,
>>
>> +    120, -1, UINT32_MAX>>1, &CG_movnti, NULL,
>>
>>      "Use x86-64's movnti instead of mov when writing memory blocks of
>> this size or larger (in KB)" },
>>
>>    { OVK_BOOL, OV_INTERNAL, TRUE, "cloop", "",
>>
>>      0, 0, 0, &CG_LOOP_cloop, NULL },
>>
>> Index: osprey/be/cg/cg_flags.h
>>
>> ===================================================================
>>
>> --- osprey/be/cg/cg_flags.h (revision 3875)
>>
>> +++ osprey/be/cg/cg_flags.h (working copy)
>>
>> @@ -881,7 +881,7 @@
>>
>> extern BOOL CG_compute_to;
>>
>> extern UINT64 CG_p2align_freq;
>>
>> extern UINT32 CG_p2align_max_skip_bytes;
>>
>> -extern UINT32 CG_movnti;
>>
>> +extern INT32 CG_movnti;
>>
>> extern BOOL CG_use_xortozero;
>>
>> extern BOOL CG_use_xortozero_Set;
>>
>> extern BOOL CG_use_incdec;
>>
>> Index: osprey/be/cg/cg_loop.cxx
>>
>> ===================================================================
>>
>> --- osprey/be/cg/cg_loop.cxx (revision 3875)
>>
>> +++ osprey/be/cg/cg_loop.cxx (working copy)
>>
>> @@ -7442,6 +7442,9 @@
>>
>>   CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_WHILELOOP_UNROLL
>> ***");
>>
>>
>>
>>        cg_loop.Build_CG_LOOP_Info(TRUE);
>>
>> +#ifdef TARG_X8664
>>
>> +      CGTARG_LOOP_Optimize(loop);
>>
>> +#endif
>>
>>        cg_loop.Determine_Unroll_Factor();
>>
>>        Unroll_Dowhile_Loop(loop, cg_loop.Unroll_factor());
>>
>>        cg_loop.Recompute_Liveness();
>>
>>
>> ----------------------------------------------------------------------
>> -------- Virtualization & Cloud Management Using Capacity Planning
>> Cloud computing makes use of virtualization - but cloud computing also
>> focuses on allowing computing to be delivered as a service.
>> http://www.accelacomm.com/jaw/sfnl/114/51521223/
>> _______________________________________________
>> Open64-devel mailing list
>> Open64-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/open64-devel
>>
>
>
>
> --
> Regards,
> Lai Jian-Xin
>



-- 
Regards,
Lai Jian-Xin

------------------------------------------------------------------------------
Try before you buy = See our experts in action!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-dev2
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to