Thanks for the comments, Jian-Xin. I will act on your suggestions. [BTW, the "strangeness" as you pointed out and the indentation were a result of my trying to retain the original code as much as possible. Since that created confusion, I will go ahead and make the changes as you suggested.]
Thanks! Michael -----Original Message----- From: Jian-Xin Lai [mailto:laij...@gmail.com] Sent: Friday, March 09, 2012 7:25 AM To: Lai, Michael Cc: open64-devel@lists.sourceforge.net Subject: Re: [Open64-devel] code review for non-temporal store generation [CG] Some comments: 1. It's better to replace the new value -1 by a macro or enum. 2. The description for movnti defined in osprey/be/cg/cgdriver.cxx should also be changed. 3. This change looks very strange: @@ -3332,6 +3342,10 @@ if( size < cache_size ) return; + } + else + ; // CG_movnti == -1: generate non-temporal stores unconditionall + Why not remove the whole else part and only keep the comments? 4, This change may introduce indent issue: @@ -3295,9 +3300,13 @@ { if(CG_movnti==0) return; + BB* body = LOOP_DESCR_loophead(loop); + OP* op = NULL; + + if (CG_movnti != -1) + { ... + } + else 2012/3/9 Lai, Michael <michael....@amd.com>: > Can a gatekeeper review the following change? > > > > Added non-temporal store generation support for single basic block > while loops. Extended the option -CG:movnti to include -1 as a means > for the user to instruct the compiler to emit non-temporal stores > unconditionally. > (Behavior of existing functionality is not affected.) Some TOP code > cleanup for non-temporal store. > > > > > > Index: osprey/be/cg/x8664/cgtarget.cxx > > =================================================================== > > --- osprey/be/cg/x8664/cgtarget.cxx (revision 3875) > > +++ osprey/be/cg/x8664/cgtarget.cxx (working copy) > > @@ -378,9 +378,6 @@ > > case TOP_vstsd: > > case TOP_vstsd_n32: > > case TOP_vstsdx: > > - case TOP_vstntsd: > > - case TOP_vstntsdx: > > - case TOP_vstntsdxx: > > case TOP_vstorelpd: > > case TOP_vdivxxxsd: > > case TOP_vfaddxxxsd: > > @@ -3233,12 +3230,20 @@ > > case TOP_stsd: return TOP_stntsd; break; > > case TOP_stsdx: return TOP_stntsdx; break; > > case TOP_stsdxx: return TOP_stntsdxx; break; > > - case TOP_vstss: return TOP_vstntss; break; > > - case TOP_vstssx: return TOP_vstntssx; break; > > - case TOP_vstssxx: return TOP_vstntssxx; break; > > - case TOP_vstsd: return TOP_vstntsd; break; > > - case TOP_vstsdx: return TOP_vstntsdx; break; > > - case TOP_vstsdxx: return TOP_vstntsdxx; break; > > + > > + /* following cases asserted by CG_movnti == -1 */ > > + case TOP_stups: return TOP_stntps; break; > > + case TOP_stupsx: return TOP_stntpsx; break; > > + case TOP_stupsxx: return TOP_stntpsxx; break; > > + case TOP_vstups: return TOP_vstntps; break; > > + case TOP_vstupsx: return TOP_vstntpsx; break; > > + case TOP_vstupsxx: return TOP_vstntpsxx; break; > > + case TOP_stupd: return TOP_stntpd; break; > > + case TOP_stupdx: return TOP_stntpdx; break; > > + case TOP_stupdxx: return TOP_stntpdxx; break; > > + case TOP_vstupd: return TOP_vstntpd; break; > > + case TOP_vstupdx: return TOP_vstntpdx; break; > > + case TOP_vstupdxx: return TOP_vstntpdxx; break; > > } > > FmtAssert(FALSE,("Non-Temporal Store: not supported!")); > > return TOP_UNDEFINED; > > @@ -3295,9 +3300,13 @@ > > { > > if(CG_movnti==0) return; > > > > + BB* body = LOOP_DESCR_loophead(loop); > > + OP* op = NULL; > > + > > + if (CG_movnti != -1) > > + { > > UINT32 trip_count = 0; > > TN* trip_count_tn = CG_LOOP_Trip_Count(loop); > > - BB* body = LOOP_DESCR_loophead(loop); > > > > if( trip_count_tn != NULL && > > TN_is_constant(trip_count_tn) ){ > > @@ -3305,12 +3314,13 @@ > > > > } else { > > const ANNOTATION* annot = ANNOT_Get(BB_annotations(body), > ANNOT_LOOPINFO); > > - const LOOPINFO* info = ANNOT_loopinfo(annot); > > - > > - trip_count = WN_loop_trip_est(LOOPINFO_wn(info)); > > + const LOOPINFO* info = NULL; > > + if (annot != NULL) > > + info = ANNOT_loopinfo(annot); > > + if (info != NULL) > > + trip_count = WN_loop_trip_est(LOOPINFO_wn(info)); > > } > > > > - OP* op = NULL; > > INT64 size = 0; > > > > Working_Set.Clear(); > > @@ -3332,6 +3342,10 @@ > > > > if( size < cache_size ) > > return; > > + } > > + else > > + ; // CG_movnti == -1: generate non-temporal stores > +unconditionally > > + > > FOR_ALL_BB_OPs_FWD( body, op ){ > > if( OP_prefetch( op ) ){ > > /* Get rid of any prefetchw operation, because it "loads the > prefetched > > @@ -3396,6 +3410,17 @@ > > new_top = Movnti_Top(OP_code(op)); > > break; > > } > > + case TOP_stups: > > + case TOP_stupsx: > > + case TOP_stupsxx: > > + case TOP_vstups: > > + case TOP_vstupsx: > > + case TOP_vstupsxx: > > + { > > + if (CG_movnti == -1) > > + new_top = Movnti_Top(OP_code(op)); > > + break; > > + } > > //SSE2 support > > case TOP_stapd: > > case TOP_stapdx: > > @@ -3420,24 +3445,30 @@ > > new_top = Movnti_Top(OP_code(op)); > > break; > > } > > - //SSE4a support > > + //SSE4a/SSE41 support > > case TOP_stss: > > case TOP_stssx: > > case TOP_stssxx: > > case TOP_stsd: > > case TOP_stsdx: > > case TOP_stsdxx: > > - case TOP_vstss: > > - case TOP_vstssx: > > - case TOP_vstssxx: > > - case TOP_vstsd: > > - case TOP_vstsdx: > > - case TOP_vstsdxx: > > { > > - if(Is_Target_SSE4a()) > > + if(Is_Target_SSE4a() || Is_Target_SSE41()) > > new_top = Movnti_Top(OP_code(op)); > > break; > > } > > + case TOP_stupd: > > + case TOP_stupdx: > > + case TOP_stupdxx: > > + case TOP_vstupd: > > + case TOP_vstupdx: > > + case TOP_vstupdxx: > > + { > > + if (CG_movnti == -1 && > > + (Is_Target_SSE4a() || Is_Target_SSE41())) > > + new_top = Movnti_Top(OP_code(op)); > > + break; > > + } > > }//end switch > > > > if( new_top != TOP_UNDEFINED ) > > Index: osprey/be/cg/cg_flags.cxx > > =================================================================== > > --- osprey/be/cg/cg_flags.cxx (revision 3875) > > +++ osprey/be/cg/cg_flags.cxx (working copy) > > @@ -291,7 +291,7 @@ > > BOOL CG_use_short_form = FALSE; > > UINT64 CG_p2align_freq = 10000; > > UINT32 CG_p2align_max_skip_bytes = 3; > > -UINT32 CG_movnti = 1000; > > +INT32 CG_movnti = 1000; > > BOOL CG_use_incdec = TRUE; > > BOOL CG_use_xortozero = TRUE; // bug 8592 > > BOOL CG_use_xortozero_Set = FALSE; > > Index: osprey/be/cg/cgdriver.cxx > > =================================================================== > > --- osprey/be/cg/cgdriver.cxx (revision 3875) > > +++ osprey/be/cg/cgdriver.cxx (working copy) > > @@ -1237,7 +1237,7 @@ > > { OVK_BOOL, OV_INTERNAL, TRUE, "idivbyconst_opt", "", > > 0, 0, 0, &CG_idivbyconst_opt, NULL }, > > { OVK_UINT32, OV_INTERNAL, TRUE, "movnti", "", > > - 120, 0, UINT32_MAX>>1, &CG_movnti, NULL, > > + 120, -1, UINT32_MAX>>1, &CG_movnti, NULL, > > "Use x86-64's movnti instead of mov when writing memory blocks of > this size or larger (in KB)" }, > > { OVK_BOOL, OV_INTERNAL, TRUE, "cloop", "", > > 0, 0, 0, &CG_LOOP_cloop, NULL }, > > Index: osprey/be/cg/cg_flags.h > > =================================================================== > > --- osprey/be/cg/cg_flags.h (revision 3875) > > +++ osprey/be/cg/cg_flags.h (working copy) > > @@ -881,7 +881,7 @@ > > extern BOOL CG_compute_to; > > extern UINT64 CG_p2align_freq; > > extern UINT32 CG_p2align_max_skip_bytes; > > -extern UINT32 CG_movnti; > > +extern INT32 CG_movnti; > > extern BOOL CG_use_xortozero; > > extern BOOL CG_use_xortozero_Set; > > extern BOOL CG_use_incdec; > > Index: osprey/be/cg/cg_loop.cxx > > =================================================================== > > --- osprey/be/cg/cg_loop.cxx (revision 3875) > > +++ osprey/be/cg/cg_loop.cxx (working copy) > > @@ -7442,6 +7442,9 @@ > > CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_WHILELOOP_UNROLL > ***"); > > > > cg_loop.Build_CG_LOOP_Info(TRUE); > > +#ifdef TARG_X8664 > > + CGTARG_LOOP_Optimize(loop); > > +#endif > > cg_loop.Determine_Unroll_Factor(); > > Unroll_Dowhile_Loop(loop, cg_loop.Unroll_factor()); > > cg_loop.Recompute_Liveness(); > > > ---------------------------------------------------------------------- > -------- Virtualization & Cloud Management Using Capacity Planning > Cloud computing makes use of virtualization - but cloud computing also > focuses on allowing computing to be delivered as a service. > http://www.accelacomm.com/jaw/sfnl/114/51521223/ > _______________________________________________ > Open64-devel mailing list > Open64-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/open64-devel > -- Regards, Lai Jian-Xin ------------------------------------------------------------------------------ Virtualization & Cloud Management Using Capacity Planning Cloud computing makes use of virtualization - but cloud computing also focuses on allowing computing to be delivered as a service. http://www.accelacomm.com/jaw/sfnl/114/51521223/ _______________________________________________ Open64-devel mailing list Open64-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/open64-devel