Looks good to me. Thank you very much. 2012/3/11 Lai, Michael <michael....@amd.com>: > I applied all your comments, re-tested, re-packaged and re-attached. Please > review. > > Thanks, > Michael > > -----Original Message----- > From: Lai, Michael > Sent: Friday, March 09, 2012 9:11 AM > To: 'Jian-Xin Lai' > Cc: open64-devel@lists.sourceforge.net > Subject: RE: [Open64-devel] code review for non-temporal store generation [CG] > > Thanks for the comments, Jian-Xin. I will act on your suggestions. > [BTW, the "strangeness" as you pointed out and the indentation were a result > of my trying to retain the original code as much as possible. Since that > created confusion, I will go ahead and make the changes as you suggested.] > > Thanks! > Michael > > -----Original Message----- > From: Jian-Xin Lai [mailto:laij...@gmail.com] > Sent: Friday, March 09, 2012 7:25 AM > To: Lai, Michael > Cc: open64-devel@lists.sourceforge.net > Subject: Re: [Open64-devel] code review for non-temporal store generation [CG] > > Some comments: > > 1. It's better to replace the new value -1 by a macro or enum. > 2. The description for movnti defined in osprey/be/cg/cgdriver.cxx should > also be changed. > 3. This change looks very strange: > @@ -3332,6 +3342,10 @@ > if( size < cache_size ) > return; > + } > + else > + ; // CG_movnti == -1: generate non-temporal stores unconditionall > + > Why not remove the whole else part and only keep the comments? > 4, This change may introduce indent issue: > @@ -3295,9 +3300,13 @@ > { > if(CG_movnti==0) return; > + BB* body = LOOP_DESCR_loophead(loop); > + OP* op = NULL; > + > + if (CG_movnti != -1) > + { > ... > + } > + else > > 2012/3/9 Lai, Michael <michael....@amd.com>: >> Can a gatekeeper review the following change? >> >> >> >> Added non-temporal store generation support for single basic block >> while loops. Extended the option -CG:movnti to include -1 as a means >> for the user to instruct the compiler to emit non-temporal stores >> unconditionally. >> (Behavior of existing functionality is not affected.) Some TOP code >> cleanup for non-temporal store. >> >> >> >> >> >> Index: osprey/be/cg/x8664/cgtarget.cxx >> >> =================================================================== >> >> --- osprey/be/cg/x8664/cgtarget.cxx (revision 3875) >> >> +++ osprey/be/cg/x8664/cgtarget.cxx (working copy) >> >> @@ -378,9 +378,6 @@ >> >> case TOP_vstsd: >> >> case TOP_vstsd_n32: >> >> case TOP_vstsdx: >> >> - case TOP_vstntsd: >> >> - case TOP_vstntsdx: >> >> - case TOP_vstntsdxx: >> >> case TOP_vstorelpd: >> >> case TOP_vdivxxxsd: >> >> case TOP_vfaddxxxsd: >> >> @@ -3233,12 +3230,20 @@ >> >> case TOP_stsd: return TOP_stntsd; break; >> >> case TOP_stsdx: return TOP_stntsdx; break; >> >> case TOP_stsdxx: return TOP_stntsdxx; break; >> >> - case TOP_vstss: return TOP_vstntss; break; >> >> - case TOP_vstssx: return TOP_vstntssx; break; >> >> - case TOP_vstssxx: return TOP_vstntssxx; break; >> >> - case TOP_vstsd: return TOP_vstntsd; break; >> >> - case TOP_vstsdx: return TOP_vstntsdx; break; >> >> - case TOP_vstsdxx: return TOP_vstntsdxx; break; >> >> + >> >> + /* following cases asserted by CG_movnti == -1 */ >> >> + case TOP_stups: return TOP_stntps; break; >> >> + case TOP_stupsx: return TOP_stntpsx; break; >> >> + case TOP_stupsxx: return TOP_stntpsxx; break; >> >> + case TOP_vstups: return TOP_vstntps; break; >> >> + case TOP_vstupsx: return TOP_vstntpsx; break; >> >> + case TOP_vstupsxx: return TOP_vstntpsxx; break; >> >> + case TOP_stupd: return TOP_stntpd; break; >> >> + case TOP_stupdx: return TOP_stntpdx; break; >> >> + case TOP_stupdxx: return TOP_stntpdxx; break; >> >> + case TOP_vstupd: return TOP_vstntpd; break; >> >> + case TOP_vstupdx: return TOP_vstntpdx; break; >> >> + case TOP_vstupdxx: return TOP_vstntpdxx; break; >> >> } >> >> FmtAssert(FALSE,("Non-Temporal Store: not supported!")); >> >> return TOP_UNDEFINED; >> >> @@ -3295,9 +3300,13 @@ >> >> { >> >> if(CG_movnti==0) return; >> >> >> >> + BB* body = LOOP_DESCR_loophead(loop); >> >> + OP* op = NULL; >> >> + >> >> + if (CG_movnti != -1) >> >> + { >> >> UINT32 trip_count = 0; >> >> TN* trip_count_tn = CG_LOOP_Trip_Count(loop); >> >> - BB* body = LOOP_DESCR_loophead(loop); >> >> >> >> if( trip_count_tn != NULL && >> >> TN_is_constant(trip_count_tn) ){ >> >> @@ -3305,12 +3314,13 @@ >> >> >> >> } else { >> >> const ANNOTATION* annot = ANNOT_Get(BB_annotations(body), >> ANNOT_LOOPINFO); >> >> - const LOOPINFO* info = ANNOT_loopinfo(annot); >> >> - >> >> - trip_count = WN_loop_trip_est(LOOPINFO_wn(info)); >> >> + const LOOPINFO* info = NULL; >> >> + if (annot != NULL) >> >> + info = ANNOT_loopinfo(annot); >> >> + if (info != NULL) >> >> + trip_count = WN_loop_trip_est(LOOPINFO_wn(info)); >> >> } >> >> >> >> - OP* op = NULL; >> >> INT64 size = 0; >> >> >> >> Working_Set.Clear(); >> >> @@ -3332,6 +3342,10 @@ >> >> >> >> if( size < cache_size ) >> >> return; >> >> + } >> >> + else >> >> + ; // CG_movnti == -1: generate non-temporal stores >> +unconditionally >> >> + >> >> FOR_ALL_BB_OPs_FWD( body, op ){ >> >> if( OP_prefetch( op ) ){ >> >> /* Get rid of any prefetchw operation, because it "loads the >> prefetched >> >> @@ -3396,6 +3410,17 @@ >> >> new_top = Movnti_Top(OP_code(op)); >> >> break; >> >> } >> >> + case TOP_stups: >> >> + case TOP_stupsx: >> >> + case TOP_stupsxx: >> >> + case TOP_vstups: >> >> + case TOP_vstupsx: >> >> + case TOP_vstupsxx: >> >> + { >> >> + if (CG_movnti == -1) >> >> + new_top = Movnti_Top(OP_code(op)); >> >> + break; >> >> + } >> >> //SSE2 support >> >> case TOP_stapd: >> >> case TOP_stapdx: >> >> @@ -3420,24 +3445,30 @@ >> >> new_top = Movnti_Top(OP_code(op)); >> >> break; >> >> } >> >> - //SSE4a support >> >> + //SSE4a/SSE41 support >> >> case TOP_stss: >> >> case TOP_stssx: >> >> case TOP_stssxx: >> >> case TOP_stsd: >> >> case TOP_stsdx: >> >> case TOP_stsdxx: >> >> - case TOP_vstss: >> >> - case TOP_vstssx: >> >> - case TOP_vstssxx: >> >> - case TOP_vstsd: >> >> - case TOP_vstsdx: >> >> - case TOP_vstsdxx: >> >> { >> >> - if(Is_Target_SSE4a()) >> >> + if(Is_Target_SSE4a() || Is_Target_SSE41()) >> >> new_top = Movnti_Top(OP_code(op)); >> >> break; >> >> } >> >> + case TOP_stupd: >> >> + case TOP_stupdx: >> >> + case TOP_stupdxx: >> >> + case TOP_vstupd: >> >> + case TOP_vstupdx: >> >> + case TOP_vstupdxx: >> >> + { >> >> + if (CG_movnti == -1 && >> >> + (Is_Target_SSE4a() || Is_Target_SSE41())) >> >> + new_top = Movnti_Top(OP_code(op)); >> >> + break; >> >> + } >> >> }//end switch >> >> >> >> if( new_top != TOP_UNDEFINED ) >> >> Index: osprey/be/cg/cg_flags.cxx >> >> =================================================================== >> >> --- osprey/be/cg/cg_flags.cxx (revision 3875) >> >> +++ osprey/be/cg/cg_flags.cxx (working copy) >> >> @@ -291,7 +291,7 @@ >> >> BOOL CG_use_short_form = FALSE; >> >> UINT64 CG_p2align_freq = 10000; >> >> UINT32 CG_p2align_max_skip_bytes = 3; >> >> -UINT32 CG_movnti = 1000; >> >> +INT32 CG_movnti = 1000; >> >> BOOL CG_use_incdec = TRUE; >> >> BOOL CG_use_xortozero = TRUE; // bug 8592 >> >> BOOL CG_use_xortozero_Set = FALSE; >> >> Index: osprey/be/cg/cgdriver.cxx >> >> =================================================================== >> >> --- osprey/be/cg/cgdriver.cxx (revision 3875) >> >> +++ osprey/be/cg/cgdriver.cxx (working copy) >> >> @@ -1237,7 +1237,7 @@ >> >> { OVK_BOOL, OV_INTERNAL, TRUE, "idivbyconst_opt", "", >> >> 0, 0, 0, &CG_idivbyconst_opt, NULL }, >> >> { OVK_UINT32, OV_INTERNAL, TRUE, "movnti", "", >> >> - 120, 0, UINT32_MAX>>1, &CG_movnti, NULL, >> >> + 120, -1, UINT32_MAX>>1, &CG_movnti, NULL, >> >> "Use x86-64's movnti instead of mov when writing memory blocks of >> this size or larger (in KB)" }, >> >> { OVK_BOOL, OV_INTERNAL, TRUE, "cloop", "", >> >> 0, 0, 0, &CG_LOOP_cloop, NULL }, >> >> Index: osprey/be/cg/cg_flags.h >> >> =================================================================== >> >> --- osprey/be/cg/cg_flags.h (revision 3875) >> >> +++ osprey/be/cg/cg_flags.h (working copy) >> >> @@ -881,7 +881,7 @@ >> >> extern BOOL CG_compute_to; >> >> extern UINT64 CG_p2align_freq; >> >> extern UINT32 CG_p2align_max_skip_bytes; >> >> -extern UINT32 CG_movnti; >> >> +extern INT32 CG_movnti; >> >> extern BOOL CG_use_xortozero; >> >> extern BOOL CG_use_xortozero_Set; >> >> extern BOOL CG_use_incdec; >> >> Index: osprey/be/cg/cg_loop.cxx >> >> =================================================================== >> >> --- osprey/be/cg/cg_loop.cxx (revision 3875) >> >> +++ osprey/be/cg/cg_loop.cxx (working copy) >> >> @@ -7442,6 +7442,9 @@ >> >> CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_WHILELOOP_UNROLL >> ***"); >> >> >> >> cg_loop.Build_CG_LOOP_Info(TRUE); >> >> +#ifdef TARG_X8664 >> >> + CGTARG_LOOP_Optimize(loop); >> >> +#endif >> >> cg_loop.Determine_Unroll_Factor(); >> >> Unroll_Dowhile_Loop(loop, cg_loop.Unroll_factor()); >> >> cg_loop.Recompute_Liveness(); >> >> >> ---------------------------------------------------------------------- >> -------- Virtualization & Cloud Management Using Capacity Planning >> Cloud computing makes use of virtualization - but cloud computing also >> focuses on allowing computing to be delivered as a service. >> http://www.accelacomm.com/jaw/sfnl/114/51521223/ >> _______________________________________________ >> Open64-devel mailing list >> Open64-devel@lists.sourceforge.net >> https://lists.sourceforge.net/lists/listinfo/open64-devel >> > > > > -- > Regards, > Lai Jian-Xin >
-- Regards, Lai Jian-Xin ------------------------------------------------------------------------------ Try before you buy = See our experts in action! The most comprehensive online learning library for Microsoft developers is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3, Metro Style Apps, more. Free future releases when you subscribe now! http://p.sf.net/sfu/learndevnow-dev2 _______________________________________________ Open64-devel mailing list Open64-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/open64-devel