Hi, when compiled without -O2 or -O3, the blocking version performs 166 million data accesses, while the original version without blocking performs 10 million data accesses.
After compiling with gcc -O3 -o mar5ti mar5ti.c: 1. There is nearly no difference between the results of the two versions. Why is there no difference? 2. Does this mean that the compiler has already done the cache optimization, so that there is no longer any need to consider cache optimization when programming in C nowadays? #define min(a,b) (((a)<(b))?(a):(b)) #define max(a,b) (((a)>(b))?(a):(b)) int main() { int x[100][100]; int y[100][100]; int z[100][100]; int i=0; int j=0; int k=0; int N=100; int r=0; int jj=0; int kk=0; int B = 5; /* for(i=0;i<N;++i) { for(j=0;j<N;++j) { r=0; for(k=0;k<N;++k) { r=r+y[i][k]*z[k][j]; } x[i][j]=r; } } */ for(jj=0;jj<N;jj=jj+B) for(kk=0;kk<N;kk=kk+B) for(i=0;i<N;++i) { for(j=0;j<min(jj+B,N);++j) { r=0; for(k=kk;k<min(kk+B,N);++k) { r=r+y[i][k]*z[k][j]; } x[i][j]=x[i][j]+r; } } return 0; } /* for(i=0;i<N;++i) { for(j=0;j<N;++j) { r=0; for(k=0;k<N;++k) { r=r+y[i][k]*z[k][j]; } x[i][j]=r; } } martin@ubuntu:~$ valgrind --tool=cachegrind ./mar5ti ==2934== Cachegrind, a cache and branch-prediction profiler ==2934== Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote et al. ==2934== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info ==2934== Command: ./mar5ti ==2934== --2934-- warning: L3 cache found, using its data for the LL simulation. 
==2934== ==2934== I refs: 113,272 ==2934== I1 misses: 686 ==2934== LLi misses: 681 ==2934== I1 miss rate: 0.60% ==2934== LLi miss rate: 0.60% ==2934== ==2934== D refs: 52,724 (37,442 rd + 15,282 wr) ==2934== D1 misses: 1,075 ( 930 rd + 145 wr) ==2934== LLd misses: 982 ( 847 rd + 135 wr) ==2934== D1 miss rate: 2.0% ( 2.4% + 0.9% ) ==2934== LLd miss rate: 1.8% ( 2.2% + 0.8% ) ==2934== ==2934== LL refs: 1,761 ( 1,616 rd + 145 wr) ==2934== LL misses: 1,663 ( 1,528 rd + 135 wr) ==2934== LL miss rate: 1.0% ( 1.0% + 0.8% ) for(jj=0;jj<N;jj=jj+B) for(kk=0;kk<N;kk=kk+B) for(i=0;i<N;++i) { for(j=0;j<min(jj+B,N);++j) { r=0; for(k=kk;k<min(kk+B,N);++k) { r=r+y[i][k]*z[k][j]; } x[i][j]=x[i][j]+r; } } martin@ubuntu:~$ valgrind --tool=cachegrind ./mar5ti ==3047== Cachegrind, a cache and branch-prediction profiler ==3047== Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote et al. ==3047== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info ==3047== Command: ./mar5ti ==3047== --3047-- warning: L3 cache found, using its data for the LL simulation. ==3047== ==3047== I refs: 113,268 ==3047== I1 misses: 686 ==3047== LLi misses: 681 ==3047== I1 miss rate: 0.60% ==3047== LLi miss rate: 0.60% ==3047== ==3047== D refs: 52,724 (37,442 rd + 15,282 wr) ==3047== D1 misses: 1,075 ( 930 rd + 145 wr) ==3047== LLd misses: 982 ( 847 rd + 135 wr) ==3047== D1 miss rate: 2.0% ( 2.4% + 0.9% ) ==3047== LLd miss rate: 1.8% ( 2.2% + 0.8% ) ==3047== ==3047== LL refs: 1,761 ( 1,616 rd + 145 wr) ==3047== LL misses: 1,663 ( 1,528 rd + 135 wr) ==3047== LL miss rate: 1.0% ( 1.0% + 0.8% ) */ Regards, Martin ________________________________________ From: Josef Weidendorfer <josef.weidendor...@gmx.de> Sent: Tuesday, February 16, 2016 6:37 To: valgrind-users@lists.sourceforge.net Subject: Re: [Valgrind-users] why miss rate decrease but number of misses increase in ubuntu 12 in vmware player 12 ? 
Am 15.02.2016 um 11:25 schrieb Mandy Martino: > why > > I1 misses increase, LLi misses increase, LL misses increase, D1 misses > increase > though miss rate decrease at this row 0.1% + 0.0% ? > > which indicator show the correct number that can show the improvement > after optimization? I see you do blocking in the 2nd version. However, the number of data accesses is 166 million vs. 10 million in your 1st version. I assume this is because you did not compile with -O2 or -O3 ? Miss rate is a relative number, based on total number of accesses. A comparison is meaningless if the number of accesses is so different. Josef > > > #define min(a,b) (((a)<(b))?(a):(b)) > #define max(a,b) (((a)>(b))?(a):(b)) > int main() > { > int x[100][100]; > int y[100][100]; > int z[100][100]; > int i=0; > int j=0; > int k=0; > int N=100; > int r=0; > int jj=0; > int kk=0; > int B = 5; > /* > for(i=0;i<N;++i) > { > for(j=0;j<N;++j) > { > r=0; > for(k=0;k<N;++k) > { > r=r+y[i][k]*z[k][j]; > } > x[i][j]=r; > } > } > */ > for(jj=0;jj<N;jj=jj+B) > for(kk=0;kk<N;kk=kk+B) > for(i=0;i<N;++i) > { > for(j=0;j<min(jj+B,N);++j) > { > r=0; > for(k=kk;k<min(kk+B,N);++k) > { > r=r+y[i][k]*z[k][j]; > } > x[i][j]=x[i][j]+r; > } > } > return 0; > } > /* > for(i=0;i<N;++i) > { > for(j=0;j<N;++j) > { > r=0; > for(k=0;k<N;++k) > { > r=r+y[i][k]*z[k][j]; > } > x[i][j]=r; > } > } > > martin@ubuntu:~$ valgrind --tool=cachegrind ./mar5ti > ==4602== Cachegrind, a cache and branch-prediction profiler > ==4602== Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote > et al. > ==4602== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info > ==4602== Command: ./mar5ti > ==4602== > --4602-- warning: L3 cache found, using its data for the LL simulation. 
> ==4602== > ==4602== I refs: 14,264,184 > ==4602== I1 misses: 689 > ==4602== LLi misses: 684 > ==4602== I1 miss rate: 0.00% > ==4602== LLi miss rate: 0.00% > ==4602== > ==4602== D refs: 10,163,336 (10,117,945 rd + 45,391 wr) > ==4602== D1 misses: 64,978 ( 64,200 rd + 778 wr) > ==4602== LLd misses: 2,823 ( 2,063 rd + 760 wr) > ==4602== D1 miss rate: 0.6% ( 0.6% + 1.7% ) > ==4602== LLd miss rate: 0.0% ( 0.0% + 1.6% ) > ==4602== > ==4602== LL refs: 65,667 ( 64,889 rd + 778 wr) > ==4602== LL misses: 3,507 ( 2,747 rd + 760 wr) > ==4602== LL miss rate: 0.0% ( 0.0% + 1.6% ) > > for(jj=0;jj<N;jj=jj+B) > for(kk=0;kk<N;kk=kk+B) > for(i=0;i<N;++i) > { > for(j=0;j<min(jj+B,N);++j) > { > r=0; > for(k=kk;k<min(kk+B,N);++k) > { > r=r+y[i][k]*z[k][j]; > } > x[i][j]=x[i][j]+r; > } > } > martin@ubuntu:~$ valgrind --tool=cachegrind ./mar5ti > ==4654== Cachegrind, a cache and branch-prediction profiler > ==4654== Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote > et al. > ==4654== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info > ==4654== Command: ./mar5ti > ==4654== > --4654-- warning: L3 cache found, using its data for the LL simulation. 
> ==4654== > ==4654== I refs: 265,277,487 > ==4654== I1 misses: 690 > ==4654== LLi misses: 685 > ==4654== I1 miss rate: 0.00% > ==4654== LLi miss rate: 0.00% > ==4654== > ==4654== D refs: 166,275,677 (159,919,965 rd + 6,355,712 wr) > ==4654== D1 misses: 170,231 ( 170,082 rd + 149 wr) > ==4654== LLd misses: 2,823 ( 2,688 rd + 135 wr) > ==4654== D1 miss rate: 0.1% ( 0.1% + 0.0% ) > ==4654== LLd miss rate: 0.0% ( 0.0% + 0.0% ) > ==4654== > ==4654== LL refs: 170,921 ( 170,772 rd + 149 wr) > ==4654== LL misses: 3,508 ( 3,373 rd + 135 wr) > ==4654== LL miss rate: 0.0% ( 0.0% + 0.0% ) > > */ > > > Regards, > > > Martin > > > > ------------------------------------------------------------------------------ > Site24x7 APM Insight: Get Deep Visibility into Application Performance > APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month > Monitor end-to-end web transactions and take corrective actions now > Troubleshoot faster and improve end-user experience. Signup Now! > http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140 > > > > _______________________________________________ > Valgrind-users mailing list > Valgrind-users@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/valgrind-users > ------------------------------------------------------------------------------ Site24x7 APM Insight: Get Deep Visibility into Application Performance APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month Monitor end-to-end web transactions and take corrective actions now Troubleshoot faster and improve end-user experience. Signup Now! 
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140 _______________________________________________ Valgrind-users mailing list Valgrind-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/valgrind-users ------------------------------------------------------------------------------ Site24x7 APM Insight: Get Deep Visibility into Application Performance APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month Monitor end-to-end web transactions and take corrective actions now Troubleshoot faster and improve end-user experience. Signup Now! http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140 _______________________________________________ Valgrind-users mailing list Valgrind-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/valgrind-users