Hi,

Why do the I1 misses, LLi misses, LL misses and D1 misses all increase,
even though the miss rate decreases (see the D1 miss rate row showing
0.1% + 0.0%)?

Which indicator shows the correct number for judging the improvement after
optimization?


/* Smaller of two values: yields a when a < b, otherwise b.
   Function-like macro, so the selected argument is evaluated twice;
   do not pass expressions with side effects. */
#define min(a,b) ( ((a) < (b)) ? (a) : (b) )
/* Larger of two values: yields a when a > b, otherwise b.
   Same double-evaluation caveat as min(). */
#define max(a,b) ( ((a) > (b)) ? (a) : (b) )
/*
 * Cachegrind test program: multiplies two N x N int matrices (x = y * z)
 * using a blocked (tiled) loop nest.  The columns of x/z (index j) and the
 * inner dimension (index k) are each walked in strips of B elements, so a
 * B x B tile of z is reused across every row i before moving on.
 *
 * Uses the min() macro defined earlier in this file.
 * Always returns 0.
 */
int main(void)
{
 enum { N = 100, B = 5 };   /* matrix dimension and blocking factor */
 /* Zero-initialised on purpose: x is accumulated into ("x[i][j] + r") and
    y/z are read by the kernel, so leaving them uninitialised would mean
    reading indeterminate values. */
 int x[N][N] = {{0}};
 int y[N][N] = {{0}};
 int z[N][N] = {{0}};
 int i=0;
 int j=0;
 int k=0;
 int r=0;
 int jj=0;
 int kk=0;
 int jend=0;
 int kend=0;
/*
 Unblocked baseline, kept for comparison runs:

 for(i=0;i<N;++i)
 {
  for(j=0;j<N;++j)
  {
   r=0;
   for(k=0;k<N;++k)
   {
     r=r+y[i][k]*z[k][j];
   }
   x[i][j]=r;
  }
 }
*/
 for(jj=0;jj<N;jj=jj+B)
 {
  jend=min(jj+B,N);   /* strip bounds do not change inside the inner loops */
  for(kk=0;kk<N;kk=kk+B)
  {
   kend=min(kk+B,N);
   for(i=0;i<N;++i)
   {
    /* j must start at jj, not 0: starting at 0 revisits the columns of
       every earlier strip, adding their partial products to x again and
       multiplying the amount of work done. */
    for(j=jj;j<jend;++j)
    {
     r=0;
     for(k=kk;k<kend;++k)
     {
       r=r+y[i][k]*z[k][j];
     }
     /* Each kk strip contributes one partial sum to x[i][j]. */
     x[i][j]=x[i][j]+r;
    }
   }
  }
 }
 return 0;
}
/*
 for(i=0;i<N;++i)
 {
  for(j=0;j<N;++j)
  {
   r=0;
   for(k=0;k<N;++k)
   {
     r=r+y[i][k]*z[k][j];
   }
   x[i][j]=r;
  }
 }

martin@ubuntu:~$ valgrind --tool=cachegrind ./mar5ti
==4602== Cachegrind, a cache and branch-prediction profiler
==4602== Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote et al.
==4602== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==4602== Command: ./mar5ti
==4602==
--4602-- warning: L3 cache found, using its data for the LL simulation.
==4602==
==4602== I   refs:      14,264,184
==4602== I1  misses:           689
==4602== LLi misses:           684
==4602== I1  miss rate:       0.00%
==4602== LLi miss rate:       0.00%
==4602==
==4602== D   refs:      10,163,336  (10,117,945 rd   + 45,391 wr)
==4602== D1  misses:        64,978  (    64,200 rd   +    778 wr)
==4602== LLd misses:         2,823  (     2,063 rd   +    760 wr)
==4602== D1  miss rate:        0.6% (       0.6%     +    1.7%  )
==4602== LLd miss rate:        0.0% (       0.0%     +    1.6%  )
==4602==
==4602== LL refs:           65,667  (    64,889 rd   +    778 wr)
==4602== LL misses:          3,507  (     2,747 rd   +    760 wr)
==4602== LL miss rate:         0.0% (       0.0%     +    1.6%  )

 for(jj=0;jj<N;jj=jj+B)
 for(kk=0;kk<N;kk=kk+B)
 for(i=0;i<N;++i)
 {
  for(j=0;j<min(jj+B,N);++j)
  {
   r=0;
   for(k=kk;k<min(kk+B,N);++k)
   {
     r=r+y[i][k]*z[k][j];
   }
   x[i][j]=x[i][j]+r;
  }
 }
martin@ubuntu:~$ valgrind --tool=cachegrind ./mar5ti
==4654== Cachegrind, a cache and branch-prediction profiler
==4654== Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote et al.
==4654== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==4654== Command: ./mar5ti
==4654==
--4654-- warning: L3 cache found, using its data for the LL simulation.
==4654==
==4654== I   refs:      265,277,487
==4654== I1  misses:            690
==4654== LLi misses:            685
==4654== I1  miss rate:        0.00%
==4654== LLi miss rate:        0.00%
==4654==
==4654== D   refs:      166,275,677  (159,919,965 rd   + 6,355,712 wr)
==4654== D1  misses:        170,231  (    170,082 rd   +       149 wr)
==4654== LLd misses:          2,823  (      2,688 rd   +       135 wr)
==4654== D1  miss rate:         0.1% (        0.1%     +       0.0%  )
==4654== LLd miss rate:         0.0% (        0.0%     +       0.0%  )
==4654==
==4654== LL refs:           170,921  (    170,772 rd   +       149 wr)
==4654== LL misses:           3,508  (      3,373 rd   +       135 wr)
==4654== LL miss rate:          0.0% (        0.0%     +       0.0%  )

*/



Regards,


Martin
------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Valgrind-users mailing list
Valgrind-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/valgrind-users

Reply via email to