Hi,
Why do the absolute counts of I1 misses, LLi misses, LL misses and D1 misses
all increase after the optimization, even though the D1 miss rate decreases
(see the row reading "0.1% ( 0.1% + 0.0% )")?
Which indicator shows the correct number for judging the improvement after
optimization?
/*
 * Smaller / larger of two values.
 * These are function-like macros: the selected argument is evaluated twice,
 * so do not pass expressions with side effects (e.g. min(i++, n)).
 */
#define min(a, b) ((b) > (a) ? (a) : (b))
#define max(a, b) ((b) < (a) ? (a) : (b))
/*
 * Cache-blocked (tiled) multiplication of two N x N integer matrices,
 * x = y * z, written as a small benchmark to be profiled under cachegrind.
 *
 * The j and k loops are tiled with block size B: for every (jj, kk) tile,
 * each row i adds the partial dot product over k in [kk, kk+B) into
 * x[i][j] for j in [jj, jj+B).  Summed over all kk tiles this yields the
 * same result as the plain i/j/k triple loop.
 *
 * NOTE: x is never consumed afterwards, so an optimizing compiler is free
 * to drop the loops; build without optimization when measuring.
 *
 * Returns 0.
 */
int main(void)
{
    enum { N = 100, B = 5 };    /* matrix dimension and tile size */

    /* x accumulates partial sums across kk tiles, so it must start at 0. */
    int x[N][N] = {{0}};
    int y[N][N];
    int z[N][N];
    int i, j, k, jj, kk, r;

    /*
     * Give the inputs defined values: reading uninitialized automatic
     * arrays is undefined behavior.
     */
    for (i = 0; i < N; ++i) {
        for (j = 0; j < N; ++j) {
            y[i][j] = i + j;
            z[i][j] = i - j;
        }
    }

    for (jj = 0; jj < N; jj += B) {
        for (kk = 0; kk < N; kk += B) {
            for (i = 0; i < N; ++i) {
                /*
                 * The column loop must start at jj, not 0.  Starting at 0
                 * re-processes every earlier column block for each tile,
                 * which both corrupts the sums in x and multiplies the
                 * amount of work (and memory references) many times over.
                 */
                for (j = jj; j < min(jj + B, N); ++j) {
                    r = 0;
                    for (k = kk; k < min(kk + B, N); ++k) {
                        r = r + y[i][k] * z[k][j];
                    }
                    x[i][j] = x[i][j] + r;
                }
            }
        }
    }
    return 0;
}
/*
for(i=0;i<N;++i)
{
for(j=0;j<N;++j)
{
r=0;
for(k=0;k<N;++k)
{
r=r+y[i][k]*z[k][j];
}
x[i][j]=r;
}
}
martin@ubuntu:~$ valgrind --tool=cachegrind ./mar5ti
==4602== Cachegrind, a cache and branch-prediction profiler
==4602== Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote et al.
==4602== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==4602== Command: ./mar5ti
==4602==
--4602-- warning: L3 cache found, using its data for the LL simulation.
==4602==
==4602== I refs: 14,264,184
==4602== I1 misses: 689
==4602== LLi misses: 684
==4602== I1 miss rate: 0.00%
==4602== LLi miss rate: 0.00%
==4602==
==4602== D refs: 10,163,336 (10,117,945 rd + 45,391 wr)
==4602== D1 misses: 64,978 ( 64,200 rd + 778 wr)
==4602== LLd misses: 2,823 ( 2,063 rd + 760 wr)
==4602== D1 miss rate: 0.6% ( 0.6% + 1.7% )
==4602== LLd miss rate: 0.0% ( 0.0% + 1.6% )
==4602==
==4602== LL refs: 65,667 ( 64,889 rd + 778 wr)
==4602== LL misses: 3,507 ( 2,747 rd + 760 wr)
==4602== LL miss rate: 0.0% ( 0.0% + 1.6% )
for(jj=0;jj<N;jj=jj+B)
for(kk=0;kk<N;kk=kk+B)
for(i=0;i<N;++i)
{
for(j=0;j<min(jj+B,N);++j)
{
r=0;
for(k=kk;k<min(kk+B,N);++k)
{
r=r+y[i][k]*z[k][j];
}
x[i][j]=x[i][j]+r;
}
}
martin@ubuntu:~$ valgrind --tool=cachegrind ./mar5ti
==4654== Cachegrind, a cache and branch-prediction profiler
==4654== Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote et al.
==4654== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
==4654== Command: ./mar5ti
==4654==
--4654-- warning: L3 cache found, using its data for the LL simulation.
==4654==
==4654== I refs: 265,277,487
==4654== I1 misses: 690
==4654== LLi misses: 685
==4654== I1 miss rate: 0.00%
==4654== LLi miss rate: 0.00%
==4654==
==4654== D refs: 166,275,677 (159,919,965 rd + 6,355,712 wr)
==4654== D1 misses: 170,231 ( 170,082 rd + 149 wr)
==4654== LLd misses: 2,823 ( 2,688 rd + 135 wr)
==4654== D1 miss rate: 0.1% ( 0.1% + 0.0% )
==4654== LLd miss rate: 0.0% ( 0.0% + 0.0% )
==4654==
==4654== LL refs: 170,921 ( 170,772 rd + 149 wr)
==4654== LL misses: 3,508 ( 3,373 rd + 135 wr)
==4654== LL miss rate: 0.0% ( 0.0% + 0.0% )
*/
Regards,
Martin
------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Valgrind-users mailing list
Valgrind-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/valgrind-users