I tried running the following example program under cachegrind to see metrics 
that indicate cache-line false sharing.  Here's the example:

#include <pearl/version.h>
#include <boost/thread.hpp>
#include <boost/static_assert.hpp>
#include <iostream>

// Number of times each Work functor increments its counter.
#define MAX_TESTS    100000
// Number of worker threads (and therefore counters) that main() creates.
#define MAX_THREADS  100

// How many ints fit in 64 bytes. The 64 is presumably the cache-line size
// of the machine under test -- TODO confirm for the actual target CPU.
#define CACHELINE_SIZE_INTS \
    (64/sizeof(int))

// Compile-time check that sizeof(int) divides 64 exactly, so that
// CACHELINE_SIZE_INTS ints occupy exactly 64 bytes with no remainder.
BOOST_STATIC_ASSERT(  (sizeof(int)*CACHELINE_SIZE_INTS)==64 );

// Thread body used by the test: holds a reference to one int counter and,
// when invoked, increments that counter MAX_TESTS times.
struct Work {
    // Counter to increment. Held by reference, so the referenced int must
    // stay alive for as long as this functor (or any copy of it) may run.
    int& data;

    // Bind this functor to the counter `d`.
    Work( int& d) : data( d ) {}

    // Perform MAX_TESTS increments on the bound counter.
    void operator()() {
        for (int pass = 0; pass != MAX_TESTS; ++pass) {
            data += 1;
        }
    }
};


// Entry point: starts MAX_THREADS threads, each running a Work functor bound
// to its own int counter, then waits for all of them. The number of
// command-line arguments selects how the counters are laid out in memory:
// packed next to each other, or spaced CACHELINE_SIZE_INTS ints apart.
int main(int argc,const char ** argv) {
    boost::thread_group threads;

    // NOTE(review): argc>2 is true only when at least TWO command-line
    // arguments are supplied (argv[0] is the program name). An invocation
    // with a single extra argument takes the else branch below.
    if( argc>2 )
    {
        std::cout << "FALSE SHARING version" << std::endl;
        // Counters are adjacent ints, so several threads' counters fall
        // within the same 64-byte span.
        int false_sharing[MAX_THREADS] = { 0 };
        for (int i = 0; i < MAX_THREADS; ++i) {
            int& var=false_sharing[i];
            threads.create_thread(Work(var));
        }
        // Join before leaving this scope: the threads hold references into
        // the stack array declared above.
        threads.join_all();
    }
    else
    {
        std::cout << "NO FALSE SHARING version" << std::endl;
        // One counter every CACHELINE_SIZE_INTS ints (64 bytes apart), so no
        // two counters can lie within the same 64-byte-aligned span, whatever
        // the array's own starting address is.
        int no_false_sharing[MAX_THREADS * CACHELINE_SIZE_INTS] = { 0 };
        for (int i = 0; i < MAX_THREADS; ++i) {
            int& var=no_false_sharing[i * CACHELINE_SIZE_INTS];
            threads.create_thread(Work(var));
        }
        // Join before leaving this scope: the threads hold references into
        // the stack array declared above.
        threads.join_all();
    }



I compile/link this with:

g++  -fPIC -Wall -Wfloat-equal -Wundef -pipe -Wno-deprecated -ggdb3 
-O0 -DPROFILE -I/usr/local/include -fuse-cxa-atexit -DNDEBUG  
-DBUILDINFO="\"09/19/2011 at 06:37:43 AM by zap@iulius\"" 
-I.. -I../include  -Wno-deprecated -fno-strict-aliasing  -c -o 
false_sharing.o ../src/false_sharing.cpp
g++ -o false_sharing.1.0.28 -Wl,--demangle,-Map,false_sharing.1.0.28.map 
false_sharing.o  -lrt -lpthread -ldl -lz -lm -lboost_filesystem 
-lboost_program_options -lboost_regex -lboost_signals 
-lboost_serialization -lboost_thread -lboost_unit_test_framework 
-lboost_system 


Then run twice with:

valgrind --tool=cachegrind --branch-sim=yes --cache-sim=yes 
  ../profile/false_sharing

and


valgrind --tool=cachegrind --branch-sim=yes --cache-sim=yes 
   ../profile/false_sharing  x


The results are what bother me:  there is no difference in the stats gathered 
between runs.  Both show output like:

NO FALSE SHARING version
==13718== 
==13718== I   refs:      112,888,692
==13718== I1  misses:          3,135
==13718== L2i misses:          2,396
==13718== I1  miss rate:        0.00%
==13718== L2i miss rate:        0.00%
==13718== 
==13718== D   refs:       65,138,690  (53,817,934 rd   + 11,320,756 wr)
==13718== D1  misses:         51,954  (    47,947 rd   +      4,007 wr)
==13718== L2d misses:         20,929  (    17,626 rd   +      3,303 wr)
==13718== D1  miss rate:         0.0% (       0.0%     +        0.0%  )
==13718== L2d miss rate:         0.0% (       0.0%     +        0.0%  )
==13718== 
==13718== L2 refs:            55,089  (    51,082 rd   +      4,007 wr)
==13718== L2 misses:          23,325  (    20,022 rd   +      3,303 wr)
==13718== L2 miss rate:          0.0% (       0.0%     +        0.0%  )
==13718== 
==13718== Branches:       11,910,054  (11,886,870 cond +     23,184 ind)
==13718== Mispredicts:        55,848  (    53,014 cond +      2,834 ind)
==13718== Mispred rate:          0.4% (       0.4%     +       12.2%   )



And the other:
FALSE SHARING version
==13823== 
==13823== I   refs:      112,878,279
==13823== I1  misses:          3,117
==13823== L2i misses:          2,395
==13823== I1  miss rate:        0.00%
==13823== L2i miss rate:        0.00%
==13823== 
==13823== D   refs:       65,131,908  (53,814,295 rd   + 11,317,613 wr)
==13823== D1  misses:         51,587  (    47,744 rd   +      3,843 wr)
==13823== L2d misses:         20,896  (    17,624 rd   +      3,272 wr)
==13823== D1  miss rate:         0.0% (       0.0%     +        0.0%  )
==13823== L2d miss rate:         0.0% (       0.0%     +        0.0%  )
==13823== 
==13823== L2 refs:            54,704  (    50,861 rd   +      3,843 wr)
==13823== L2 misses:          23,291  (    20,019 rd   +      3,272 wr)
==13823== L2 miss rate:          0.0% (       0.0%     +        0.0%  )
==13823== 
==13823== Branches:       11,907,051  (11,883,867 cond +     23,184 ind)
==13823== Mispredicts:        55,946  (    53,121 cond +      2,825 ind)
==13823== Mispred rate:          0.4% (       0.4%     +       12.1%   )


How can the results be virtually identical?  I would have thought the "false 
sharing" version would show significantly higher cache-miss counts than the 
other, but the two are nearly identical.

Is there something I am doing wrong with the tool?  I want to be able to 
measure the false-sharing so that I can find and improve it in my target 
program.

Many thanks

-z




------------------------------------------------------------------------------
BlackBerry® DevCon Americas, Oct. 18-20, San Francisco, CA
Learn about the latest advances in developing for the 
BlackBerry® mobile platform with sessions, labs & more.
See new tools and technologies. Register for BlackBerry® DevCon today!
http://p.sf.net/sfu/rim-devcon-copy1 
_______________________________________________
Valgrind-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/valgrind-users

Reply via email to