Hey Usman,
This is an example of false sharing in your code because the target address
is on the stack.
Other stack variables like t, total, and n are on the same cacheline.
Those are referenced in the for loop, which brings the
entire cacheline (including p) into the cache.
Based on your program, I created a working minimal example with the only
major change being allocating the addr in the heap.
With that I see:
Latency of cached load: 20
Latency of uncached load: 182
// Minimal example: time a load from a heap-allocated int while its cache line
// is resident, then time the same load again after the line has been flushed
// with clflush. The target is allocated on the heap so that it does not sit on
// the same cache line as main's stack variables.
// Prints the low 32 bits of each TSC delta.
int main() {
    volatile unsigned long time;
    int *addr = new int(); // WORKS
    *addr = 0x12221;
    // Does not work.
    // int pO = 123456;
    // void * p = &pO;

    // 1. Load to ensure addr gets cached.
    // 2. Time access to cached addr.
    // 3. Flush addr.
    asm __volatile__ (
        " mfence \n"
        " movl (%1), %%eax \n"      // warm-up load: brings *addr into the cache
        " lfence \n"
        " rdtsc \n"                 // start timestamp (edx:eax)
        " lfence \n"
        " movl %%eax, %%esi \n"     // keep the low half of the start timestamp
        " movl (%1), %%eax \n"      // the timed (cached) load
        " lfence \n"
        " rdtsc \n"                 // end timestamp
        " subl %%esi, %%eax \n"     // eax = end - start (low 32 bits)
        " clflush 0(%1) \n"         // evict the line for the next measurement
        : "=a" (time)
        : "c" (addr)
        // "memory": the asm reads *addr and flushes its cache line, so the
        // compiler must not drop or move the store to *addr across it.
        : "%esi", "%edx", "memory");
    printf("\n Latency of cached load: %lu \n", time);

    // Load value from memory.
    asm __volatile__ (
        " mfence \n"
        " lfence \n"
        " rdtsc \n"                 // start timestamp (edx:eax)
        " lfence \n"
        " movl %%eax, %%esi \n"     // keep the low half of the start timestamp
        " movl (%1), %%eax \n"      // the timed load, after the earlier clflush
        " lfence \n"
        " rdtsc \n"                 // end timestamp
        " subl %%esi, %%eax \n"     // eax = end - start (low 32 bits)
        : "=a" (time)
        : "c" (addr)
        : "%esi", "%edx", "memory");
    printf("\n Latency of uncached load: %lu \n", time);

    delete addr; // release the heap allocation made above
    return 0;
}
Cheers,
Swapnil Haria,
PhD Candidate,
Dept of Computer Sciences,
University of Wisconsin-Madison
http://pages.cs.wisc.edu/~swapnilh/
On Fri, Oct 26, 2018 at 5:08 AM Usman Ali <[email protected]> wrote:
> Hi Swapnil Haria,
>
> Thanks for your kind reply, below is a program which can be used to
> reproduce the effect,
>
> regards,
> Usman Ali
> MSEE Student, ITU, Lahore
>
> #include <stdio.h>
> #include <unistd.h>
>
>
> int probe(void *addr) { // Times one 32-bit load from addr with rdtsc, then flushes addr's cache line.
> volatile unsigned long time;
> asm __volatile__ (
> " mfence \n"
> " lfence \n"
> " rdtsc \n" // start timestamp (edx:eax)
> " lfence \n"
> " movl %%eax, %%esi \n" // keep the low half of the start timestamp
> " movl (%1), %%eax \n" // the timed load from addr
> " lfence \n"
> " rdtsc \n" // end timestamp
> " subl %%esi, %%eax \n" // eax = end - start (low 32 bits)
> " clflush 0(%1) \n" // flush the line holding addr after the measurement
> : "=a" (time)
> : "c" (addr)
> : "%esi", "%edx");
> return time; // unsigned long is converted to the int return type
>
> }
>
>
> int readOnly(void *addr) { // Times one 32-bit load from addr with rdtsc; unlike probe, it does not flush.
> volatile unsigned long time;
> asm __volatile__ (
> " mfence \n"
> " lfence \n"
> " rdtsc \n" // start timestamp (edx:eax)
> " lfence \n"
> " movl %%eax, %%esi \n" // keep the low half of the start timestamp
> " movl (%1), %%eax \n" // the timed load from addr
> " lfence \n"
> " rdtsc \n" // end timestamp
> " subl %%esi, %%eax \n" // eax = end - start (low 32 bits)
> : "=a" (time)
> : "c" (addr)
> : "%esi", "%edx");
> return time; // unsigned long is converted to the int return type
>
> }
>
> int main(void){ // Alternates probe() and readOnly() on a stack variable's address and prints a timing per iteration.
>
> int t=0, total = 0, n=100; // total is never used in the code shown
>
> int pO = 123456; // stack variable whose address is timed
> void * p = &pO;
>
> for(int i=0; i < n; i++){
>
> if(i%2){ // odd i: three timed loads, each followed by a clflush
>
> t = probe(p);
> t = probe(p);
> t = probe(p);
>
> }else{ // even i: three timed loads with no flush
> t = readOnly(p);
> t = readOnly(p);
> t = readOnly(p);
> }
>
> printf("\n----Cycle: %d \n", t); // only the last of the three measurements is printed
>
> }
>
>
> printf("Done --ud-- \n");
> return 0;
>
> sleep(1); // unreachable: placed after the return statement
> }
>
>
>
>
>
>
>
> // END of PROGRAM
>
>
_______________________________________________
gem5-users mailing list
[email protected]
http://m5sim.org/cgi-bin/mailman/listinfo/gem5-users