This is a note to let you know that I've just added the patch titled

    perf/x86: Fix local vs remote memory events for NHM/WSM

to the 3.2-stable tree which can be found at:
    
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     perf-x86-fix-local-vs-remote-memory-events-for-nhm-wsm.patch
and it can be found in the queue-3.2 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <[email protected]> know about it.


>From 87e24f4b67e68d9fd8df16e0bf9c66d1ad2a2533 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <[email protected]>
Date: Mon, 5 Mar 2012 23:59:25 +0100
Subject: perf/x86: Fix local vs remote memory events for NHM/WSM

From: Peter Zijlstra <[email protected]>

commit 87e24f4b67e68d9fd8df16e0bf9c66d1ad2a2533 upstream.

Verified using the proglet below. Before:

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
remote write

 Performance counter stats for './numa 0':

         2,101,554 node-stores
         2,096,931 node-store-misses

       5.021546079 seconds time elapsed

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
local write

 Performance counter stats for './numa 1':

           501,137 node-stores
               199 node-store-misses

       5.124451068 seconds time elapsed

After:

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
remote write

 Performance counter stats for './numa 0':

         2,107,516 node-stores
         2,097,187 node-store-misses

       5.012755149 seconds time elapsed

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
local write

 Performance counter stats for './numa 1':

         2,063,355 node-stores
               165 node-store-misses

       5.082091494 seconds time elapsed

#define _GNU_SOURCE

#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <dirent.h>
#include <signal.h>
#include <unistd.h>
#include <numaif.h>
#include <stdlib.h>

/* Size in bytes of the test mapping that main() reads or writes (32MB). */
#define SIZE (32*1024*1024)

/*
 * Set by the SIGALRM handler and polled by the access loops in main().
 * volatile sig_atomic_t is the object type the C standard guarantees can
 * be safely written from an asynchronous signal handler.
 */
volatile sig_atomic_t done;

/* Signal handler: tell the access loops to stop. */
void sig_done(int sig)
{
        (void)sig; /* the signal number itself is not needed */
        done = 1;
}

/*
 * NUMA access test: pin the process either to the CPUs listed under
 * /sys/devices/system/node/node0/ ("local") or to all other CPUs
 * ("remote"), bind a SIZE-byte anonymous mapping to node 0, then write
 * (or read) that mapping until the 5 second alarm sets 'done'.
 *
 * argv[1] selects the mode: bit0 - local/remote, bit1 - read/write.
 * Returns 0 on a completed run; exits with status 1 when a resource
 * needed for the test (CPU masks, node directory, mapping) is unavailable.
 */
int main(int argc, char **argv)
{
        cpu_set_t *mask, *mask2;
        size_t size;
        int i, err;
        int t = 0; /* sink for the read loop; must start defined */
        int nrcpus = 1024;
        char *mem;
        unsigned long nodemask = 0x01; /* node 0 */
        DIR *node;
        struct dirent *de;
        int read = 0;
        int local = 0;

        if (argc < 2) {
                printf("usage: %s [0-3]\n", argv[0]);
                printf("  bit0 - local/remote\n");
                printf("  bit1 - read/write\n");
                exit(0);
        }

        switch (atoi(argv[1])) {
        case 0:
                printf("remote write\n");
                break;
        case 1:
                printf("local write\n");
                local = 1;
                break;
        case 2:
                printf("remote read\n");
                read = 1;
                break;
        case 3:
                printf("local read\n");
                local = 1;
                read = 1;
                break;
        default:
                /* any other value silently runs as "remote write" */
                break;
        }

        /* size is the mask size in BYTES, not the number of CPUs */
        size = CPU_ALLOC_SIZE(nrcpus);
        mask = CPU_ALLOC(nrcpus);
        mask2 = CPU_ALLOC(nrcpus);
        if (!mask || !mask2) {
                perror("CPU_ALLOC");
                exit(1);
        }
        CPU_ZERO_S(size, mask);
        CPU_ZERO_S(size, mask2);

        /* mask = every cpuN entry found in node 0's sysfs directory */
        node = opendir("/sys/devices/system/node/node0/");
        if (!node) {
                /* readdir(NULL) would crash; nothing sensible to test */
                perror("opendir");
                exit(1);
        }
        while ((de = readdir(node))) {
                int cpu;

                if (sscanf(de->d_name, "cpu%d", &cpu) == 1)
                        CPU_SET_S(cpu, size, mask);
        }
        closedir(node);

        /* mask2 = all nrcpus CPUs, then strip the node 0 CPUs from it */
        for (i = 0; i < nrcpus; i++)
                CPU_SET_S(i, size, mask2);
        CPU_XOR_S(size, mask2, mask2, mask); // invert

        err = sched_setaffinity(0, size, local ? mask : mask2);
        if (err)
                perror("sched_setaffinity");

        /* the kernel has copied the mask; the sets are no longer needed */
        CPU_FREE(mask);
        CPU_FREE(mask2);

        mem = mmap(0, SIZE, PROT_READ|PROT_WRITE,
                        MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (mem == MAP_FAILED) {
                perror("mmap");
                exit(1);
        }

        /* force the pages of the mapping onto node 0 */
        err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask),
                        MPOL_MF_MOVE);
        if (err)
                perror("mbind");

        signal(SIGALRM, sig_done);
        alarm(5);

        if (!read) {
                while (!done) {
                        for (i = 0; i < SIZE; i++)
                                mem[i] = 0x01;
                }
        } else {
                while (!done) {
                        /* volatile access keeps the loads from being elided */
                        for (i = 0; i < SIZE; i++)
                                t += *(volatile char *)(mem + i);
                }
        }
        (void)t;

        return 0;
}

Signed-off-by: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>

---
 arch/x86/kernel/cpu/perf_event_intel.c |   17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -389,14 +389,15 @@ static __initconst const u64 westmere_hw
 #define NHM_LOCAL_DRAM         (1 << 14)
 #define NHM_NON_DRAM           (1 << 15)
 
-#define NHM_ALL_DRAM           (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+#define NHM_LOCAL              (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_REMOTE             (NHM_REMOTE_DRAM)
 
 #define NHM_DMND_READ          (NHM_DMND_DATA_RD)
 #define NHM_DMND_WRITE         (NHM_DMND_RFO|NHM_DMND_WB)
 #define NHM_DMND_PREFETCH      (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
 
 #define NHM_L3_HIT     (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
-#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
 #define NHM_L3_ACCESS  (NHM_L3_HIT|NHM_L3_MISS)
 
 static __initconst const u64 nehalem_hw_cache_extra_regs
@@ -420,16 +421,16 @@ static __initconst const u64 nehalem_hw_
  },
  [ C(NODE) ] = {
        [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
-               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
        },
        [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
-               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
        },
        [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
-               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
        },
  },
 };


Patches currently in stable-queue which might be from [email protected] are

queue-3.2/perf-x86-fix-local-vs-remote-memory-events-for-nhm-wsm.patch
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to