Future generations of systems may have more levels of hierarchy within
the chip/package, but perf currently has no data source encoding field
that can be used to represent this level of detail.

Add a new field called 'mem_hops' to the perf_mem_data_src structure,
which can be used to represent intra-chip/package or inter-chip/off-package
details. This field is 3 bits wide, and the PERF_MEM_HOPS_{NA, 0..6} values
can be used to represent the different hop levels.

Also add the corresponding macros to define the mem_hops field values
and shift value.

Currently we only define a macro for HOPS_0, which corresponds
to data coming from another core on the same chip.

Add functionality to represent the mem_hops field data in the
perf_mem__lvl_scnprintf function with the help of a new string
array called mem_hops.

For example, the encodings of the mem_hops field with the L2 cache are:

L2                      - local L2
L2 | REMOTE | HOPS_0    - remote core, same chip L2

With the addition of the HOPS field, the remote field can now also be
used to denote a cache access from a different core on the same chip.
Hence a check is added in the c2c_decode_stats function to set mrem
only when the remote field is set and HOPS is zero.

Signed-off-by: Kajol Jain <kj...@linux.ibm.com>
---
 tools/include/uapi/linux/perf_event.h | 11 +++++++++--
 tools/perf/util/mem-events.c          | 19 ++++++++++++++++++-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index e1701e9c7858..42680563228c 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1210,14 +1210,16 @@ union perf_mem_data_src {
                        mem_remote:1,   /* remote */
                        mem_snoopx:2,   /* snoop mode, ext */
                        mem_blk:3,      /* access blocked */
-                       mem_rsvd:21;
+                       mem_hops:3,     /* hop level */
+                       mem_rsvd:18;
        };
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
        __u64 val;
        struct {
-               __u64   mem_rsvd:21,
+               __u64   mem_rsvd:18,
+                       mem_hops:3,     /* hop level */
                        mem_blk:3,      /* access blocked */
                        mem_snoopx:2,   /* snoop mode, ext */
                        mem_remote:1,   /* remote */
@@ -1313,6 +1315,11 @@ union perf_mem_data_src {
 #define PERF_MEM_BLK_ADDR      0x04 /* address conflict */
 #define PERF_MEM_BLK_SHIFT     40
 
+/* hop level */
+#define PERF_MEM_HOPS_0                0x01 /* remote core, same chip */
+/* 2-7 available */
+#define PERF_MEM_HOPS_SHIFT    43
+
 #define PERF_MEM_S(a, s) \
        (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
 
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index ff7289e28192..585b29592a24 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -301,6 +301,16 @@ static const char * const mem_lvlnum[] = {
        [PERF_MEM_LVLNUM_NA] = "N/A",
 };
 
+static const char * const mem_hops[] = {
+       "N/A",
+       /*
+        * While printing, 'Remote' will be added to represent
+        * 'Remote core, same chip' accesses as remote field need
+        * to be set with mem_hops field.
+        */
+       "core, same chip",
+};
+
 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 {
        size_t i, l = 0;
@@ -325,6 +335,9 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
                l += 7;
        }
 
+       if (mem_info && mem_info->data_src.mem_hops)
+               l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
+
        printed = 0;
        for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
                if (!(m & 0x1))
@@ -471,8 +484,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
        /*
         * Skylake might report unknown remote level via this
         * bit, consider it when evaluating remote HITMs.
+        *
+        * In case of Power, the remote field can also be used to denote cache
+        * accesses from another core of the same chip. Hence, set mrem
+        * only when the remote field is set and HOPS is zero.
         */
-       bool mrem  = data_src->mem_remote;
+       bool mrem  = (data_src->mem_remote && !data_src->mem_hops);
        int err = 0;
 
 #define HITM_INC(__f)          \
-- 
2.26.2

Reply via email to