Re: [PATCH 4/4] powerpc/perf: Add data source encodings for power10 platform

2021-12-22 Thread Arnaldo Carvalho de Melo
Em Mon, Dec 06, 2021 at 02:47:49PM +0530, Kajol Jain escreveu:
> The code represents memory/cache level data based on the PERF_MEM_LVL_*
> namespace, which is in the process of deprecation in favour of the
> newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.
> Add data source encodings to represent cache/memory data based on
> the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.

Thanks, applied.

- Arnaldo

 
> Add data source encodings to represent data coming from local
> memory/Remote memory/distant memory and remote/distant cache hits.
> 
> In order to represent data coming from OpenCAPI cache/memory, we use the
> LVLNUM "PMEM" field, which is used to represent persistent memory accesses.
> 
> Result in power10 system with patch changes:
> 
> localhost:# ./perf mem report --sort="mem,sym,dso" --stdio
>  # Overhead   Samples  Memory access Symbol   
>Shared Object
>  #       
> ..  
>  #
> 29.46%  2331  L1 or L1 hit  [.] __random  
>libc-2.28.so
> 23.11%  2121  L1 or L1 hit  [.] 
> producer_populate_cache  producer_consumer
> 18.56%  1758  L1 or L1 hit  [.] __random_r
>libc-2.28.so
> 15.64%  1559  L2 or L2 hit  [.] __random  
>libc-2.28.so
> .
> 0.09%  5  Remote socket, same board Any cache hit 
> [.] __random libc-2.28.so
> 0.07%  4  Remote socket, same board Any cache hit 
> [.] __random libc-2.28.so
> .
> 
> Reviewed-by: Madhavan Srinivasan 
> Signed-off-by: Kajol Jain 
> ---
>  arch/powerpc/perf/isa207-common.c | 54 ---
>  1 file changed, 42 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/perf/isa207-common.c 
> b/arch/powerpc/perf/isa207-common.c
> index 6c6bc8b7d887..4037ea652522 100644
> --- a/arch/powerpc/perf/isa207-common.c
> +++ b/arch/powerpc/perf/isa207-common.c
> @@ -229,13 +229,28 @@ static inline u64 isa207_find_source(u64 idx, u32 
> sub_idx)
>   ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
>   break;
>   case 4:
> - if (sub_idx <= 1)
> - ret = PH(LVL, LOC_RAM);
> - else if (sub_idx > 1 && sub_idx <= 2)
> - ret = PH(LVL, REM_RAM1);
> - else
> - ret = PH(LVL, REM_RAM2);
> - ret |= P(SNOOP, HIT);
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + ret = P(SNOOP, HIT);
> +
> + if (sub_idx == 1)
> + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
> + else if (sub_idx == 2 || sub_idx == 3)
> + ret |= P(LVL, HIT) | LEVEL(PMEM);
> + else if (sub_idx == 4)
> + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | 
> P(HOPS, 2);
> + else if (sub_idx == 5 || sub_idx == 7)
> + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
> + else if (sub_idx == 6)
> + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | 
> P(HOPS, 3);
> + } else {
> + if (sub_idx <= 1)
> + ret = PH(LVL, LOC_RAM);
> + else if (sub_idx > 1 && sub_idx <= 2)
> + ret = PH(LVL, REM_RAM1);
> + else
> + ret = PH(LVL, REM_RAM2);
> + ret |= P(SNOOP, HIT);
> + }
>   break;
>   case 5:
>   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> @@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 
> sub_idx)
>   }
>   break;
>   case 6:
> - ret = PH(LVL, REM_CCE2);
> - if ((sub_idx == 0) || (sub_idx == 2))
> - ret |= P(SNOOP, HIT);
> - else if ((sub_idx == 1) || (sub_idx == 3))
> - ret |= P(SNOOP, HITM);
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + if (sub_idx == 0)
> + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | 
> REM |
> + P(SNOOP, HIT) | P(HOPS, 2);
> + else if (sub_idx == 1)
> + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | 
> REM |
> + P(SNOOP, HITM) | P(HOPS, 2);
> + else if (sub_idx == 2)
> + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | 
> REM |
> + P(SNOOP, HIT) | P(HOPS, 3);
> + else 

[PATCH 4/4] powerpc/perf: Add data source encodings for power10 platform

2021-12-06 Thread Kajol Jain
The code represents memory/cache level data based on the PERF_MEM_LVL_*
namespace, which is in the process of deprecation in favour of the
newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.
Add data source encodings to represent cache/memory data based on
the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields.

Add data source encodings to represent data coming from local
memory/Remote memory/distant memory and remote/distant cache hits.

In order to represent data coming from OpenCAPI cache/memory, we use the
LVLNUM "PMEM" field, which is used to represent persistent memory accesses.

Result in power10 system with patch changes:

localhost:# ./perf mem report --sort="mem,sym,dso" --stdio
 # Overhead  Samples  Memory access                            Symbol                       Shared Object
 # ........  .......  .......................................  ...........................  .................
 #
    29.46%     2331  L1 or L1 hit                             [.] __random                 libc-2.28.so
    23.11%     2121  L1 or L1 hit                             [.] producer_populate_cache  producer_consumer
    18.56%     1758  L1 or L1 hit                             [.] __random_r               libc-2.28.so
    15.64%     1559  L2 or L2 hit                             [.] __random                 libc-2.28.so
 .
     0.09%        5  Remote socket, same board Any cache hit  [.] __random                 libc-2.28.so
     0.07%        4  Remote socket, same board Any cache hit  [.] __random                 libc-2.28.so
.

Reviewed-by: Madhavan Srinivasan 
Signed-off-by: Kajol Jain 
---
 arch/powerpc/perf/isa207-common.c | 54 ---
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index 6c6bc8b7d887..4037ea652522 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -229,13 +229,28 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
break;
case 4:
-   if (sub_idx <= 1)
-   ret = PH(LVL, LOC_RAM);
-   else if (sub_idx > 1 && sub_idx <= 2)
-   ret = PH(LVL, REM_RAM1);
-   else
-   ret = PH(LVL, REM_RAM2);
-   ret |= P(SNOOP, HIT);
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   ret = P(SNOOP, HIT);
+
+   if (sub_idx == 1)
+   ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
+   else if (sub_idx == 2 || sub_idx == 3)
+   ret |= P(LVL, HIT) | LEVEL(PMEM);
+   else if (sub_idx == 4)
+   ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | 
P(HOPS, 2);
+   else if (sub_idx == 5 || sub_idx == 7)
+   ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
+   else if (sub_idx == 6)
+   ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | 
P(HOPS, 3);
+   } else {
+   if (sub_idx <= 1)
+   ret = PH(LVL, LOC_RAM);
+   else if (sub_idx > 1 && sub_idx <= 2)
+   ret = PH(LVL, REM_RAM1);
+   else
+   ret = PH(LVL, REM_RAM2);
+   ret |= P(SNOOP, HIT);
+   }
break;
case 5:
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
@@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
}
break;
case 6:
-   ret = PH(LVL, REM_CCE2);
-   if ((sub_idx == 0) || (sub_idx == 2))
-   ret |= P(SNOOP, HIT);
-   else if ((sub_idx == 1) || (sub_idx == 3))
-   ret |= P(SNOOP, HITM);
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   if (sub_idx == 0)
+   ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | 
REM |
+   P(SNOOP, HIT) | P(HOPS, 2);
+   else if (sub_idx == 1)
+   ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | 
REM |
+   P(SNOOP, HITM) | P(HOPS, 2);
+   else if (sub_idx == 2)
+   ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | 
REM |
+   P(SNOOP, HIT) | P(HOPS, 3);
+   else if (sub_idx == 3)
+   ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | 
REM |
+   P(SNOOP, HITM) | P(HOPS, 3);
+   } else {
+