Dear libpfm4 team,

Please find the attached patch for review, and let me know whether it is acceptable for upstream. We are working on an important requirement for one of IBM's Power10 customers that needs libpfm4/PAPI support, and we hope to get this support added as soon as possible.

Thank you for your consideration.


Regards,
Sachin.
From 419305392dd7d347f9c1a2ec0f04fc8f8d7c5181 Mon Sep 17 00:00:00 2001
From: Sachin Monga <smo...@linux.ibm.com>
Date: Thu, 15 Aug 2024 12:54:51 -0400
Subject: [PATCH] IBM Power10 core PMU support

Add additional events for the IBM Power10 core PMU.

Signed-off-by: Sachin Monga <smo...@linux.ibm.com>
---
 lib/events/power10_events.h | 109 +++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 2 deletions(-)

diff --git a/lib/events/power10_events.h b/lib/events/power10_events.h
index 2f2ad52..e55caf5 100644
--- a/lib/events/power10_events.h
+++ b/lib/events/power10_events.h
@@ -1,12 +1,12 @@
 /*
 * File:    power10_events.h
-* (C) Copyright IBM Corporation, 2021-2022.  All Rights Reserved.
+* (C) Copyright IBM Corporation, 2021-2024.  All Rights Reserved.
 * Author:  Will Schmidt
 *          will_schm...@vnet.ibm.com
 * Author:  Carl Love
 *          c...@us.ibm.com
 #
-* Content reworked May 11, 2022, - Will Schmidt.
+* Content reworked Aug 12, 2024, - Sachin Monga, Jeevitha P.
 * This file was automatically generated from event lists as
 * provided by the IBM PowerPC PMU team.  Any manual
 * updates should be clearly marked so they are not lost in
@@ -90,11 +90,21 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "NA;A conditional branch finished with mispredicted 
direction using the Global Branch History Table.",
        .pme_long_desc = "NA;A conditional branch finished with mispredicted 
direction using the Global Branch History Table. Resolved not taken",
        },
+       {.pme_name = "PM_BR_COND_CMPL",
+        .pme_code = 0x4E058,
+        .pme_short_desc = "frontend;A conditional branch completed.",
+        .pme_long_desc = "frontend;A conditional branch completed.",
+       },
        {.pme_name = "PM_BR_MPRED_NTKN_COND_DIR_LBHT_GSEL",
        .pme_code = 0x000000E080,
        .pme_short_desc = "NA;A conditional branch finished with mispredicted 
direction using the Local Branch History Table selected with the global 
selector.",
        .pme_long_desc = "NA;A conditional branch finished with mispredicted 
direction using the Local Branch History Table selected with the global 
selector. Resolved not taken",
        },
+       {.pme_name = "PM_BR_TKN_FIN",
+        .pme_code = 0x00000040B4,
+        .pme_short_desc = "frontend; A taken branch (conditional or 
unconditional) finished",
+        .pme_long_desc = "frontend;A taken branch (conditional or 
unconditional) finished",
+       },
        {.pme_name = "PM_BR_MPRED_NTKN_COND_DIR_LBHT_LSEL",
        .pme_code = 0x00000058BC,
        .pme_short_desc = "NA;A conditional branch finished with mispredicted 
direction using the Local Branch History Table selected by the local selector.",
@@ -470,6 +480,26 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded with a valid line that was not in the M (exclusive) state from another 
core's L2 on the same chip in the same regent due to a demand miss.",
        .pme_long_desc = "Data Source;The processor's L1 data cache was 
reloaded with a valid line that was not in the M (exclusive) state from another 
core's L2 on the same chip in the same regent due to a demand miss.",
        },
+       {.pme_name = "PM_DATA_FROM_L1MISS",
+       .pme_code = 0x003F40000001C040,
+       .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L1 due to a demand miss.",
+       .pme_long_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L1 due to a demand miss.",
+       },
+       {.pme_name = "PM_DATA_FROM_L1MISS_ALT2",
+       .pme_code = 0x003F40000002C040,
+       .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L1 due to a demand miss.",
+       .pme_long_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L1 due to a demand miss.",
+       },
+       {.pme_name = "PM_DATA_FROM_L1MISS_ALT3",
+       .pme_code = 0x003F40000003C040,
+       .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L1 due to a demand miss.",
+       .pme_long_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L1 due to a demand miss.",
+       },
+       {.pme_name = "PM_DATA_FROM_L1MISS_ALT4",
+       .pme_code = 0x003F40000004C040,
+       .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L1 due to a demand miss.",
+       .pme_long_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L1 due to a demand miss.",
+       },
        {.pme_name = "PM_DATA_FROM_L2MISS",
        .pme_code = 0x0003C0000001C040,
        .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded from a source beyond the local core's L2 due to a demand miss.",
@@ -510,6 +540,11 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded from the local core's L2 due to a demand miss.",
        .pme_long_desc = "Data Source;The processor's L1 data cache was 
reloaded from the local core's L2 due to a demand miss.",
        },
+       {.pme_name = "PM_ST_DATA_FROM_L2",
+       .pme_code = 0x0C0000016080,
+       .pme_short_desc = "Data Source;Store data line hit in the local L2. 
Includes cache-line states Sx, Tx, Mx.",
+       .pme_long_desc = "Data Source;Store data line hit in the local L2. 
Includes cache-line states Sx, Tx, Mx.Since the event happens in a 2:1 clock 
domain and is time-sliced across all 4 threads, the event count should be 
multiplied by 2.",
+       },
        {.pme_name = "PM_DATA_FROM_L31_NON_REGENT_MOD",
        .pme_code = 0x0AC040000001C040,
        .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded with a line in the M (exclusive) state from another core's L3 on the 
same chip in a different regent due to a demand miss.",
@@ -650,6 +685,11 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded from the local core's L3 due to a demand miss.",
        .pme_long_desc = "Data Source;The processor's L1 data cache was 
reloaded from the local core's L3 due to a demand miss.",
        },
+       {.pme_name = "PM_ST_DATA_FROM_L3",
+       .pme_code = 0x0C0000016880,
+       .pme_short_desc = "Data Source;Store data line hit in the local L3. 
Includes cache-line states Tx and Mx.",
+       .pme_long_desc = "Data Source;Store data line hit in the local L3. 
Includes cache-line states Tx and Mx. If the cache line is in the Sx state, the 
RC machine will send a RWITM command. Since the event happens in a 2:1 clock 
domain and is time-sliced across all 4 threads, the event count should be 
multiplied by 2.",
+       },
        {.pme_name = "PM_DATA_FROM_LMEM",
        .pme_code = 0x094040000001C040,
        .pme_short_desc = "Data Source;The processor's L1 data cache was 
reloaded from the local chip's memory due to a demand miss.",
@@ -1845,6 +1885,11 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "pipeline;Cycles in which the oldest instruction in 
the pipeline was executing in the VSU (includes FXU, VSU, CRU).",
        .pme_long_desc = "pipeline;Cycles in which the oldest instruction in 
the pipeline was executing in the VSU (includes FXU, VSU, CRU).",
        },
+       {.pme_name = "PM_EXT_INT",
+       .pme_code = 0x200F8,
+       .pme_short_desc = "pipeline;Cycles an external interrupt was active",
+       .pme_long_desc = "pipeline;Cycles an external interrupt was active",
+       },
        {.pme_name = "PM_FLOP_CMPL",
        .pme_code = 0x100F4,
        .pme_short_desc = "floating point;Floating Point Operations Completed.",
@@ -1915,6 +1960,16 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "pmc;PowerPC instruction completed",
        .pme_long_desc = "pmc;PowerPC instruction completed",
        },
+       {.pme_name = "PM_INST_DISP",
+       .pme_code = 0x200F2,
+       .pme_short_desc = "frontend;PowerPC instruction dispatched",
+       .pme_long_desc = "frontend;PowerPC instruction dispatched",
+       },
+       {.pme_name = "PM_INST_DISP_ALT",
+       .pme_code = 0x300F2,
+       .pme_short_desc = "frontend;PowerPC instruction dispatched",
+       .pme_long_desc = "frontend;PowerPC instruction dispatched",
+       },
        {.pme_name = "PM_INST_CMPL_ALT2",
        .pme_code = 0x20002,
        .pme_short_desc = "pmc;PowerPC instruction completed",
@@ -1935,6 +1990,31 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "pmc;Instruction finished",
        .pme_long_desc = "pmc;Instruction finished",
        },
+       {.pme_name = "PM_INST_FROM_L1",
+       .pme_code = 0x0000004080,
+       .pme_short_desc = "NA;An instruction fetch hit in the L1.",
+       .pme_long_desc = "NA;An instruction fetch hit in the L1. Each fetch 
group contains 8 instructions. The same line can hit 4 times if 32 sequential 
instructions are fetched.",
+       },
+       {.pme_name = "PM_INST_FROM_L1MISS",
+       .pme_code = 0x003F00000001C040,
+       .pme_short_desc = "NA;The processor's instruction cache was reloaded 
from a source beyond the local core's L1 due to a demand miss.",
+       .pme_long_desc = "NA;The processor's instruction cache was reloaded 
from a source beyond the local core's L1 due to a demand miss.",
+       },
+       {.pme_name = "PM_INST_FROM_L1MISS_ALT2",
+       .pme_code = 0x003F00000002C040,
+       .pme_short_desc = "NA;The processor's instruction cache was reloaded 
from a source beyond the local core's L1 due to a demand miss.",
+       .pme_long_desc = "NA;The processor's instruction cache was reloaded 
from a source beyond the local core's L1 due to a demand miss.",
+       },
+       {.pme_name = "PM_INST_FROM_L1MISS_ALT3",
+       .pme_code = 0x003F00000003C040,
+       .pme_short_desc = "NA;The processor's instruction cache was reloaded 
from a source beyond the local core's L1 due to a demand miss.",
+       .pme_long_desc = "NA;The processor's instruction cache was reloaded 
from a source beyond the local core's L1 due to a demand miss.",
+       },
+       {.pme_name = "PM_INST_FROM_L1MISS_ALT4",
+       .pme_code = 0x003F00000004C040,
+       .pme_short_desc = "NA;The processor's instruction cache was reloaded 
from a source beyond the local core's L1 due to a demand miss.",
+       .pme_long_desc = "NA;The processor's instruction cache was reloaded 
from a source beyond the local core's L1 due to a demand miss.",
+       },
        {.pme_name = "PM_INST_FROM_DMEM",
        .pme_code = 0x0F4100000001C040,
        .pme_short_desc = "Data Source;The processor's instruction cache was 
reloaded from distant memory (MC slow) due to a demand miss.",
@@ -2745,6 +2825,16 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "NA;All successful I-side-instruction-fetch (e.",
        .pme_long_desc = "NA;All successful I-side-instruction-fetch (e.g. 
i-demand, i-prefetch) dispatches for this thread. Since the event happens in a 
2:1 clock domain and is time-sliced across all 4 threads, the event count 
should be multiplied by 2.",
        },
+       {.pme_name = "PM_L2_INST_MISS",
+       .pme_code = 0x000000036880,
+       .pme_short_desc = "NA;All successful instruction (demand and prefetch) 
dispatches for this thread that missed in the L2.",
+       .pme_long_desc = "NA;All successful instruction (demand and prefetch) 
dispatches for this thread that missed in the L2. Since the event happens in a 
2:1 clock domain and is time-sliced across all 4 threads, the event count 
should be multiplied by 2.",
+       },
+       {.pme_name = "PM_L2_INST_MISS_ALT",
+       .pme_code = 0x0F0000046080,
+       .pme_short_desc = "NA;All successful instruction (demand and prefetch) 
dispatches for this thread that missed in the L2.",
+       .pme_long_desc = "NA;All successful instruction (demand and prefetch) 
dispatches for this thread that missed in the L2. Since the event happens in a 
2:1 clock domain and is time-sliced across all 4 threads, the event count 
should be multiplied by 2.",
+       },
        {.pme_name = "PM_L2_ISIDE_DSIDE_ATTEMPT",
        .pme_code = 0x020000016080,
        .pme_short_desc = "NA;All D-side-Ld or I-side-instruction-fetch 
dispatch attempts for this thread.",
@@ -2830,6 +2920,11 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "NA;All successful D-Side Store dispatches for this 
thread that missed in the L2.",
        .pme_long_desc = "NA;All successful D-Side Store dispatches for this 
thread that missed in the L2. Since the event happens in a 2:1 clock domain and 
is time-sliced across all 4 threads, the event count should be multiplied by 
2.",
        },
+       {.pme_name = "PM_L2_ST_HIT",
+       .pme_code = 0x0F0000026880,
+       .pme_short_desc = "NA;All successful D-side store dispatches for this 
thread that were L2 hits",
+       .pme_long_desc = "NA;All successful D-side store dispatches for this 
thread that were L2 hits. Since the event happens in a 2:1 clock domain and is 
time-sliced across all 4 threads, the event count should be multiplied by 2.",
+       },
        {.pme_name = "PM_L2_ST",
        .pme_code = 0x000000016880,
        .pme_short_desc = "NA;All successful D-side store dispatches for this 
thread (L2 miss + L2 hits).",
@@ -2970,6 +3065,16 @@ static const pme_power_entry_t power10_pe[] = {
        .pme_short_desc = "pipeline;MMA instruction issued",
        .pme_long_desc = "pipeline;MMA instruction issued",
        },
+       {.pme_name = "PM_PRED_BR_TKN_COND_DIR",
+        .pme_code = 0x00000040B8,
+        .pme_short_desc = "frontend;A conditional branch finished with 
correctly predicted direction.",
+        .pme_long_desc = "frontend;A conditional branch finished with 
correctly predicted direction. Resolved taken",
+       },
+       {.pme_name = "PM_PRED_BR_NTKN_COND_DIR",
+       .pme_code = 0x00000048B8,
+       .pme_short_desc = "frontend;A conditional branch finished with 
correctly predicted direction.",
+       .pme_long_desc = "frontend;A conditional branch finished with correctly 
predicted direction. Resolved not taken",
+       },
        {.pme_name = "PM_MPRED_BR_NTKN_COND_DIR",
        .pme_code = 0x00000048BC,
        .pme_short_desc = "NA;A conditional branch finished with mispredicted 
direction.",
-- 
2.46.0

_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel

Reply via email to