Dear libpfm4 team,
Please review the attached patch and let me know whether it
is acceptable for upstream. We are working on an important requirement
for one of IBM's Power10 customers which needs libpfm4/PAPI support. We
are hoping to get the support added as soon as possible.
Thank you for your consideration.
Regards,
Sachin.
From 419305392dd7d347f9c1a2ec0f04fc8f8d7c5181 Mon Sep 17 00:00:00 2001
From: Sachin Monga <smo...@linux.ibm.com>
Date: Thu, 15 Aug 2024 12:54:51 -0400
Subject: [PATCH] IBM Power10 core PMU support
Add additional events for the IBM Power10 core PMU.
Signed-off-by: Sachin Monga <smo...@linux.ibm.com>
---
lib/events/power10_events.h | 109 +++++++++++++++++++++++++++++++++++-
1 file changed, 107 insertions(+), 2 deletions(-)
diff --git a/lib/events/power10_events.h b/lib/events/power10_events.h
index 2f2ad52..e55caf5 100644
--- a/lib/events/power10_events.h
+++ b/lib/events/power10_events.h
@@ -1,12 +1,12 @@
/*
* File: power10_events.h
-* (C) Copyright IBM Corporation, 2021-2022. All Rights Reserved.
+* (C) Copyright IBM Corporation, 2021-2024. All Rights Reserved.
* Author: Will Schmidt
* will_schm...@vnet.ibm.com
* Author: Carl Love
* c...@us.ibm.com
#
-* Content reworked May 11, 2022, - Will Schmidt.
+* Content reworked Aug 12, 2024, - Sachin Monga, Jeevitha P.
* This file was automatically generated from event lists as
* provided by the IBM PowerPC PMU team. Any manual
* updates should be clearly marked so they are not lost in
@@ -90,11 +90,21 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "NA;A conditional branch finished with mispredicted
direction using the Global Branch History Table.",
.pme_long_desc = "NA;A conditional branch finished with mispredicted
direction using the Global Branch History Table. Resolved not taken",
},
+ {.pme_name = "PM_BR_COND_CMPL",
+ .pme_code = 0x4E058,
+ .pme_short_desc = "frontend;A conditional branch completed.",
+ .pme_long_desc = "frontend;A conditional branch completed.",
+ },
{.pme_name = "PM_BR_MPRED_NTKN_COND_DIR_LBHT_GSEL",
.pme_code = 0x000000E080,
.pme_short_desc = "NA;A conditional branch finished with mispredicted
direction using the Local Branch History Table selected with the global
selector.",
.pme_long_desc = "NA;A conditional branch finished with mispredicted
direction using the Local Branch History Table selected with the global
selector. Resolved not taken",
},
+ {.pme_name = "PM_BR_TKN_FIN",
+ .pme_code = 0x00000040B4,
+ .pme_short_desc = "frontend; A taken branch (conditional or
unconditional) finished",
+ .pme_long_desc = "frontend;A taken branch (conditional or
unconditional) finished",
+ },
{.pme_name = "PM_BR_MPRED_NTKN_COND_DIR_LBHT_LSEL",
.pme_code = 0x00000058BC,
.pme_short_desc = "NA;A conditional branch finished with mispredicted
direction using the Local Branch History Table selected by the local selector.",
@@ -470,6 +480,26 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded with a valid line that was not in the M (exclusive) state from another
core's L2 on the same chip in the same regent due to a demand miss.",
.pme_long_desc = "Data Source;The processor's L1 data cache was
reloaded with a valid line that was not in the M (exclusive) state from another
core's L2 on the same chip in the same regent due to a demand miss.",
},
+ {.pme_name = "PM_DATA_FROM_L1MISS",
+ .pme_code = 0x003F40000001C040,
+ .pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L1 due to a demand miss.",
+ .pme_long_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L1 due to a demand miss.",
+ },
+ {.pme_name = "PM_DATA_FROM_L1MISS_ALT2",
+ .pme_code = 0x003F40000002C040,
+ .pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L1 due to a demand miss.",
+ .pme_long_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L1 due to a demand miss.",
+ },
+ {.pme_name = "PM_DATA_FROM_L1MISS_ALT3",
+ .pme_code = 0x003F40000003C040,
+ .pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L1 due to a demand miss.",
+ .pme_long_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L1 due to a demand miss.",
+ },
+ {.pme_name = "PM_DATA_FROM_L1MISS_ALT4",
+ .pme_code = 0x003F40000004C040,
+ .pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L1 due to a demand miss.",
+ .pme_long_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L1 due to a demand miss.",
+ },
{.pme_name = "PM_DATA_FROM_L2MISS",
.pme_code = 0x0003C0000001C040,
.pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded from a source beyond the local core's L2 due to a demand miss.",
@@ -510,6 +540,11 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded from the local core's L2 due to a demand miss.",
.pme_long_desc = "Data Source;The processor's L1 data cache was
reloaded from the local core's L2 due to a demand miss.",
},
+ {.pme_name = "PM_ST_DATA_FROM_L2",
+ .pme_code = 0x0C0000016080,
+ .pme_short_desc = "Data Source;Store data line hit in the local L2.
Includes cache-line states Sx, Tx, Mx.",
+ .pme_long_desc = "Data Source;Store data line hit in the local L2.
Includes cache-line states Sx, Tx, Mx.Since the event happens in a 2:1 clock
domain and is time-sliced across all 4 threads, the event count should be
multiplied by 2.",
+ },
{.pme_name = "PM_DATA_FROM_L31_NON_REGENT_MOD",
.pme_code = 0x0AC040000001C040,
.pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded with a line in the M (exclusive) state from another core's L3 on the
same chip in a different regent due to a demand miss.",
@@ -650,6 +685,11 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded from the local core's L3 due to a demand miss.",
.pme_long_desc = "Data Source;The processor's L1 data cache was
reloaded from the local core's L3 due to a demand miss.",
},
+ {.pme_name = "PM_ST_DATA_FROM_L3",
+ .pme_code = 0x0C0000016880,
+ .pme_short_desc = "Data Source;Store data line hit in the local L3.
Includes cache-line states Tx and Mx.",
+ .pme_long_desc = "Data Source;Store data line hit in the local L3.
Includes cache-line states Tx and Mx. If the cache line is in the Sx state, the
RC machine will send a RWITM command. Since the event happens in a 2:1 clock
domain and is time-sliced across all 4 threads, the event count should be
multiplied by 2.",
+ },
{.pme_name = "PM_DATA_FROM_LMEM",
.pme_code = 0x094040000001C040,
.pme_short_desc = "Data Source;The processor's L1 data cache was
reloaded from the local chip's memory due to a demand miss.",
@@ -1845,6 +1885,11 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "pipeline;Cycles in which the oldest instruction in
the pipeline was executing in the VSU (includes FXU, VSU, CRU).",
.pme_long_desc = "pipeline;Cycles in which the oldest instruction in
the pipeline was executing in the VSU (includes FXU, VSU, CRU).",
},
+ {.pme_name = "PM_EXT_INT",
+ .pme_code = 0x200F8,
+ .pme_short_desc = "pipeline;Cycles an external interrupt was active",
+ .pme_long_desc = "pipeline;Cycles an external interrupt was active",
+ },
{.pme_name = "PM_FLOP_CMPL",
.pme_code = 0x100F4,
.pme_short_desc = "floating point;Floating Point Operations Completed.",
@@ -1915,6 +1960,16 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "pmc;PowerPC instruction completed",
.pme_long_desc = "pmc;PowerPC instruction completed",
},
+ {.pme_name = "PM_INST_DISP",
+ .pme_code = 0x200F2,
+ .pme_short_desc = "frontend;PowerPC instruction dispatched",
+ .pme_long_desc = "frontend;PowerPC instruction dispatched",
+ },
+ {.pme_name = "PM_INST_DISP_ALT",
+ .pme_code = 0x300F2,
+ .pme_short_desc = "frontend;PowerPC instruction dispatched",
+ .pme_long_desc = "frontend;PowerPC instruction dispatched",
+ },
{.pme_name = "PM_INST_CMPL_ALT2",
.pme_code = 0x20002,
.pme_short_desc = "pmc;PowerPC instruction completed",
@@ -1935,6 +1990,31 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "pmc;Instruction finished",
.pme_long_desc = "pmc;Instruction finished",
},
+ {.pme_name = "PM_INST_FROM_L1",
+ .pme_code = 0x0000004080,
+ .pme_short_desc = "NA;An instruction fetch hit in the L1.",
+ .pme_long_desc = "NA;An instruction fetch hit in the L1. Each fetch
group contains 8 instructions. The same line can hit 4 times if 32 sequential
instructions are fetched.",
+ },
+ {.pme_name = "PM_INST_FROM_L1MISS",
+ .pme_code = 0x003F00000001C040,
+ .pme_short_desc = "NA;The processor's instruction cache was reloaded
from a source beyond the local core's L1 due to a demand miss.",
+ .pme_long_desc = "NA;The processor's instruction cache was reloaded
from a source beyond the local core's L1 due to a demand miss.",
+ },
+ {.pme_name = "PM_INST_FROM_L1MISS_ALT2",
+ .pme_code = 0x003F00000002C040,
+ .pme_short_desc = "NA;The processor's instruction cache was reloaded
from a source beyond the local core's L1 due to a demand miss.",
+ .pme_long_desc = "NA;The processor's instruction cache was reloaded
from a source beyond the local core's L1 due to a demand miss.",
+ },
+ {.pme_name = "PM_INST_FROM_L1MISS_ALT3",
+ .pme_code = 0x003F00000003C040,
+ .pme_short_desc = "NA;The processor's instruction cache was reloaded
from a source beyond the local core's L1 due to a demand miss.",
+ .pme_long_desc = "NA;The processor's instruction cache was reloaded
from a source beyond the local core's L1 due to a demand miss.",
+ },
+ {.pme_name = "PM_INST_FROM_L1MISS_ALT4",
+ .pme_code = 0x003F00000004C040,
+ .pme_short_desc = "NA;The processor's instruction cache was reloaded
from a source beyond the local core's L1 due to a demand miss.",
+ .pme_long_desc = "NA;The processor's instruction cache was reloaded
from a source beyond the local core's L1 due to a demand miss.",
+ },
{.pme_name = "PM_INST_FROM_DMEM",
.pme_code = 0x0F4100000001C040,
.pme_short_desc = "Data Source;The processor's instruction cache was
reloaded from distant memory (MC slow) due to a demand miss.",
@@ -2745,6 +2825,16 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "NA;All successful I-side-instruction-fetch (e.",
.pme_long_desc = "NA;All successful I-side-instruction-fetch (e.g.
i-demand, i-prefetch) dispatches for this thread. Since the event happens in a
2:1 clock domain and is time-sliced across all 4 threads, the event count
should be multiplied by 2.",
},
+ {.pme_name = "PM_L2_INST_MISS",
+ .pme_code = 0x000000036880,
+ .pme_short_desc = "NA;All successful instruction (demand and prefetch)
dispatches for this thread that missed in the L2.",
+ .pme_long_desc = "NA;All successful instruction (demand and prefetch)
dispatches for this thread that missed in the L2. Since the event happens in a
2:1 clock domain and is time-sliced across all 4 threads, the event count
should be multiplied by 2.",
+ },
+ {.pme_name = "PM_L2_INST_MISS_ALT",
+ .pme_code = 0x0F0000046080,
+ .pme_short_desc = "NA;All successful instruction (demand and prefetch)
dispatches for this thread that missed in the L2.",
+ .pme_long_desc = "NA;All successful instruction (demand and prefetch)
dispatches for this thread that missed in the L2. Since the event happens in a
2:1 clock domain and is time-sliced across all 4 threads, the event count
should be multiplied by 2.",
+ },
{.pme_name = "PM_L2_ISIDE_DSIDE_ATTEMPT",
.pme_code = 0x020000016080,
.pme_short_desc = "NA;All D-side-Ld or I-side-instruction-fetch
dispatch attempts for this thread.",
@@ -2830,6 +2920,11 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "NA;All successful D-Side Store dispatches for this
thread that missed in the L2.",
.pme_long_desc = "NA;All successful D-Side Store dispatches for this
thread that missed in the L2. Since the event happens in a 2:1 clock domain and
is time-sliced across all 4 threads, the event count should be multiplied by
2.",
},
+ {.pme_name = "PM_L2_ST_HIT",
+ .pme_code = 0x0F0000026880,
+ .pme_short_desc = "NA;All successful D-side store dispatches for this
thread that were L2 hits",
+ .pme_long_desc = "NA;All successful D-side store dispatches for this
thread that were L2 hits. Since the event happens in a 2:1 clock domain and is
time-sliced across all 4 threads, the event count should be multiplied by 2.",
+ },
{.pme_name = "PM_L2_ST",
.pme_code = 0x000000016880,
.pme_short_desc = "NA;All successful D-side store dispatches for this
thread (L2 miss + L2 hits).",
@@ -2970,6 +3065,16 @@ static const pme_power_entry_t power10_pe[] = {
.pme_short_desc = "pipeline;MMA instruction issued",
.pme_long_desc = "pipeline;MMA instruction issued",
},
+ {.pme_name = "PM_PRED_BR_TKN_COND_DIR",
+ .pme_code = 0x00000040B8,
+ .pme_short_desc = "frontend;A conditional branch finished with
correctly predicted direction.",
+ .pme_long_desc = "frontend;A conditional branch finished with
correctly predicted direction. Resolved taken",
+ },
+ {.pme_name = "PM_PRED_BR_NTKN_COND_DIR",
+ .pme_code = 0x00000048B8,
+ .pme_short_desc = "frontend;A conditional branch finished with
correctly predicted direction.",
+ .pme_long_desc = "frontend;A conditional branch finished with correctly
predicted direction. Resolved not taken",
+ },
{.pme_name = "PM_MPRED_BR_NTKN_COND_DIR",
.pme_code = 0x00000048BC,
.pme_short_desc = "NA;A conditional branch finished with mispredicted
direction.",
--
2.46.0
_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel