Add some initial PMU testing.

- PMC5/6 tests
- PMAE / PMI test
- BHRB basic tests

Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 lib/powerpc/asm/processor.h |   2 +
 lib/powerpc/asm/reg.h       |   9 +
 lib/powerpc/asm/setup.h     |   1 +
 lib/powerpc/setup.c         |  23 +++
 powerpc/Makefile.common     |   3 +-
 powerpc/pmu.c               | 337 ++++++++++++++++++++++++++++++++++++
 powerpc/unittests.cfg       |   3 +
 7 files changed, 377 insertions(+), 1 deletion(-)
 create mode 100644 powerpc/pmu.c

diff --git a/lib/powerpc/asm/processor.h b/lib/powerpc/asm/processor.h
index 749155696..28239c610 100644
--- a/lib/powerpc/asm/processor.h
+++ b/lib/powerpc/asm/processor.h
@@ -14,6 +14,8 @@ extern bool cpu_has_hv;
 extern bool cpu_has_power_mce;
 extern bool cpu_has_siar;
 extern bool cpu_has_heai;
+extern bool cpu_has_bhrb;
+extern bool cpu_has_p10_bhrb;
 extern bool cpu_has_radix;
 extern bool cpu_has_prefix;
 extern bool cpu_has_sc_lev;
diff --git a/lib/powerpc/asm/reg.h b/lib/powerpc/asm/reg.h
index 69ef21adb..602fba1b6 100644
--- a/lib/powerpc/asm/reg.h
+++ b/lib/powerpc/asm/reg.h
@@ -40,10 +40,19 @@
 #define SPR_LPIDR      0x13f
 #define SPR_HEIR       0x153
 #define SPR_PTCR       0x1d0
+#define SPR_MMCRA      0x312
+#define   MMCRA_BHRBRD         UL(0x0000002000000000)
+#define   MMCRA_IFM_MASK       UL(0x00000000c0000000)
+#define SPR_PMC5       0x317
+#define SPR_PMC6       0x318
 #define SPR_MMCR0      0x31b
 #define   MMCR0_FC             UL(0x80000000)
+#define   MMCR0_FCP            UL(0x20000000)
 #define   MMCR0_PMAE           UL(0x04000000)
+#define   MMCR0_BHRBA          UL(0x00200000)
+#define   MMCR0_FCPC           UL(0x00001000)
 #define   MMCR0_PMAO           UL(0x00000080)
+#define   MMCR0_FC56           UL(0x00000010)
 #define SPR_SIAR       0x31c
 
 /* Machine State Register definitions: */
diff --git a/lib/powerpc/asm/setup.h b/lib/powerpc/asm/setup.h
index 9ca318ce6..8f0b58ed0 100644
--- a/lib/powerpc/asm/setup.h
+++ b/lib/powerpc/asm/setup.h
@@ -10,6 +10,7 @@
 #define NR_CPUS                        8       /* arbitrarily set for now */
 
 extern uint64_t tb_hz;
+extern uint64_t cpu_hz;
 
 #define NR_MEM_REGIONS         8
 #define MR_F_PRIMARY           (1U << 0)
diff --git a/lib/powerpc/setup.c b/lib/powerpc/setup.c
index 30b988a5c..42ba06ad1 100644
--- a/lib/powerpc/setup.c
+++ b/lib/powerpc/setup.c
@@ -32,6 +32,7 @@ u32 initrd_size;
 u32 cpu_to_hwid[NR_CPUS] = { [0 ... NR_CPUS-1] = (~0U) };
 int nr_cpus_present;
 uint64_t tb_hz;
+uint64_t cpu_hz;
 
 struct mem_region mem_regions[NR_MEM_REGIONS];
 phys_addr_t __physical_start, __physical_end;
@@ -41,6 +42,7 @@ struct cpu_set_params {
        unsigned icache_bytes;
        unsigned dcache_bytes;
        uint64_t tb_hz;
+       uint64_t cpu_hz;
 };
 
 static void cpu_set(int fdtnode, u64 regval, void *info)
@@ -94,6 +96,22 @@ static void cpu_set(int fdtnode, u64 regval, void *info)
                data = (u32 *)prop->data;
                params->tb_hz = fdt32_to_cpu(*data);
 
+               prop = fdt_get_property(dt_fdt(), fdtnode,
+                                       "ibm,extended-clock-frequency", NULL);
+               if (prop) {
+                       data = (u32 *)prop->data;
+                       params->cpu_hz = fdt32_to_cpu(*data);
+                       params->cpu_hz <<= 32;
+                       data = (u32 *)prop->data + 1;
+                       params->cpu_hz |= fdt32_to_cpu(*data);
+               } else {
+                       prop = fdt_get_property(dt_fdt(), fdtnode,
+                                               "clock-frequency", NULL);
+                       assert(prop != NULL);
+                       data = (u32 *)prop->data;
+                       params->cpu_hz = fdt32_to_cpu(*data);
+               }
+
                read_common_info = true;
        }
 }
@@ -102,6 +120,8 @@ bool cpu_has_hv;
 bool cpu_has_power_mce; /* POWER CPU machine checks */
 bool cpu_has_siar;
 bool cpu_has_heai;
+bool cpu_has_bhrb;
+bool cpu_has_p10_bhrb;
 bool cpu_has_radix;
 bool cpu_has_prefix;
 bool cpu_has_sc_lev; /* sc interrupt has LEV field in SRR1 */
@@ -118,12 +138,14 @@ static void cpu_init_params(void)
        __icache_bytes = params.icache_bytes;
        __dcache_bytes = params.dcache_bytes;
        tb_hz = params.tb_hz;
+       cpu_hz = params.cpu_hz;
 
        switch (mfspr(SPR_PVR) & PVR_VERSION_MASK) {
        case PVR_VER_POWER10:
                cpu_has_prefix = true;
                cpu_has_sc_lev = true;
                cpu_has_pause_short = true;
+               cpu_has_p10_bhrb = true;
        case PVR_VER_POWER9:
                cpu_has_radix = true;
        case PVR_VER_POWER8E:
@@ -132,6 +154,7 @@ static void cpu_init_params(void)
                cpu_has_power_mce = true;
                cpu_has_heai = true;
                cpu_has_siar = true;
+               cpu_has_bhrb = true;
                break;
        default:
                break;
diff --git a/powerpc/Makefile.common b/powerpc/Makefile.common
index 410a675d9..64a3d93e4 100644
--- a/powerpc/Makefile.common
+++ b/powerpc/Makefile.common
@@ -17,7 +17,8 @@ tests-common = \
        $(TEST_DIR)/smp.elf \
        $(TEST_DIR)/sprs.elf \
        $(TEST_DIR)/timebase.elf \
-       $(TEST_DIR)/interrupts.elf
+       $(TEST_DIR)/interrupts.elf \
+       $(TEST_DIR)/pmu.elf
 
 tests-all = $(tests-common) $(tests)
 all: directories $(TEST_DIR)/boot_rom.bin $(tests-all)
diff --git a/powerpc/pmu.c b/powerpc/pmu.c
new file mode 100644
index 000000000..df0604261
--- /dev/null
+++ b/powerpc/pmu.c
@@ -0,0 +1,337 @@
+/*
+ * Test PMU
+ *
+ * Copyright 2024 Nicholas Piggin, IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.
+ */
+#include <libcflat.h>
+#include <util.h>
+#include <migrate.h>
+#include <alloc.h>
+#include <asm/setup.h>
+#include <asm/handlers.h>
+#include <asm/hcall.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+#include <asm/barrier.h>
+#include <asm/mmu.h>
+#include "alloc_phys.h"
+#include "vmalloc.h"
+
+/* Set to true by the interrupt handlers in this file; checked by the tests. */
+static volatile bool got_interrupt;
+/* Interrupted register state and MMCR0 value captured by pmi_handler(). */
+static volatile struct pt_regs recorded_regs;
+static volatile unsigned long recorded_mmcr0;
+
+/*
+ * Bring MMCR0 to a known state: first set the FC and FC56 bits, then
+ * clear PMAE and PMAO. Each step is a separate read-modify-write of the
+ * SPR, in that order.
+ */
+static void reset_mmcr0(void)
+{
+       unsigned long mmcr0;
+
+       mmcr0 = mfspr(SPR_MMCR0);
+       mtspr(SPR_MMCR0, mmcr0 | (MMCR0_FC | MMCR0_FC56));
+
+       mmcr0 = mfspr(SPR_MMCR0);
+       mtspr(SPR_MMCR0, mmcr0 & ~(MMCR0_PMAE | MMCR0_PMAO));
+}
+
+/*
+ * Run a "bdnz" loop with CTR loaded from @nr while PMC5 is counting and
+ * return the resulting PMC5 value.
+ *
+ * PMC5 is zeroed while the counters are held by reset_mmcr0(), FC and FC56
+ * are cleared just before the loop and set again right after it. The value
+ * returned also includes whatever is counted between those two MMCR0
+ * writes outside the loop itself; test_pmc56() cancels that fixed part by
+ * subtracting the results of two calls with different @nr.
+ */
+static __attribute__((__noinline__)) unsigned long
+pmc5_count_nr_insns(unsigned long nr)
+{
+       reset_mmcr0();
+       mtspr(SPR_PMC5, 0);
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+       asm volatile("mtctr %0 ; 1: bdnz 1b" :: "r"(nr) : "ctr");
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+
+       return mfspr(SPR_PMC5);
+}
+
+/*
+ * PMC5/PMC6 tests:
+ *  - both counters read back zero after being written with zero;
+ *  - they stay at zero while MMCR0[FC56] is set, even with MMCR0[FC] clear;
+ *  - they become non-zero once FC56 is cleared as well;
+ *  - PMC5 advances by exactly 900 between a 100- and a 1000-iteration loop.
+ */
+static void test_pmc56(void)
+{
+       unsigned long tmp;
+
+       report_prefix_push("pmc56");
+
+       reset_mmcr0();
+       mtspr(SPR_PMC5, 0);
+       mtspr(SPR_PMC6, 0);
+       report(mfspr(SPR_PMC5) == 0, "PMC5 zeroed");
+       report(mfspr(SPR_PMC6) == 0, "PMC6 zeroed");
+       /* FC cleared but FC56 still set: PMC5/6 must not move. */
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_FC);
+       msleep(100);
+       report(mfspr(SPR_PMC5) == 0, "PMC5 frozen");
+       report(mfspr(SPR_PMC6) == 0, "PMC6 frozen");
+       /* Now let PMC5/6 run for 100ms, then stop them again. */
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_FC56);
+       mdelay(100);
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+       report(mfspr(SPR_PMC5) != 0, "PMC5 counting");
+       report(mfspr(SPR_PMC6) != 0, "PMC6 counting");
+
+       /*
+        * PMC6 ran for 100ms, so count * 1000 / cpu_hz is the percentage of
+        * the reported clock frequency. Dynamic frequency scaling can make
+        * this differ from 100%, so only print the value and never fail.
+        */
+       tmp = mfspr(SPR_PMC6);
+       report(true, "PMC6 ratio to reported clock frequency is %lu%%",
+              (unsigned long)(tmp * 1000 / cpu_hz));
+
+       /* The difference of two runs cancels the fixed measurement overhead. */
+       tmp = pmc5_count_nr_insns(100);
+       tmp = pmc5_count_nr_insns(1000) - tmp;
+       report(tmp == 900, "PMC5 counts instructions precisely");
+
+       report_prefix_pop();
+}
+
+/*
+ * Decrementer (0x900) handler that only re-arms the decrementer with
+ * 0x7fffffff; @regs and @data are unused.
+ */
+static void dec_ignore_handler(struct pt_regs *regs, void *data)
+{
+       const unsigned long dec_reload = 0x7fffffff;
+
+       mtspr(SPR_DEC, dec_reload);
+}
+
+/*
+ * Performance monitor interrupt handler: flag that the interrupt was
+ * taken, save a copy of the interrupted registers and of MMCR0 into the
+ * recorded_* globals, and clear MMCR0[PMAO] if it is set.
+ */
+static void pmi_handler(struct pt_regs *regs, void *data)
+{
+       got_interrupt = true;
+       memcpy((void *)&recorded_regs, regs, sizeof(struct pt_regs));
+       recorded_mmcr0 = mfspr(SPR_MMCR0);
+
+       if (!(mfspr(SPR_MMCR0) & MMCR0_PMAO))
+               return;
+
+       /* This may cause infinite interrupts, so clear it. */
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAO);
+}
+
+/*
+ * Check that setting MMCR0[PMAO] results in the 0xf00 handler running
+ * once MSR[EE] is enabled.
+ *
+ * got_interrupt is shared with the other tests in this file, so it is
+ * cleared both before arming the interrupt (so a stale value cannot make
+ * the check pass) and after reporting (so test_bhrb()'s facility probe
+ * does not mistake this interrupt for a failing clrbhrb).
+ */
+static void test_pmi(void)
+{
+       report_prefix_push("pmi");
+       handle_exception(0x900, &dec_ignore_handler, NULL);
+       handle_exception(0xf00, &pmi_handler, NULL);
+       reset_mmcr0();
+       got_interrupt = false;
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAO);
+       /* Open a window with external interrupts enabled, then close it. */
+       mtmsr(mfmsr() | MSR_EE);
+       mtmsr(mfmsr() & ~MSR_EE);
+       report(got_interrupt, "PMAO caused interrupt");
+       got_interrupt = false;
+       handle_exception(0xf00, NULL, NULL);
+       handle_exception(0x900, NULL, NULL);
+       report_prefix_pop();
+}
+
+/* Issue the clrbhrb instruction; the "memory" clobber stops the compiler moving memory accesses across it. */
+static void clrbhrb(void)
+{
+       __asm__ __volatile__("clrbhrb" : : : "memory");
+}
+
+/*
+ * Read BHRB entry @nr with the mfbhrbe instruction and return its value.
+ *
+ * @nr is passed through an "i" (immediate) asm constraint, so every call
+ * site must supply a compile-time constant.
+ */
+static inline unsigned long mfbhrbe(int nr)
+{
+       unsigned long entry;
+
+       asm volatile("mfbhrbe %0,%1"
+                    : "=r"(entry)
+                    : "i"(nr)
+                    : "memory");
+
+       return entry;
+}
+
+/*
+ * Labels defined inside bhrb_dummy()'s asm. They mark the unconditional
+ * branch (dummy_branch_1) and the conditional branch (dummy_branch_2) so
+ * that test_bhrb() can compare BHRB entries against their addresses.
+ */
+extern unsigned char dummy_branch_1[];
+extern unsigned char dummy_branch_2[];
+
+/*
+ * Execute a fixed branch pattern for the BHRB tests.
+ *
+ * With @i != 1 (callers pass 0): "beq 1f" falls through, "b 2f" at
+ * dummy_branch_1 is taken, and "bne 3f" at dummy_branch_2 is taken.
+ * With @i == 1 the beq is taken and the code hits a trap.
+ *
+ * Kept noinline so the global labels are emitted exactly once. cmpdi
+ * writes CR0, so it is listed as clobbered.
+ */
+static __attribute__((__noinline__)) void bhrb_dummy(int i)
+{
+       asm volatile(
+       "       cmpdi %0,1      \n\t"
+       "       beq 1f          \n\t"
+       ".global dummy_branch_1 \n\t"
+       "dummy_branch_1:        \n\t"
+       "       b 2f            \n\t"
+       "1:     trap            \n\t"
+       ".global dummy_branch_2 \n\t"
+       "dummy_branch_2:        \n\t"
+       "2:     bne 3f          \n\t"
+       "       trap            \n\t"
+       "3:     nop             \n\t"
+       : : "r"(i) : "cr0");
+}
+
+/* Number of BHRB entries read back by run_and_load_bhrb(). */
+#define NR_BHRBE 16
+/* Entries from the most recent run, with bit 0 (prediction bit) masked off. */
+static unsigned long bhrbe[NR_BHRBE];
+/* Number of leading non-zero entries in bhrbe[]. */
+static int nr_bhrbe;
+
+/*
+ * Clear the BHRB, run bhrb_dummy(0) with MMCR0[PMAE] set, then read all
+ * NR_BHRBE entries into bhrbe[] and set nr_bhrbe to the number of leading
+ * non-zero entries (after masking off bit 0, the prediction bit).
+ *
+ * When cpu_has_p10_bhrb is set the dummy runs between enter_usermode()
+ * and exit_usermode(); otherwise it runs in the current mode.
+ *
+ * NOTE(review): the PMAE enable/disable pairs are repeated in each arm of
+ * the if rather than hoisted around it, presumably so that the branch on
+ * cpu_has_p10_bhrb is not itself executed while recording is enabled.
+ * The entry counts test_bhrb() expects depend on the exact branches
+ * executed here; confirm on hardware before restructuring.
+ */
+static void run_and_load_bhrb(void)
+{
+       int i;
+
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+       clrbhrb();
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_BHRBA);
+       mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FCP | MMCR0_FCPC));
+       mtspr(SPR_MMCRA, mfspr(SPR_MMCRA) & ~(MMCRA_BHRBRD | MMCRA_IFM_MASK));
+
+       if (cpu_has_p10_bhrb) {
+               mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAE);
+               asm volatile("isync" ::: "memory");
+               enter_usermode();
+               bhrb_dummy(0);
+               exit_usermode();
+               mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+               asm volatile("isync" ::: "memory");
+       } else {
+               mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAE);
+               asm volatile("isync" ::: "memory");
+               mtmsr(mfmsr());
+               asm volatile(".rept 100 ; nop ; .endr");
+               bhrb_dummy(0);
+               mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+               asm volatile("isync" ::: "memory");
+       }
+
+       /*
+        * mfbhrbe() takes the entry number as an immediate operand, so each
+        * read is spelled out with a literal index instead of a loop.
+        */
+       bhrbe[0] = mfbhrbe(0);
+       bhrbe[1] = mfbhrbe(1);
+       bhrbe[2] = mfbhrbe(2);
+       bhrbe[3] = mfbhrbe(3);
+       bhrbe[4] = mfbhrbe(4);
+       bhrbe[5] = mfbhrbe(5);
+       bhrbe[6] = mfbhrbe(6);
+       bhrbe[7] = mfbhrbe(7);
+       bhrbe[8] = mfbhrbe(8);
+       bhrbe[9] = mfbhrbe(9);
+       bhrbe[10] = mfbhrbe(10);
+       bhrbe[11] = mfbhrbe(11);
+       bhrbe[12] = mfbhrbe(12);
+       bhrbe[13] = mfbhrbe(13);
+       bhrbe[14] = mfbhrbe(14);
+       bhrbe[15] = mfbhrbe(15);
+
+       /* Count entries up to the first one that is zero after masking. */
+       for (i = 0; i < NR_BHRBE; i++) {
+               bhrbe[i] &= ~0x1UL; /* remove prediction bit */
+               if (!bhrbe[i])
+                       break;
+       }
+       nr_bhrbe = i;
+}
+
+/*
+ * Exception handler used to probe for an unsupported instruction: records
+ * that an interrupt was taken and calls regs_advance_insn() so execution
+ * resumes past the faulting instruction. @data is unused.
+ */
+static void illegal_handler(struct pt_regs *regs, void *data)
+{
+       got_interrupt = true;
+       regs_advance_insn(regs);
+}
+
+/*
+ * BHRB tests.
+ *
+ * 1. Probe whether clrbhrb executes without raising a 0x700 or 0xe40
+ *    interrupt; if it does not, skip.
+ * 2. With MMCR0 reset, run the dummy branches and check that entry 0
+ *    reads as zero ("frozen").
+ * 3. Run with recording enabled and check the number of entries and the
+ *    addresses recorded for the dummy branches.
+ *
+ * When cpu_has_p10_bhrb is set the dummy code runs via enter_usermode(),
+ * and the whole test is silently skipped if vm_available() is false.
+ */
+static void test_bhrb(void)
+{
+       int i;
+
+       if (cpu_has_p10_bhrb && !vm_available())
+               return;
+
+       report_prefix_push("bhrb");
+
+       /*
+        * TCG doesn't implement BHRB yet. Start from a clean flag so an
+        * interrupt taken by an earlier test is not mistaken for clrbhrb
+        * faulting.
+        */
+       got_interrupt = false;
+       handle_exception(0x700, &illegal_handler, NULL);
+       handle_exception(0xe40, &illegal_handler, NULL);
+       clrbhrb();
+       handle_exception(0x700, NULL, NULL);
+       handle_exception(0xe40, NULL, NULL);
+       if (got_interrupt) {
+               got_interrupt = false;
+               report_skip("BHRB support missing");
+               report_prefix_pop();
+               return;
+       }
+
+       /*
+        * The decrementer is asynchronous, so its handler must only re-arm
+        * it; illegal_handler() would skip an instruction. Install the
+        * ignore handler once, whether or not a VM is available.
+        */
+       handle_exception(0x900, &dec_ignore_handler, NULL);
+       if (vm_available())
+               setup_vm();
+
+       reset_mmcr0();
+       clrbhrb();
+       if (cpu_has_p10_bhrb) {
+               enter_usermode();
+               bhrb_dummy(0);
+               exit_usermode();
+       } else {
+               bhrb_dummy(0);
+       }
+       report(mfbhrbe(0) == 0, "BHRB is frozen");
+
+       /*
+        * BHRB may be cleared at any time (e.g., by OS or hypervisor)
+        * so this test could be occasionally incorrect. Try several
+        * times before giving up...
+        */
+
+       if (cpu_has_p10_bhrb) {
+               /*
+                * BHRB should have 8 entries:
+                * 1. enter_usermode blr
+                * 2. enter_usermode blr target
+                * 3. bl dummy
+                * 4. dummy unconditional
+                * 5. dummy conditional
+                * 6. dummy blr
+                * 7. dummy blr target
+                * 8. exit_usermode bl
+                *
+                * The checks below treat bhrbe[0] as the newest entry, so
+                * the conditional branch is at index 3 and the
+                * unconditional one at index 4.
+                *
+                * POWER10 often gives 4 entries, if other threads are
+                * running on the core, it seems to struggle.
+                */
+               for (i = 0; i < 200; i++) {
+                       run_and_load_bhrb();
+                       if (nr_bhrbe == 8)
+                               break;
+                       /* After 100 tries, settle for the 4-entry case. */
+                       if (i > 100 && nr_bhrbe == 4)
+                               break;
+               }
+               if (nr_bhrbe != 8)
+                       printf("nr_bhrbe=%d\n", nr_bhrbe);
+               report(nr_bhrbe, "BHRB has been written");
+               if (nr_bhrbe == 8) {
+                       report(nr_bhrbe == 8, "BHRB has written 8 entries");
+                       report(bhrbe[4] == (unsigned long)dummy_branch_1,
+                                       "correct unconditional branch address");
+                       report(bhrbe[3] == (unsigned long)dummy_branch_2,
+                                       "correct conditional branch address");
+               } else if (nr_bhrbe == 4) {
+                       /* POWER10 workaround */
+                       report(nr_bhrbe == 4, "BHRB has written 4 entries");
+                       report(bhrbe[3] == (unsigned long)dummy_branch_2,
+                                       "correct conditional branch address");
+               }
+       } else {
+               /*
+                * BHRB should have 6 entries:
+                * 1. bl dummy
+                * 2. dummy unconditional
+                * 3. dummy conditional
+                * 4. dummy blr
+                * 5. dummy blr target
+                * 6. Final b loop before disabled.
+                *
+                * POWER9 often gives 4 entries, if other threads are
+                * running on the core, it seems to struggle.
+                */
+               for (i = 0; i < 200; i++) {
+                       run_and_load_bhrb();
+                       if (nr_bhrbe == 6)
+                               break;
+                       /* After 100 tries, settle for the 4-entry case. */
+                       if (i > 100 && nr_bhrbe == 4)
+                               break;
+               }
+               report(nr_bhrbe, "BHRB has been written");
+               report(nr_bhrbe == 6, "BHRB has written 6 entries");
+               if (nr_bhrbe == 6) {
+                       report(bhrbe[4] == (unsigned long)dummy_branch_1,
+                                       "correct unconditional branch address");
+                       report(bhrbe[3] == (unsigned long)dummy_branch_2,
+                                       "correct conditional branch address");
+               } else if (nr_bhrbe == 4) {
+                       /* POWER9 workaround */
+                       report(nr_bhrbe == 4, "BHRB has written 4 entries");
+                       report(bhrbe[3] == (unsigned long)dummy_branch_2,
+                                       "correct conditional branch address");
+               }
+       }
+
+       handle_exception(0x900, NULL, NULL);
+
+       report_prefix_pop();
+}
+
+/*
+ * Test entry point: run the PMC5/6 and PMI tests, then the BHRB tests if
+ * cpu_has_bhrb is set, and return the report summary as the exit status.
+ * @argc and @argv are unused.
+ */
+int main(int argc, char **argv)
+{
+       int status;
+
+       report_prefix_push("pmu");
+
+       test_pmc56();
+       test_pmi();
+       if (cpu_has_bhrb) {
+               test_bhrb();
+       }
+
+       report_prefix_pop();
+
+       status = report_summary();
+       return status;
+}
diff --git a/powerpc/unittests.cfg b/powerpc/unittests.cfg
index 008559b43..e275f389b 100644
--- a/powerpc/unittests.cfg
+++ b/powerpc/unittests.cfg
@@ -89,6 +89,9 @@ file = emulator.elf
 [interrupts]
 file = interrupts.elf
 
+[pmu]
+file = pmu.elf
+
 [smp]
 file = smp.elf
 smp = 2
-- 
2.42.0

Reply via email to