Subject: perf, intel: Expose SMI_COUNT as a fixed counter
From: Peter Zijlstra <[email protected]>
Date: Wed Sep 12 13:10:53 CEST 2012

The Intel SMI_COUNT sadly isn't a proper PMU event but a free-running
MSR; expose it by creating another fake fixed PMC and another pseudo
event.

Signed-off-by: Peter Zijlstra <[email protected]>
---

Only added to wsm because that's what my testbox is ;-)
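
Rough usage sketch, not part of the patch: the pseudo event is a raw config
0x0400 counting event; per the x86_pmu_hw_config() check below it must not
set any exclude_* bits or a sample_period, so something like the following
(names and the ten second sleep are just for illustration) should read the
per-cpu SMI count:

/* smi_count.c: rough sketch, not part of this patch.  Opens the SMI_COUNT
 * pseudo event (raw config 0x0400) as a counting-only event on CPU 0 and
 * prints the SMIs observed over ten seconds.  Needs root or a relaxed
 * perf_event_paranoid setting.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x0400;	/* SMI_COUNT pseudo event */
	/* counting only: leave all exclude_* bits and sample_period at 0 */

	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	sleep(10);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("SMIs on cpu0: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}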

 arch/x86/include/asm/perf_event.h      |   18 ++++++++
 arch/x86/kernel/cpu/perf_event.c       |   68 +++++++++++++++++++++++++--------
 arch/x86/kernel/cpu/perf_event.h       |    9 ++++
 arch/x86/kernel/cpu/perf_event_intel.c |   42 ++++++++++++++------
 4 files changed, 109 insertions(+), 28 deletions(-)
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -131,13 +131,29 @@ struct x86_pmu_capability {
 #define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
 
 /*
+ * Create a range of 'special' (fake) fixed purpose counters
+ */
+#define INTEL_PMC_IDX_FIXED_SPECIAL    (INTEL_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_MSK_FIXED_SPECIAL    (1ULL << INTEL_PMC_IDX_FIXED_SPECIAL)
+
+/*
  * We model BTS tracing as another fixed-mode PMC.
  *
  * We choose a value in the middle of the fixed event range, since lower
  * values are used by actual fixed events and higher values are used
  * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
  */
-#define INTEL_PMC_IDX_FIXED_BTS                (INTEL_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_FIXED_BTS                (INTEL_PMC_IDX_FIXED_SPECIAL + 0)
+#define INTEL_PMC_MSK_FIXED_BTS                (1ULL << INTEL_PMC_IDX_FIXED_BTS)
+
+/*
+ * We model the SMI_COUNT as another fixed-mode PMC.
+ *
+ * This MSR (34h) is a free running counter of SMIs
+ */
+#define MSR_ARCH_SMI_COUNT             0x34
+#define INTEL_PMC_IDX_FIXED_SMI_COUNT  (INTEL_PMC_IDX_FIXED_SPECIAL + 1)
+#define INTEL_PMC_MSK_FIXED_SMI_COUNT  (1ULL << INTEL_PMC_IDX_FIXED_SMI_COUNT)
 
 /*
  * IBS cpuid feature detection
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -52,22 +52,14 @@ u64 __read_mostly hw_cache_extra_regs
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
 
-/*
- * Propagate event elapsed time into the generic event.
- * Can only be executed on the CPU where the event is active.
- * Returns the delta events processed.
- */
-u64 x86_perf_event_update(struct perf_event *event)
+static inline u64 __perf_event_update(struct perf_event *event,
+               u64 (*read)(struct hw_perf_event *hwc), int width)
 {
        struct hw_perf_event *hwc = &event->hw;
-       int shift = 64 - x86_pmu.cntval_bits;
+       int shift = 64 - width;
        u64 prev_raw_count, new_raw_count;
-       int idx = hwc->idx;
        s64 delta;
 
-       if (idx == INTEL_PMC_IDX_FIXED_BTS)
-               return 0;
-
        /*
         * Careful: an NMI might modify the previous event value.
         *
@@ -77,7 +69,7 @@ u64 x86_perf_event_update(struct perf_ev
         */
 again:
        prev_raw_count = local64_read(&hwc->prev_count);
-       rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+       new_raw_count = read(hwc);
 
        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
@@ -100,6 +92,37 @@ u64 x86_perf_event_update(struct perf_ev
        return new_raw_count;
 }
 
+static inline u64 x86_rdpmc(struct hw_perf_event *hwc)
+{
+       u64 count;
+
+       rdpmcl(hwc->event_base_rdpmc, count);
+
+       return count;
+}
+
+/*
+ * Propagate event elapsed time into the generic event.
+ * Can only be executed on the CPU where the event is active.
+ * Returns the delta events processed.
+ */
+u64 x86_perf_event_update(struct perf_event *event)
+{
+       int idx = event->hw.idx;
+
+       if (unlikely(idx >= INTEL_PMC_IDX_FIXED_SPECIAL)) {
+               switch (idx) {
+               case INTEL_PMC_IDX_FIXED_BTS:
+                       return 0;
+
+               case INTEL_PMC_IDX_FIXED_SMI_COUNT:
+                       return __perf_event_update(event, x86_rdsmi, 32);
+               }
+       }
+
+       return __perf_event_update(event, x86_rdpmc, x86_pmu.cntval_bits);
+}
+
 /*
  * Find and validate any extra registers to set up.
  */
@@ -437,8 +460,22 @@ int x86_pmu_hw_config(struct perf_event 
        if (!event->attr.exclude_kernel)
                event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
 
-       if (event->attr.type == PERF_TYPE_RAW)
+       if (event->attr.type == PERF_TYPE_RAW) {
+               /*
+                * SMI_COUNT can only count, never sample or exclude anything.
+                */
+               if (event->attr.config == 0x0400) {
+                       if (event->attr.exclude_user ||
+                           event->attr.exclude_kernel ||
+                           event->attr.exclude_hv ||
+                           event->attr.exclude_idle ||
+                           event->attr.exclude_host ||
+                           event->attr.exclude_guest ||
+                           event->attr.sample_period)
+                               return -EINVAL;
+               }
                event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+       }
 
        return x86_setup_perfctr(event);
 }
@@ -817,9 +854,10 @@ static inline void x86_assign_hw_event(s
        hwc->last_cpu = smp_processor_id();
        hwc->last_tag = ++cpuc->tags[i];
 
-       if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
+       if (hwc->idx >= INTEL_PMC_IDX_FIXED_SPECIAL) {
                hwc->config_base = 0;
                hwc->event_base = 0;
+               hwc->event_base_rdpmc = 0;
        } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
                hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
        hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
@@ -925,7 +963,7 @@ int x86_perf_event_set_period(struct per
        s64 period = hwc->sample_period;
        int ret = 0, idx = hwc->idx;
 
-       if (idx == INTEL_PMC_IDX_FIXED_BTS)
+       if (unlikely(idx >= INTEL_PMC_IDX_FIXED_SPECIAL))
                return 0;
 
        /*
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -536,6 +536,15 @@ static inline void set_linear_ip(struct 
        regs->ip = ip;
 }
 
+static inline u64 x86_rdsmi(struct hw_perf_event *hwc)
+{
+       u64 count;
+
+       rdmsrl(MSR_ARCH_SMI_COUNT, count);
+
+       return count;
+}
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -89,6 +89,7 @@ static struct event_constraint intel_wes
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
        FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       FIXED_EVENT_CONSTRAINT(0x0400, 17), /* SMI_COUNT */
        INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
        INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
        INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -966,10 +967,16 @@ static void intel_pmu_disable_event(stru
        struct hw_perf_event *hwc = &event->hw;
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
-               intel_pmu_disable_bts();
-               intel_pmu_drain_bts_buffer();
-               return;
+       if (unlikely(hwc->idx >= INTEL_PMC_IDX_FIXED_SPECIAL)) {
+               switch (hwc->idx) {
+               case INTEL_PMC_IDX_FIXED_BTS:
+                       intel_pmu_disable_bts();
+                       intel_pmu_drain_bts_buffer();
+                       return;
+
+               case INTEL_PMC_IDX_FIXED_SMI_COUNT:
+                       return;
+               }
        }
 
        cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
@@ -1029,13 +1036,21 @@ static void intel_pmu_enable_event(struc
        struct hw_perf_event *hwc = &event->hw;
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
-               if (!__this_cpu_read(cpu_hw_events.enabled))
+       if (unlikely(hwc->idx >= INTEL_PMC_IDX_FIXED_SPECIAL)) {
+               switch (hwc->idx) {
+               case INTEL_PMC_IDX_FIXED_BTS:
+                       if (!__this_cpu_read(cpu_hw_events.enabled))
+                               return;
+
+                       intel_pmu_enable_bts(hwc->config);
                        return;
 
-               intel_pmu_enable_bts(hwc->config);
-               return;
+               case INTEL_PMC_IDX_FIXED_SMI_COUNT:
+                       local64_set(&hwc->prev_count, x86_rdsmi(hwc));
+                       return;
+               }
        }
+
        /*
         * must enabled before any actual event
         * because any event may be combined with LBR
@@ -2107,12 +2122,15 @@ __init int intel_pmu_init(void)
 
        if (x86_pmu.event_constraints) {
                /*
-                * event on fixed counter2 (REF_CYCLES) only works on this
-                * counter, so do not extend mask to generic counters
+                * Events on fixed counter2 (REF_CYCLES) only work on this
+                * counter; the same holds for the special fixed counters.
+                *
+                * So do not extend mask to generic counters.
                 */
                for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if (c->cmask != X86_RAW_EVENT_MASK
-                           || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+                       if (c->cmask != X86_RAW_EVENT_MASK ||
+                           c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES ||
+                           c->idxmsk64 >= INTEL_PMC_MSK_FIXED_SPECIAL) {
                                continue;
                        }
 

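For reference, the wrap-safe delta logic that the new __perf_event_update()
helper applies to a width-limited counter (32 bits for SMI_COUNT above) boils
down to the shift trick below; a standalone sketch with illustrative names,
not kernel code:

#include <stdint.h>
#include <stdio.h>

/* Shift both raw values up to the top of the 64-bit word so that a wrap of
 * the narrow counter still yields the right delta after the signed shift
 * back down; this mirrors what __perf_event_update() does.
 */
static uint64_t counter_delta(uint64_t prev_raw, uint64_t new_raw, int width)
{
	int shift = 64 - width;
	uint64_t diff = (new_raw << shift) - (prev_raw << shift);

	return (uint64_t)((int64_t)diff >> shift);
}

int main(void)
{
	/* 32-bit counter that wrapped from 0xfffffff0 to 0x10: delta is 0x20 */
	printf("delta = %#llx\n",
	       (unsigned long long)counter_delta(0xfffffff0, 0x10, 32));
	return 0;
}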