On Mon, Aug 26, 2019 at 07:47:35AM -0700, kan.li...@linux.intel.com wrote:
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 54534ff00940..1ae23db5c2d7 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -76,6 +76,8 @@ u64 x86_perf_event_update(struct perf_event *event)
>  	if (idx == INTEL_PMC_IDX_FIXED_BTS)
>  		return 0;
>  
> +	if (is_topdown_count(event) && x86_pmu.update_topdown_event)
> +		return x86_pmu.update_topdown_event(event);

If is_topdown_count() is true, it had better bloody well have that
function. But I really hate this.
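A sketch of what I mean -- make the invariant explicit instead of
silently skipping when the method is missing (illustration only, not
code from this patch):

	if (is_topdown_count(event)) {
		/*
		 * A topdown event can only exist on a PMU that set up
		 * the method; a missing pointer is a bug, not a case
		 * to quietly ignore.
		 */
		if (WARN_ON_ONCE(!x86_pmu.update_topdown_event))
			return 0;
		return x86_pmu.update_topdown_event(event);
	}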
>  	/*
>  	 * Careful: an NMI might modify the previous event value.
>  	 *
> @@ -1003,6 +1005,10 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
>  
>  	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
>  
> +	/* There are 4 TopDown metrics events. */
> +	if (x86_pmu.intel_cap.perf_metrics)
> +		max_count += 4;

I'm thinking this is wrong... this unconditionally allows collecting 4
extra events on every part that has this metrics crud on. (See the
sketch at the end of this mail for what I'd expect instead.)

>  	/* current number of events already accepted */
>  	n = cpuc->n_events;
>  
> @@ -1184,6 +1190,10 @@ int x86_perf_event_set_period(struct perf_event *event)
>  	if (idx == INTEL_PMC_IDX_FIXED_BTS)
>  		return 0;
>  
> +	if (unlikely(is_topdown_count(event)) &&
> +	    x86_pmu.set_topdown_event_period)
> +		return x86_pmu.set_topdown_event_period(event);

Same as with the other method; and I similarly hate it.

> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index f4d6335a18e2..616313d7f3d7 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> +static int icl_set_topdown_event_period(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	s64 left = local64_read(&hwc->period_left);
> +
> +	/*
> +	 * Clear PERF_METRICS and Fixed counter 3 in initialization.
> +	 * After that, both MSRs will be cleared for each read.
> +	 * Don't need to clear them again.
> +	 */
> +	if (left == x86_pmu.max_period) {
> +		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
> +		wrmsrl(MSR_PERF_METRICS, 0);
> +		local64_set(&hwc->period_left, 0);
> +	}

This really doesn't make sense to me; if you set FIXED_CTR3 := 0,
you'll never trigger the overflow there; this then seems to suggest the
actual counter value is irrelevant. Therefore you don't actually need
this.

> +
> +	perf_event_update_userpage(event);
> +
> +	return 0;
> +}
> +
> +static u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
> +{
> +	u32 val;
> +
> +	/*
> +	 * The metric is reported as an 8bit integer percentage

s/percentage/fraction/ -- a percentage is parts per 100; 8 bits encode
256 values.

> +	 * suming up to 0xff.
> +	 * slots-in-metric = (Metric / 0xff) * slots
> +	 */
> +	val = (metric >> ((idx - INTEL_PMC_IDX_FIXED_METRIC_BASE) * 8)) & 0xff;
> +	return mul_u64_u32_div(slots, val, 0xff);

This really utterly blows... I'd have wasted range to be able to do a
power-of-two fraction here. That is, use 8 bits with a range [0,128].
But also, x86_64 seems to lack a sane implementation of that function,
and it currently compiles into utter crap (it can be 2 instructions).
Sketches of both points below.

> +}

> +/*
> + * Update all active Topdown events.
> + * PMU has to be disabled before calling this function.

Forgets to explain why...

> + */
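On the power-of-two fraction, what I mean is something like this
(sketch only; this is NOT what the hardware encodes, the real field
has range [0,0xff]):

	static u64 metric_pow2_fraction(u64 slots, u64 metrics, int n)
	{
		/* hypothetical encoding: 8bit field, range [0,128] */
		u64 val = (metrics >> (n * 8)) & 0xff;

		/* slots fits in 48 bits, val in 8; no 64bit overflow */
		return (slots * val) >> 7;	/* slots * val / 128 */
	}

The division becomes a shift.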
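And for the '2 instructions': a sane x86_64 mul_u64_u32_div() can be
just a mulq plus a divq, something like the below (sketch; it requires
the quotient to fit in 64 bits, which trivially holds here because
val <= 0xff, the divisor):

	static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
	{
		u64 q;

		/*
		 * mulq leaves the full 128bit product in rdx:rax;
		 * divq then divides rdx:rax by the divisor.
		 */
		asm ("mulq %2; divq %3"
		     : "=a" (q)
		     : "a" (a), "rm" ((u64)mul), "rm" ((u64)div)
		     : "rdx");

		return q;
	}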
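Back to the collect_events() hunk: what I'd expect is accounting for
the metric events actually being collected, rather than inflating
max_count for everybody. A sketch, assuming a cpuc->n_metric field and
an is_metric_event() helper, neither of which exists in this patch:

	static bool collect_metric_event(struct cpu_hw_events *cpuc,
					 struct perf_event *event)
	{
		if (is_metric_event(event)) {
			/* there are only 4 metric events */
			if (cpuc->n_metric >= 4)
				return false;
			cpuc->n_metric++;
		}
		return true;
	}

and then the limit check becomes: n >= max_count + cpuc->n_metric.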