On Mon, Dec 18, 2017 at 03:34:49AM -0800, kan.li...@linux.intel.com wrote: > arch/x86/events/core.c | 14 ++++++++++++++ > arch/x86/events/intel/ds.c | 8 +++++++- > 2 files changed, 21 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c > index 35552ea..f74e21d 100644 > --- a/arch/x86/events/core.c > +++ b/arch/x86/events/core.c > @@ -100,6 +100,20 @@ u64 x86_perf_event_update(struct perf_event *event, > * of the count. > */ > delta = (new_raw_count << shift) - (prev_raw_count << shift); > + > + /* > + * Take auto-reload into account > + * For the auto-reload before the last time, it went through the > + * whole period (reload_val) every time. > + * Just simply add period * times to the event. > + * > + * For the last load, the elapsed delta (event-)time need to be > + * corrected by adding the period. Because the start point is -period. > + */ > + if (reload_times > 0) { > + delta += (reload_val << shift); > + local64_add(reload_val * (reload_times - 1), &event->count); > + } > delta >>= shift; > > local64_add(delta, &event->count); > diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c > index 0b693b7..f0f6026 100644 > --- a/arch/x86/events/intel/ds.c > +++ b/arch/x86/events/intel/ds.c > @@ -1256,11 +1256,17 @@ static void __intel_pmu_pebs_event(struct perf_event > *event, > void *base, void *top, > int bit, int count) > { > + struct hw_perf_event *hwc = &event->hw; > struct perf_sample_data data; > struct pt_regs regs; > void *at = get_next_pebs_record_by_bit(base, top, bit); > > - if (!intel_pmu_save_and_restart(event, 0, 0) && > + /* > + * Now, auto-reload is only enabled in fixed period mode. > + * The reload value is always hwc->sample_period. > + * May need to change it, if auto-reload is enabled in freq mode later. > + */ > + if (!intel_pmu_save_and_restart(event, hwc->sample_period, count - 1) && > !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)) > return; >
This all looks very wrong... In auto reload we should never call intel_pmu_save_and_restore() in the first place I think. Things like x86_perf_event_update() and x86_perf_event_set_period() simply _cannot_ do the right thing when we auto reload the counter.