Re: [PATCH V7 3/6] perf, x86: handle multiple records in PEBS buffer

Peter Zijlstra Tue, 05 May 2015 06:17:28 -0700

On Mon, Apr 20, 2015 at 04:07:47AM -0400, Kan Liang wrote:
> +static inline void *
> +get_next_pebs_record_by_bit(void *base, void *top, int bit)
> +{
> +     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +     void *at;
> +     u64 pebs_status;
> +
> +     if (base == NULL)
> +             return NULL;
> +
> +     for (at = base; at < top; at += x86_pmu.pebs_record_size) {
> +             struct pebs_record_nhm *p = at;
> +
> +             if (test_bit(bit, (unsigned long *)&p->status)) {


Just wondering, is that BT better than: p->status & (1 << bit) ?

> +
> +                     if (p->status == (1 << bit))
> +                             return at;
> +
> +                     /* clear non-PEBS bit and re-check */
> +                     pebs_status = p->status & cpuc->pebs_enabled;
> +                     pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
> +                     if (pebs_status == (1 << bit))
> +                             return at;
> +             }
> +     }
> +     return NULL;
> +}
> +
>  static void __intel_pmu_pebs_event(struct perf_event *event,
> +                                struct pt_regs *iregs,
> +                                void *base, void *top,
> +                                int bit, int count)
>  {
>       struct perf_sample_data data;
>       struct pt_regs regs;
> +     int i;
> +     void *at = get_next_pebs_record_by_bit(base, top, bit);
>  
> +     if (!intel_pmu_save_and_restart(event) &&
> +         !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
>               return;
>  
> +     if (count > 1) {
> +             for (i = 0; i < count - 1; i++) {
> +                     setup_pebs_sample_data(event, iregs, at, &data, &regs);
> +                     perf_event_output(event, &data, &regs);
> +                     at += x86_pmu.pebs_record_size;
> +                     at = get_next_pebs_record_by_bit(at, top, bit);
> +             }
> +     }
> +
> +     setup_pebs_sample_data(event, iregs, at, &data, &regs);
>  
> +     /* all records are processed, handle event overflow now */

All but the last. One record is explicitly left over so that the overflow
handler can be called for it, is it not?

> +     if (perf_event_overflow(event, &data, &regs)) {
>               x86_pmu_stop(event, 0);
> +             return;
> +     }
> +
>  }
>  
>  static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
> @@ -1000,72 +1081,86 @@ static void intel_pmu_drain_pebs_core(struct pt_regs 
> *iregs)
>       if (!event->attr.precise_ip)
>               return;
>  
> +     n = (top - at) / x86_pmu.pebs_record_size;
>       if (n <= 0)
>               return;
>  
> +     __intel_pmu_pebs_event(event, iregs, at,
> +                            top, 0, n);
>  }
>  
>  static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
>  {
>       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>       struct debug_store *ds = cpuc->ds;
> +     struct perf_event *event;
> +     void *base, *at, *top;
>       int bit;
> +     int counts[MAX_PEBS_EVENTS] = {};
>  
>       if (!x86_pmu.pebs_active)
>               return;
>  
> +     base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
>       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
>  
>       ds->pebs_index = ds->pebs_buffer_base;
>  
> +     if (unlikely(base >= top))
>               return;
>  
> +     for (at = base; at < top; at += x86_pmu.pebs_record_size) {
>               struct pebs_record_nhm *p = at;
>  
>               for_each_set_bit(bit, (unsigned long *)&p->status,
>                                x86_pmu.max_pebs_events) {
>                       event = cpuc->events[bit];
>                       WARN_ON_ONCE(!event);
>  
> +                     if (event->attr.precise_ip)
> +                             break;
> +             }

Would it make sense to delay looking for the event until you've found
there is a single bit set -- and already know which bit that is?

>  
> +             if (bit >= x86_pmu.max_pebs_events)
> +                     continue;
> +             if (!test_bit(bit, cpuc->active_mask))
> +                     continue;
> +             /*
> +              * The PEBS hardware does not deal well with the situation
> +              * when events happen near to each other and multiple bits
> +              * are set. But it should happen rarely.
> +              *
> +              * If these events include one PEBS and multiple non-PEBS
> +              * events, it doesn't impact PEBS record. The record will
> +              * be handled normally. (slow path)
> +              *
> +              * If these events include two or more PEBS events, the
> +              * records for the events can be collapsed into a single
> +              * one, and it's not possible to reconstruct all events
> +              * that caused the PEBS record. It's called collision.
> +              * If collision happened, the record will be dropped.
> +              *
> +              */
> +             if (p->status != (1 << bit)) {
> +                     u64 pebs_status;
> +
> +                     /* slow path */
> +                     pebs_status = p->status & cpuc->pebs_enabled;
> +                     pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
> +                     if (pebs_status != (1 << bit)) {
> +                             perf_log_lost(event);

Does it make sense to keep an error[bit] count and only log once, with the
actual number of lost records in it? -- only when that count is non-zero,
obviously.

>                               continue;
> +                     }
>               }
> +             counts[bit]++;
> +     }
>  
> +     for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
> +             if (counts[bit] == 0)
>                       continue;
> +             event = cpuc->events[bit];
> +             __intel_pmu_pebs_event(event, iregs, base,
> +                                    top, bit, counts[bit]);
>       }
>  }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH V7 3/6] perf, x86: handle multiple records in PEBS buffer

Reply via email to