> 
> On Mon, Apr 20, 2015 at 04:07:47AM -0400, Kan Liang wrote:
> > +static inline void *
> > +get_next_pebs_record_by_bit(void *base, void *top, int bit) {
> > +   struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> > +   void *at;
> > +   u64 pebs_status;
> > +
> > +   if (base == NULL)
> > +           return NULL;
> > +
> > +   for (at = base; at < top; at += x86_pmu.pebs_record_size) {
> > +           struct pebs_record_nhm *p = at;
> > +
> > +           if (test_bit(bit, (unsigned long *)&p->status)) {
> 
> Just wondering, is that BT better than: p->status & (1 << bit) ?

Technically, I think they are the same here.
test_bit looks more common, and is widely used in the drain_pebs functions.
So I changed it according to Andi's comment.

> 
> > +
> > +                   if (p->status == (1 << bit))
> > +                           return at;
> > +
> > +                   /* clear non-PEBS bit and re-check */
> > +                   pebs_status = p->status & cpuc->pebs_enabled;
> > +                   pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
> > +                   if (pebs_status == (1 << bit))
> > +                           return at;
> > +           }
> > +   }
> > +   return NULL;
> > +}
> > +
> >  static void __intel_pmu_pebs_event(struct perf_event *event,
> > +                              struct pt_regs *iregs,
> > +                              void *base, void *top,
> > +                              int bit, int count)
> >  {
> >     struct perf_sample_data data;
> >     struct pt_regs regs;
> > +   int i;
> > +   void *at = get_next_pebs_record_by_bit(base, top, bit);
> >
> > +   if (!intel_pmu_save_and_restart(event) &&
> > +       !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
> >             return;
> >
> > +   if (count > 1) {
> > +           for (i = 0; i < count - 1; i++) {
> > +                   setup_pebs_sample_data(event, iregs, at, &data,
> &regs);
> > +                   perf_event_output(event, &data, &regs);
> > +                   at += x86_pmu.pebs_record_size;
> > +                   at = get_next_pebs_record_by_bit(at, top, bit);
> > +           }
> > +   }
> > +
> > +   setup_pebs_sample_data(event, iregs, at, &data, &regs);
> >
> > +   /* all records are processed, handle event overflow now */
> 
> All but the last. There explicitly is one left to be able to call the overflow
> handler is there not?

Right, I will change the comments.

> 
> > +   if (perf_event_overflow(event, &data, &regs)) {
> >             x86_pmu_stop(event, 0);
> > +           return;
> > +   }
> > +
> >  }
> >
> >  static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) @@
> > -1000,72 +1081,86 @@ static void intel_pmu_drain_pebs_core(struct
> pt_regs *iregs)
> >     if (!event->attr.precise_ip)
> >             return;
> >
> > +   n = (top - at) / x86_pmu.pebs_record_size;
> >     if (n <= 0)
> >             return;
> >
> > +   __intel_pmu_pebs_event(event, iregs, at,
> > +                          top, 0, n);
> >  }
> >
> >  static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)  {
> >     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> >     struct debug_store *ds = cpuc->ds;
> > +   struct perf_event *event;
> > +   void *base, *at, *top;
> >     int bit;
> > +   int counts[MAX_PEBS_EVENTS] = {};
> >
> >     if (!x86_pmu.pebs_active)
> >             return;
> >
> > +   base = (struct pebs_record_nhm *)(unsigned
> > +long)ds->pebs_buffer_base;
> >     top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
> >
> >     ds->pebs_index = ds->pebs_buffer_base;
> >
> > +   if (unlikely(base >= top))
> >             return;
> >
> > +   for (at = base; at < top; at += x86_pmu.pebs_record_size) {
> >             struct pebs_record_nhm *p = at;
> >
> >             for_each_set_bit(bit, (unsigned long *)&p->status,
> >                              x86_pmu.max_pebs_events) {
> >                     event = cpuc->events[bit];
> >                     WARN_ON_ONCE(!event);
> >
> > +                   if (event->attr.precise_ip)
> > +                           break;
> > +           }
> 
> Would it make sense to delay looking for the event until you've found
> there is a single bit set -- and already know which bit that is?
>

Yes, I think we can test cpuc->pebs_enabled here.
It should be better than checking attr.precise_ip.

-       for (; at < top; at += x86_pmu.pebs_record_size) {
+       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;

                for_each_set_bit(bit, (unsigned long *)&p->status,
                                 x86_pmu.max_pebs_events) {
-                       event = cpuc->events[bit];
-                       if (!test_bit(bit, cpuc->active_mask))
-                               continue;
-
-                       WARN_ON_ONCE(!event);

-                       if (!event->attr.precise_ip)
-                               continue;
+                       if (test_bit(bit, cpuc->pebs_enabled))
+                               break;
+               }

... ...
 
+       for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+               if (counts[bit] == 0)
                        continue;
-
-               __intel_pmu_pebs_event(event, iregs, at);
+               event = cpuc->events[bit];
+               WARN_ON_ONCE(!event);
+               WARN_ON_ONCE(!event->attr.precise_ip);
+               __intel_pmu_pebs_event(event, iregs, base,
+                                      top, bit, counts[bit]);
        }

> >
> > +           if (bit >= x86_pmu.max_pebs_events)
> > +                   continue;
> > +           if (!test_bit(bit, cpuc->active_mask))
> > +                   continue;
> > +           /*
> > +            * The PEBS hardware does not deal well with the situation
> > +            * when events happen near to each other and multiple
> bits
> > +            * are set. But it should happen rarely.
> > +            *
> > +            * If these events include one PEBS and multiple non-PEBS
> > +            * events, it doesn't impact PEBS record. The record will
> > +            * be handled normally. (slow path)
> > +            *
> > +            * If these events include two or more PEBS events, the
> > +            * records for the events can be collapsed into a single
> > +            * one, and it's not possible to reconstruct all events
> > +            * that caused the PEBS record. It's called collision.
> > +            * If collision happened, the record will be dropped.
> > +            *
> > +            */
> > +           if (p->status != (1 << bit)) {
> > +                   u64 pebs_status;
> > +
> > +                   /* slow path */
> > +                   pebs_status = p->status & cpuc->pebs_enabled;
> > +                   pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
> > +                   if (pebs_status != (1 << bit)) {
> > +                           perf_log_lost(event);
> 
> Does it make sense to keep an error[bit] count and only log once with the
> actual number in? -- when !0 obviously.

Yes, will do it.

Thanks,
Kan
> 
> >                             continue;
> > +                   }
> >             }
> > +           counts[bit]++;
> > +   }
> >
> > +   for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
> > +           if (counts[bit] == 0)
> >                     continue;
> > +           event = cpuc->events[bit];
> > +           __intel_pmu_pebs_event(event, iregs, base,
> > +                                  top, bit, counts[bit]);
> >     }
> >  }
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected] More majordomo
> info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to