Many perf sideband events (context switches, namespaces, ...) are useful
by themselves, without subscribing to any overflow events. However,
there is currently no way to be notified when such records are logged
into the ring buffer. Introduce IOC_COUNT_RECORDS as a way to request
this.

With IOC_COUNT_RECORDS set, IOC_REFRESH takes the number of records
after which to generate a notification, rather than the number of
overflow events.
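
For illustration, here is a minimal (untested) userspace sketch that asks
to be woken after 10 context-switch records. The event setup (a software
dummy event with context_switch sampling, sample_period = 1 so the event
qualifies as a sampling event) is just one way to generate such sideband
records; error handling is omitted for brevity, and until the uapi header
carries the new define, userspace would have to provide
PERF_EVENT_IOC_COUNT_RECORDS itself:

  #include <linux/perf_event.h>
  #include <poll.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  int main(void)
  {
      struct perf_event_attr attr;
      struct pollfd pfd;
      int fd;

      memset(&attr, 0, sizeof(attr));
      attr.size = sizeof(attr);
      attr.type = PERF_TYPE_SOFTWARE;
      attr.config = PERF_COUNT_SW_DUMMY;  /* no samples of interest */
      attr.sample_period = 1;             /* must be a sampling event */
      attr.context_switch = 1;            /* sideband records to count */
      attr.disabled = 1;

      fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

      /* a ring buffer is needed for records to be logged at all */
      mmap(NULL, (1 + 8) * getpagesize(), PROT_READ | PROT_WRITE,
           MAP_SHARED, fd, 0);

      ioctl(fd, PERF_EVENT_IOC_COUNT_RECORDS);
      /* with count_records set, this arms the event for 10 records */
      ioctl(fd, PERF_EVENT_IOC_REFRESH, 10);

      pfd.fd = fd;
      pfd.events = POLLIN;
      poll(&pfd, 1, -1);  /* returns once 10 records are logged */

      return 0;
  }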

Signed-off-by: Naveen N. Rao <[email protected]>
---
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/core.c            | 16 +++++++++++++++-
 kernel/events/ring_buffer.c     |  9 +++++++++
 4 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a635887f28..016f2da2bba7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -683,6 +683,7 @@ struct perf_event {
        struct irq_work                 pending;
 
        atomic_t                        event_limit;
+       bool                            count_records;
 
        /* address range filters */
        struct perf_addr_filters_head   addr_filters;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b1c0b187acfe..fb989ac71ded 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -408,6 +408,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_ID              _IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF         _IOW('$', 8, __u32)
 #define PERF_EVENT_IOC_PAUSE_OUTPUT    _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_COUNT_RECORDS   _IO ('$', 10)
 
 enum perf_event_ioc_flags {
        PERF_IOC_FLAG_GROUP             = 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..637064880b36 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2674,6 +2674,16 @@ void perf_event_addr_filters_sync(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
 
+static int _perf_event_count_records(struct perf_event *event)
+{
+       if (event->attr.inherit || !is_sampling_event(event))
+               return -EINVAL;
+
+       event->count_records = 1;
+
+       return 0;
+}
+
 static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
        /*
@@ -4699,6 +4709,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
                func = _perf_event_reset;
                break;
 
+       case PERF_EVENT_IOC_COUNT_RECORDS:
+               return _perf_event_count_records(event);
+
        case PERF_EVENT_IOC_REFRESH:
                return _perf_event_refresh(event, arg);
 
@@ -7342,7 +7355,8 @@ static int __perf_event_overflow(struct perf_event *event,
         */
 
        event->pending_kill = POLL_IN;
-       if (events && atomic_dec_and_test(&event->event_limit)) {
+       if (events && !event->count_records &&
+                       atomic_dec_and_test(&event->event_limit)) {
                ret = 1;
                event->pending_kill = POLL_HUP;
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 2831480c63a2..9b9ca0608fed 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -126,6 +126,7 @@ __perf_output_begin(struct perf_output_handle *handle,
                u64                      id;
                u64                      lost;
        } lost_event;
+       int events = atomic_read(&event->event_limit);
 
        rcu_read_lock();
        /*
@@ -197,6 +198,14 @@ __perf_output_begin(struct perf_output_handle *handle,
        if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
                local_add(rb->watermark, &rb->wakeup);
 
+       if (events && event->count_records &&
+                       atomic_dec_and_test(&event->event_limit)) {
+               event->pending_kill = POLL_HUP;
+               local_inc(&rb->wakeup);
+
+               perf_event_disable_inatomic(event);
+       }
+
        page_shift = PAGE_SHIFT + page_order(rb);
 
        handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
-- 
2.12.2
