The groups rbtree holding perf events, either for a CPU or a task, needs
to have multiple iterators that visit events in group_index (insertion)
order. Rather than linearly searching the iterators for the lowest
group_index, keep them in a min-heap, going from an O(#iterators) search
to an O(log2(#iterators)) pop/push cost per event visited.
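
For illustration only (not part of this patch), the heap operations boil
down to an ordinary binary min-heap sift-down. Below is a standalone
userspace sketch, with plain int keys standing in for events compared by
group_index:

  /*
   * Illustrative userspace sketch, not part of this patch: a min-heap of
   * plain int keys standing in for events compared by group_index. Each
   * visit pays one O(log2 n) sift-down instead of an O(n) linear scan.
   */
  #include <stdio.h>

  struct int_heap {
          int *storage;
          int num_elements;
  };

  static void sift_down(struct int_heap *heap, int pos)
  {
          int left, right, smallest, tmp;

          for (;;) {
                  left = pos * 2 + 1;
                  right = pos * 2 + 2;
                  smallest = pos;
                  if (left < heap->num_elements &&
                      heap->storage[left] < heap->storage[smallest])
                          smallest = left;
                  if (right < heap->num_elements &&
                      heap->storage[right] < heap->storage[smallest])
                          smallest = right;
                  if (smallest == pos)
                          break;
                  /* Swap the parent with its smaller child, keep going down. */
                  tmp = heap->storage[pos];
                  heap->storage[pos] = heap->storage[smallest];
                  heap->storage[smallest] = tmp;
                  pos = smallest;
          }
  }

  int main(void)
  {
          int keys[] = { 7, 2, 9, 4, 1 };
          struct int_heap heap = { .storage = keys, .num_elements = 5 };
          int i;

          /* Floyd's O(n) heapify: sift down every non-leaf node. */
          for (i = heap.num_elements / 2 - 1; i >= 0; i--)
                  sift_down(&heap, i);

          /* Visits keys in ascending order: 1 2 4 7 9. */
          while (heap.num_elements > 0) {
                  printf("%d\n", heap.storage[0]);
                  /* Pop: move the last key to the root, then sift it down. */
                  heap.storage[0] = heap.storage[--heap.num_elements];
                  sift_down(&heap, 0);
          }
          return 0;
  }

The kernel code below follows the same pattern, replacing the root with the
popped event's successor in the rbtree rather than always shrinking the heap.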

Signed-off-by: Ian Rogers <irog...@google.com>
---
 kernel/events/core.c | 131 +++++++++++++++++++++++++++++++++----------
 1 file changed, 102 insertions(+), 29 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 47df9935de0b..4d70df0415b9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3322,6 +3322,78 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
        ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
 }
 
+/*
+ * Data structure used to hold a min-heap, ordered by group_index, of a fixed
+ * maximum size.
+ */
+struct perf_event_heap {
+       struct perf_event **storage;
+       int num_elements;
+       int max_elements;
+};
+
+static void min_heap_swap(struct perf_event_heap *heap,
+                         int pos1, int pos2)
+{
+       struct perf_event *tmp = heap->storage[pos1];
+
+       heap->storage[pos1] = heap->storage[pos2];
+       heap->storage[pos2] = tmp;
+}
+
+/* Sift the perf_event at pos down the heap (storage[] is zero-based). */
+static void min_heapify(struct perf_event_heap *heap, int pos)
+{
+       int left_child, right_child, smallest;
+
+       for (;;) {
+               left_child = pos * 2 + 1;
+               right_child = pos * 2 + 2;
+               smallest = pos;
+               if (left_child < heap->num_elements &&
+                   heap->storage[left_child]->group_index <
+                   heap->storage[smallest]->group_index)
+                       smallest = left_child;
+               if (right_child < heap->num_elements &&
+                   heap->storage[right_child]->group_index <
+                   heap->storage[smallest]->group_index)
+                       smallest = right_child;
+               /* Stop once the parent is no larger than either child. */
+               if (smallest == pos)
+                       break;
+               min_heap_swap(heap, pos, smallest);
+               pos = smallest;
+       }
+}
+
+/* Floyd's approach to heapification that is O(n). */
+static void min_heapify_all(struct perf_event_heap *heap)
+{
+       int i;
+
+       for (i = heap->num_elements / 2 - 1; i >= 0; i--)
+               min_heapify(heap, i);
+}
+
+/* Remove the minimum element from the heap. */
+static void min_heap_pop(struct perf_event_heap *heap)
+{
+       if (WARN_ONCE(heap->num_elements <= 0, "Popping an empty heap"))
+               return;
+
+       heap->num_elements--;
+       heap->storage[0] = heap->storage[heap->num_elements];
+       min_heapify(heap, 0);
+}
+
+/* Remove the minimum element and then push the given event. */
+static void min_heap_pop_push(struct perf_event_heap *heap,
+                             struct perf_event *event)
+{
+       if (WARN_ONCE(heap->num_elements <= 0, "Popping an empty heap"))
+               return;
+
+       if (event == NULL) {
+               min_heap_pop(heap);
+       } else {
+               heap->storage[0] = event;
+               min_heapify(heap, 0);
+       }
+}
+
 static int visit_groups_merge(struct perf_event_context *ctx,
                              struct perf_cpu_context *cpuctx,
                              struct perf_event_groups *groups,
@@ -3331,8 +3403,6 @@ static int visit_groups_merge(struct perf_event_context *ctx,
                                          int *),
                              int *data)
 {
-       /* The number of iterators in use. */
-       int num_itrs;
        /*
         * A set of iterators, the iterator for the visit is chosen by the
         * group_index. The size of the array is sized such that there is space:
@@ -3346,22 +3416,28 @@ static int visit_groups_merge(struct perf_event_context *ctx,
         *     only limited by MAX_PATH.
         */
        struct perf_event *itrs[IS_ENABLED(CONFIG_CGROUP_PERF) ? 17 : 2];
-       /* A reference to the selected iterator. */
-       struct perf_event **evt;
-       int ret, i, cpu = smp_processor_id();
+       struct perf_event_heap heap = {
+               .storage = itrs,
+               .num_elements = 0,
+               .max_elements = ARRAY_SIZE(itrs)
+       };
+       int ret, cpu = smp_processor_id();
 
-       itrs[0] = perf_event_groups_first(groups, cpu, NULL);
+       heap.storage[0] = perf_event_groups_first(groups, cpu, NULL);
+       if (heap.storage[0])
+               heap.num_elements++;
 
        if (ctx != &cpuctx->ctx) {
                /*
                 * A task context only ever has an iterator for CPU or any CPU
                 * events.
                 */
-               itrs[1] = perf_event_groups_first(groups, -1, NULL);
-               num_itrs = 2;
+               heap.storage[heap.num_elements] =
+                               perf_event_groups_first(groups, -1, NULL);
+               if (heap.storage[heap.num_elements])
+                       heap.num_elements++;
        } else {
                /* Per-CPU events are by definition not on any CPU. */
-               num_itrs = 1;
 #ifdef CONFIG_CGROUP_PERF
                /*
                 * For itrs[1..MAX] add an iterator for each cgroup parent that
@@ -3371,36 +3447,33 @@ static int visit_groups_merge(struct perf_event_context *ctx,
                        struct cgroup_subsys_state *css;
 
                        for (css = &cpuctx->cgrp->css; css; css = css->parent) {
-                               itrs[num_itrs] = perf_event_groups_first(groups,
+                               heap.storage[heap.num_elements] =
+                                               perf_event_groups_first(groups,
                                                                   cpu,
                                                                   css->cgroup);
-                               if (itrs[num_itrs] &&
-                                   WARN_ONCE(++num_itrs == ARRAY_SIZE(iters)
-                                    "Insufficient iterators for cgroup depth"))
-                                       break;
+                               if (heap.storage[heap.num_elements]) {
+                                       heap.num_elements++;
+                                       if (heap.num_elements ==
+                                           heap.max_elements) {
+                                               WARN_ONCE(1, "Insufficient iterators for cgroup depth");
+                                               break;
+                                       }
+                               }
                        }
                }
 #endif
        }
+       min_heapify_all(&heap);
 
-       while (true) {
-               /* Find lowest group_index event. */
-               evt = NULL;
-               for (i = 0; i < num_itrs; ++i) {
-                       if (itrs[i] == NULL)
-                               continue;
-                       if (evt == NULL ||
-                           itrs[i]->group_index < (*evt)->group_index)
-                               evt = &itrs[i];
-               }
-               if (evt == NULL)
-                       break;
-
-               ret = func(ctx, cpuctx, *evt, data);
+       while (heap.num_elements > 0) {
+               ret = func(ctx, cpuctx, heap.storage[0], data);
                if (ret)
                        return ret;
 
-               *evt = perf_event_groups_next(*evt);
+               min_heap_pop_push(&heap,
+                                 perf_event_groups_next(heap.storage[0]));
        }
 
        return 0;
-- 
2.22.0.709.g102302147b-goog
