On Mon, Jul 01, 2019 at 11:59:51PM -0700, Ian Rogers wrote:
> The groups rbtree holding perf events, either for a CPU or a task, needs
> to have multiple iterators that visit events in group_index (insertion)
> order. Rather than linearly searching the iterators, use a min-heap to go
> from a O(#iterators) search to a O(log2(#iterators)) insert cost per event
> visited.

Is this actually faster for the common (very small n) case?

ISTR 'stupid' sorting algorithms are actually faster when the data fits
into L1.

> Signed-off-by: Ian Rogers <[email protected]>
> ---
>  kernel/events/core.c | 123 +++++++++++++++++++++++++++++++++----------
>  1 file changed, 95 insertions(+), 28 deletions(-)
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 9a2ad34184b8..396b5ac6dcd4 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -3318,6 +3318,77 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
>       ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
>  }
>  
> +/* Data structure used to hold a min-heap, ordered by group_index, of a fixed
> + * maximum size.
> + */

Broken comment style.

> +struct perf_event_heap {
> +     struct perf_event **storage;
> +     int num_elements;
> +     int max_elements;
> +};
> +
> +static void min_heap_swap(struct perf_event_heap *heap,
> +                       int pos1, int pos2)
> +{
> +     struct perf_event *tmp = heap->storage[pos1];
> +
> +     heap->storage[pos1] = heap->storage[pos2];
> +     heap->storage[pos2] = tmp;
> +}
> +
> +/* Sift the perf_event at pos down the heap. */
> +static void min_heapify(struct perf_event_heap *heap, int pos)
> +{
> +     int left_child, right_child;
> +
> +     while (pos > heap->num_elements / 2) {
> +             left_child = pos * 2;
> +             right_child = pos * 2 + 1;
> +             if (heap->storage[pos]->group_index >
> +                 heap->storage[left_child]->group_index) {
> +                     min_heap_swap(heap, pos, left_child);
> +                     pos = left_child;
> +             } else if (heap->storage[pos]->group_index >
> +                        heap->storage[right_child]->group_index) {
> +                     min_heap_swap(heap, pos, right_child);
> +                     pos = right_child;
> +             } else {
> +                     break;
> +             }
> +     }
> +}
> +
> +/* Floyd's approach to heapification that is O(n). */
> +static void min_heapify_all(struct perf_event_heap *heap)
> +{
> +     int i;
> +
> +     for (i = heap->num_elements / 2; i > 0; i--)
> +             min_heapify(heap, i);
> +}
> +
> +/* Remove minimum element from the heap. */
> +static void min_heap_pop(struct perf_event_heap *heap)
> +{
> +     WARN_ONCE(heap->num_elements <= 0, "Popping an empty heap");
> +     heap->num_elements--;
> +     heap->storage[0] = heap->storage[heap->num_elements];
> +     min_heapify(heap, 0);
> +}

Is this really the first heap implementation in the kernel?

> @@ -3378,12 +3453,14 @@ static int visit_groups_merge(struct perf_event_context *ctx,
>                       struct cgroup_subsys_state *css;
>  
>                       for (css = &cpuctx->cgrp->css; css; css = css->parent) {
> -                             itrs[num_itrs] = perf_event_groups_first(groups,
> +                             heap.storage[heap.num_elements] =
> +                                             perf_event_groups_first(groups,
>                                                                  cpu,
>                                                                  css->cgroup);
> -                             if (itrs[num_itrs]) {
> -                                     num_itrs++;
> -                                     if (num_itrs == max_itrs) {
> +                             if (heap.storage[heap.num_elements]) {
> +                                     heap.num_elements++;
> +                                     if (heap.num_elements ==
> +                                         heap.max_elements) {
>                                               WARN_ONCE(
>                                    max_cgroups_with_events_depth,
>                                    "Insufficient iterators for cgroup depth");

That's turning into unreadable garbage due to indentation; surely
there's a solution for that.

Reply via email to