Commit 443772776c69 (perf: Disable all pmus on unthrottling and
rescheduling) identified an issue with having multiple PMUs sharing a
perf_event_context, but only partially solved the issue.

While ctx::pmu will be disabled across all of its events being
scheduled, pmus which are not ctx::pmu will be repeatedly enabled and
disabled between events being added, possibly counting in between
pmu::add calls. This could be expensive and could lead to events
counting for differing periods.

Instead, this patch adds new helpers to disable/enable all pmus which
have events in a context. While perf_pmu_{dis,en}able may be called
repeatedly for a particular pmu, disabling is reference counted such
that the real pmu::{dis,en}able callbacks are only called once (were
this not the case, the current code would be broken for ctx::pmu).

Uses of perf_pmu_{disable,enable}(ctx->pmu) are replaced with
perf_ctx_pmus_{disable,enable}(ctx). The now unnecessary calls to
perf_pmu_enable and perf_pmu_disable added by 443772776c69 are removed.

Signed-off-by: Mark Rutland <[email protected]>
Reviewed-by: Will Deacon <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Andi Kleen <[email protected]>
---
 kernel/events/core.c | 65 ++++++++++++++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 710c3fe..55c772e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -500,7 +500,7 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
                 */
                if (cpuctx->ctx.nr_cgroups > 0) {
                        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-                       perf_pmu_disable(cpuctx->ctx.pmu);
+                       perf_ctx_pmus_disable(&cpuctx->ctx);
 
                        if (mode & PERF_CGROUP_SWOUT) {
                                cpu_ctx_sched_out(cpuctx, EVENT_ALL);
@@ -521,7 +521,7 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
                                cpuctx->cgrp = perf_cgroup_from_task(task);
                                cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
                        }
-                       perf_pmu_enable(cpuctx->ctx.pmu);
+                       perf_ctx_pmus_enable(&cpuctx->ctx);
                        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
                }
        }
@@ -857,6 +857,26 @@ void perf_pmu_enable(struct pmu *pmu)
                pmu->pmu_enable(pmu);
 }
 
+/*
+ * Must be called with ctx->lock or ctx->mutex held.
+ */
+void perf_ctx_pmus_disable(struct perf_event_context *ctx)
+{
+       struct perf_event *event;
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               perf_pmu_disable(event->pmu);
+}
+
+/*
+ * Must be called with ctx->lock or ctx->mutex held.
+ */
+void perf_ctx_pmus_enable(struct perf_event_context *ctx)
+{
+       struct perf_event *event;
+       list_for_each_entry(event, &ctx->event_list, event_entry)
+               perf_pmu_enable(event->pmu);
+}
+
 static DEFINE_PER_CPU(struct list_head, rotation_list);
 
 /*
@@ -1394,8 +1414,6 @@ event_sched_out(struct perf_event *event,
        if (event->state != PERF_EVENT_STATE_ACTIVE)
                return;
 
-       perf_pmu_disable(event->pmu);
-
        event->state = PERF_EVENT_STATE_INACTIVE;
        if (event->pending_disable) {
                event->pending_disable = 0;
@@ -1412,8 +1430,6 @@ event_sched_out(struct perf_event *event,
                ctx->nr_freq--;
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
-
-       perf_pmu_enable(event->pmu);
 }
 
 static void
@@ -1654,7 +1670,6 @@ event_sched_in(struct perf_event *event,
                 struct perf_event_context *ctx)
 {
        u64 tstamp = perf_event_time(event);
-       int ret = 0;
 
        if (event->state <= PERF_EVENT_STATE_OFF)
                return 0;
@@ -1677,13 +1692,10 @@ event_sched_in(struct perf_event *event,
         */
        smp_wmb();
 
-       perf_pmu_disable(event->pmu);
-
        if (event->pmu->add(event, PERF_EF_START)) {
                event->state = PERF_EVENT_STATE_INACTIVE;
                event->oncpu = -1;
-               ret = -EAGAIN;
-               goto out;
+               return -EAGAIN;
        }
 
        event->tstamp_running += tstamp - event->tstamp_stopped;
@@ -1699,10 +1711,7 @@ event_sched_in(struct perf_event *event,
        if (event->attr.exclusive)
                cpuctx->exclusive = 1;
 
-out:
-       perf_pmu_enable(event->pmu);
-
-       return ret;
+       return 0;
 }
 
 static int
@@ -1848,7 +1857,7 @@ static int  __perf_install_in_context(void *info)
        struct task_struct *task = current;
 
        perf_ctx_lock(cpuctx, task_ctx);
-       perf_pmu_disable(cpuctx->ctx.pmu);
+       perf_ctx_pmus_disable(&cpuctx->ctx);
 
        /*
         * If there was an active task_ctx schedule it out.
@@ -1889,7 +1898,7 @@ static int  __perf_install_in_context(void *info)
         */
        perf_event_sched_in(cpuctx, task_ctx, task);
 
-       perf_pmu_enable(cpuctx->ctx.pmu);
+       perf_ctx_pmus_enable(&cpuctx->ctx);
        perf_ctx_unlock(cpuctx, task_ctx);
 
        return 0;
@@ -2147,7 +2156,7 @@ static void ctx_sched_out(struct perf_event_context *ctx,
        if (!ctx->nr_active)
                return;
 
-       perf_pmu_disable(ctx->pmu);
+       perf_ctx_pmus_disable(ctx);
        if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
                list_for_each_entry(event, &ctx->pinned_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
@@ -2157,7 +2166,7 @@ static void ctx_sched_out(struct perf_event_context *ctx,
                list_for_each_entry(event, &ctx->flexible_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
        }
-       perf_pmu_enable(ctx->pmu);
+       perf_ctx_pmus_enable(ctx);
 }
 
 /*
@@ -2491,7 +2500,7 @@ static void perf_event_context_sched_in(struct 
perf_event_context *ctx,
                return;
 
        perf_ctx_lock(cpuctx, ctx);
-       perf_pmu_disable(ctx->pmu);
+       perf_ctx_pmus_disable(ctx);
        /*
         * We want to keep the following priority order:
         * cpu pinned (that don't need to move), task pinned,
@@ -2504,7 +2513,7 @@ static void perf_event_context_sched_in(struct 
perf_event_context *ctx,
 
        perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
 
-       perf_pmu_enable(ctx->pmu);
+       perf_ctx_pmus_enable(ctx);
        perf_ctx_unlock(cpuctx, ctx);
 
        /*
@@ -2736,7 +2745,7 @@ static void perf_adjust_freq_unthr_context(struct 
perf_event_context *ctx,
                return;
 
        raw_spin_lock(&ctx->lock);
-       perf_pmu_disable(ctx->pmu);
+       perf_ctx_pmus_disable(ctx);
 
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (event->state != PERF_EVENT_STATE_ACTIVE)
@@ -2745,8 +2754,6 @@ static void perf_adjust_freq_unthr_context(struct 
perf_event_context *ctx,
                if (!event_filter_match(event))
                        continue;
 
-               perf_pmu_disable(event->pmu);
-
                hwc = &event->hw;
 
                if (hwc->interrupts == MAX_INTERRUPTS) {
@@ -2756,7 +2763,7 @@ static void perf_adjust_freq_unthr_context(struct 
perf_event_context *ctx,
                }
 
                if (!event->attr.freq || !event->attr.sample_freq)
-                       goto next;
+                       continue;
 
                /*
                 * stop the event and update event->count
@@ -2778,11 +2785,9 @@ static void perf_adjust_freq_unthr_context(struct 
perf_event_context *ctx,
                        perf_adjust_period(event, period, delta, false);
 
                event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
-       next:
-               perf_pmu_enable(event->pmu);
        }
 
-       perf_pmu_enable(ctx->pmu);
+       perf_ctx_pmus_enable(ctx);
        raw_spin_unlock(&ctx->lock);
 }
 
@@ -2826,7 +2831,7 @@ static int perf_rotate_context(struct perf_cpu_context 
*cpuctx)
                goto done;
 
        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-       perf_pmu_disable(cpuctx->ctx.pmu);
+       perf_ctx_pmus_disable(&cpuctx->ctx);
 
        cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
        if (ctx)
@@ -2838,7 +2843,7 @@ static int perf_rotate_context(struct perf_cpu_context 
*cpuctx)
 
        perf_event_sched_in(cpuctx, ctx, current);
 
-       perf_pmu_enable(cpuctx->ctx.pmu);
+       perf_ctx_pmus_enable(&cpuctx->ctx);
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 done:
        if (remove)
-- 
1.8.1.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to