perf_pmu_sched_task() returns early when cpuctx->task_ctx is non-NULL,
deferring to perf_ctx_sched_task_cb() in the context sched_in/out
paths. But perf_ctx_sched_task_cb() only walks the task context's
pmu_ctx_list -- PMUs that have only CPU-wide events are not on that
list and their sched_task callback is silently skipped.
On ARM64 with CPU-wide branch recording, for example:

    perf record -b -e cycles -a -- ls

armv8pmu_sched_task() is skipped whenever the scheduled task has an
unrelated perf event (e.g. a software event), so branch records leak
across task boundaries.
A second problem exists in __perf_pmu_sched_task(): it passes
cpc->task_epc directly to pmu->sched_task(), but task_epc is NULL for
PMUs with only CPU-wide events. When perf_pmu_sched_task() does reach
the loop (because cpuctx->task_ctx is NULL), this causes a NULL
pointer dereference:
Unable to handle kernel NULL pointer dereference at virtual address 00[.]
PC is at armv8pmu_sched_task+0x14/0x50
Call trace:
armv8pmu_sched_task+0x14/0x50 (P)
perf_pmu_sched_task+0xac/0x108
__perf_event_task_sched_out+0x6c/0xe0
Fix both problems:

- Remove the blanket early return in perf_pmu_sched_task() when
  cpuctx->task_ctx is set. Instead, while a task context is active,
  skip only the CPCs that have a task_epc (those are handled by
  perf_ctx_sched_task_cb()). CPCs without a task_epc are CPU-only and
  must be handled here.

- Fall back to &cpc->epc in __perf_pmu_sched_task() when task_epc is
  NULL, so the callback always gets a valid pmu_ctx.
Fixes: bd2756811766 ("perf: Rewrite core context handling")
Signed-off-by: Puranjay Mohan <[email protected]>
---
kernel/events/core.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6d1f8bad7e1c..6604f6e8f352 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3906,7 +3906,8 @@ static void __perf_pmu_sched_task(struct perf_cpu_pmu_context *cpc,
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
- pmu->sched_task(cpc->task_epc, task, sched_in);
+ pmu->sched_task(cpc->task_epc ? cpc->task_epc : &cpc->epc,
+ task, sched_in);
perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -3919,12 +3920,20 @@ static void perf_pmu_sched_task(struct task_struct *prev,
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
struct perf_cpu_pmu_context *cpc;
- /* cpuctx->task_ctx will be handled in perf_event_context_sched_in/out */
- if (prev == next || cpuctx->task_ctx)
+ if (prev == next)
return;
- list_for_each_entry(cpc, this_cpu_ptr(&sched_cb_list), sched_cb_entry)
+ list_for_each_entry(cpc, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+ /*
+ * PMUs with per-task events are handled by
+ * perf_ctx_sched_task_cb() via perf_event_context_sched_in/out
+ * when a task context is active.
+ */
+ if (cpuctx->task_ctx && cpc->task_epc)
+ continue;
+
__perf_pmu_sched_task(cpc, sched_in ? next : prev, sched_in);
+ }
}
static void perf_event_switch(struct task_struct *task,
--
2.53.0-Meta