On Sat, May 13, 2017 at 06:40:03AM -0700, Paul E. McKenney wrote:
> On Fri, May 12, 2017 at 05:34:48PM -0400, Steven Rostedt wrote:
> > On Fri, 12 May 2017 21:49:56 +0200
> 
> [ . . . ]
> 
> > This means that text_mutex, which was taken by the alternative code, no
> > longer is taken in cpu hotplug code. That means there's no longer a
> > deadlock scenario, as we don't have anyplace(*) that grabs
> > get_online_cpus() and takes the text_mutex. Removing that will
> > simplify things tremendously!
> > 
> > (*) with one exception: perf.
> > 
> > Currently perf does a get_online_cpus() at a high level. Will it be
> > possible to move that down, such that we don't have it taken when we do
> > any software events?
> 
> Can perf get rid of get_online_cpus(), perhaps using the mutexes acquired
> by perf_event_init_cpu() or by perf_event_exit_cpu()?
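The deadlock worry in the quoted text is the classic ABBA lock inversion:
one path takes the hotplug lock and then text_mutex, while another ends up
taking them in the opposite order. Below is a minimal userspace sketch of
that pattern (build with -lpthread); hotplug_lock merely stands in for the
lock behind get_online_cpus(), and every name here is illustrative, not the
actual kernel paths.

/*
 * Hypothetical userspace analogue of the ABBA deadlock discussed above.
 * Thread A takes hotplug_lock then text_mutex; thread B takes them in the
 * reverse order. Run it and both threads wedge.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t text_mutex   = PTHREAD_MUTEX_INITIALIZER;

/* Path 1: "perf-like" caller: hotplug lock first, then text_mutex. */
static void *patch_text(void *arg)
{
	pthread_mutex_lock(&hotplug_lock);
	sleep(1);				/* widen the race window */
	pthread_mutex_lock(&text_mutex);	/* blocks: held by cpu_up() */
	printf("patch_text: got both locks\n");	/* never reached */
	pthread_mutex_unlock(&text_mutex);
	pthread_mutex_unlock(&hotplug_lock);
	return NULL;
}

/* Path 2: "hotplug-like" caller: text_mutex first, then hotplug lock. */
static void *cpu_up(void *arg)
{
	pthread_mutex_lock(&text_mutex);
	sleep(1);
	pthread_mutex_lock(&hotplug_lock);	/* blocks: held by patch_text() */
	printf("cpu_up: got both locks\n");	/* never reached */
	pthread_mutex_unlock(&hotplug_lock);
	pthread_mutex_unlock(&text_mutex);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, patch_text, NULL);
	pthread_create(&b, NULL, cpu_up, NULL);
	pthread_join(a, NULL);	/* hangs forever: classic ABBA deadlock */
	pthread_join(b, NULL);
	return 0;
}

Removing either half of the ordering, which is what the discussion above is
about, dissolves the inversion; lockdep flags exactly this pattern.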
Best I could come up with is something like the patch below, which moves
the get_online_cpus() to a possibly less onerous place.

Compile tested only..

---
 include/linux/perf_event.h |  2 ++
 kernel/events/core.c       | 85 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 66 insertions(+), 21 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a635887f28..7d6aa29094b2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -801,6 +801,8 @@ struct perf_cpu_context {

 	struct list_head		sched_cb_entry;
 	int				sched_cb_usage;
+
+	int				online;
 };

 struct perf_output_handle {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 13f5b942580b..f9a595641d32 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3812,14 +3812,6 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
 		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 			return ERR_PTR(-EACCES);

-		/*
-		 * We could be clever and allow to attach a event to an
-		 * offline CPU and activate it when the CPU comes up, but
-		 * that's for later.
-		 */
-		if (!cpu_online(cpu))
-			return ERR_PTR(-ENODEV);
-
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 		get_ctx(ctx);
@@ -9005,6 +8997,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 {
 	int cpu, ret;

+	get_online_cpus();
 	mutex_lock(&pmus_lock);
 	ret = -ENOMEM;
 	pmu->pmu_disable_count = alloc_percpu(int);
@@ -9064,6 +9057,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
 		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
 		cpuctx->ctx.pmu = pmu;
+		cpuctx->online = cpu_online(cpu);

 		__perf_mux_hrtimer_init(cpuctx, cpu);
 	}
@@ -9099,6 +9093,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 	ret = 0;
 unlock:
 	mutex_unlock(&pmus_lock);
+	put_online_cpus();

 	return ret;

@@ -9908,13 +9903,11 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (flags & PERF_FLAG_PID_CGROUP)
 		cgroup_fd = pid;

-	get_online_cpus();
-
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
 				 NULL, NULL, cgroup_fd);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
-		goto err_cpus;
+		goto err_cred;
 	}

 	if (is_sampling_event(event)) {
@@ -10078,6 +10071,23 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_locked;
 	}

+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_locked;
+		}
+	}
+
+
 	/*
 	 * Must be under the same ctx::mutex as perf_install_in_context(),
 	 * because we need to serialize with concurrent event creation.
@@ -10162,8 +10172,6 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);

-	put_online_cpus();
-
 	if (task) {
 		mutex_unlock(&task->signal->cred_guard_mutex);
 		put_task_struct(task);
@@ -10205,8 +10213,6 @@ SYSCALL_DEFINE5(perf_event_open,
 	 */
 	if (!event_file)
 		free_event(event);
-err_cpus:
-	put_online_cpus();
 err_cred:
 	if (task)
 		mutex_unlock(&task->signal->cred_guard_mutex);
@@ -10237,7 +10243,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	struct perf_event *event;
 	int err;

-	get_online_cpus();
 	/*
 	 * Get the target context (task or percpu):
 	 */
@@ -10264,6 +10269,21 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_unlock;
 	}

+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_unlock;
+		}
+	}
+
 	if (!exclusive_event_installable(event, ctx)) {
 		err = -EBUSY;
 		goto err_unlock;
@@ -10272,7 +10292,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	perf_install_in_context(ctx, event, cpu);
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
-	put_online_cpus();
 	return event;

 err_unlock:
@@ -10282,7 +10301,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 err_free:
 	free_event(event);
 err:
-	put_online_cpus();
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
@@ -10951,7 +10969,7 @@ static void __init perf_event_init_all_cpus(void)
 	}
 }

-int perf_event_init_cpu(unsigned int cpu)
+void perf_swevent_init_cpu(unsigned int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);

@@ -10964,7 +10982,6 @@ int perf_event_init_cpu(unsigned int cpu)
 		rcu_assign_pointer(swhash->swevent_hlist, hlist);
 	}
 	mutex_unlock(&swhash->hlist_mutex);
-	return 0;
 }

 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -10982,16 +10999,19 @@ static void __perf_event_exit_context(void *__info)

 static void perf_event_exit_cpu_context(int cpu)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 	struct pmu *pmu;
 	int idx;

 	idx = srcu_read_lock(&pmus_srcu);
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;

 		mutex_lock(&ctx->mutex);
 		smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+		cpuctx->online = 0;
 		mutex_unlock(&ctx->mutex);
 	}
 	srcu_read_unlock(&pmus_srcu, idx);
@@ -11002,6 +11022,29 @@ static void perf_event_exit_cpu_context(int cpu) { }

 #endif

+int perf_event_init_cpu(unsigned int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+	struct pmu *pmu;
+	int idx;
+
+	perf_swevent_init_cpu(cpu);
+
+	idx = srcu_read_lock(&pmus_srcu);
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+
+		mutex_lock(&ctx->mutex);
+		cpuctx->online = 1;
+		mutex_unlock(&ctx->mutex);
+	}
+	srcu_read_unlock(&pmus_srcu, idx);
+
+	return 0;
+}
+
 int perf_event_exit_cpu(unsigned int cpu)
 {
 	perf_event_exit_cpu_context(cpu);
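The heart of the patch is in the perf_event_{init,exit}_cpu() hunks: instead
of creators pinning hotplug state globally with get_online_cpus(), each
perf_cpu_context carries an online flag that the hotplug callbacks flip while
holding ctx::mutex, and event creation tests that flag under the same mutex.
Below is a rough userspace sketch of that scheme, assuming a single context;
all identifiers are illustrative, none of this is the kernel API.

/*
 * Minimal sketch: an 'online' flag guarded by the object's own mutex
 * replaces a global hotplug lock. Prints 0, then -19 (-ENODEV).
 */
#include <pthread.h>
#include <errno.h>
#include <stdio.h>

struct cpu_ctx {
	pthread_mutex_t mutex;
	int online;
};

static struct cpu_ctx ctx = { PTHREAD_MUTEX_INITIALIZER, 1 };

/* Hotplug side: mark the context offline under ctx->mutex. */
static void ctx_exit_cpu(struct cpu_ctx *c)
{
	pthread_mutex_lock(&c->mutex);
	c->online = 0;		/* visible to creators once we drop the mutex */
	pthread_mutex_unlock(&c->mutex);
}

/* Creation side: check the flag under the same mutex; no global lock. */
static int event_create(struct cpu_ctx *c)
{
	int err = 0;

	pthread_mutex_lock(&c->mutex);
	if (!c->online)
		err = -ENODEV;	/* same error the patch returns */
	/* else: install the event while still holding c->mutex */
	pthread_mutex_unlock(&c->mutex);
	return err;
}

int main(void)
{
	printf("create while online:  %d\n", event_create(&ctx));
	ctx_exit_cpu(&ctx);
	printf("create while offline: %d\n", event_create(&ctx));
	return 0;
}

Because both sides take the same mutex, a creator either observes
online == 0 and bails out with -ENODEV, or finishes installing before the
hotplug callback can complete the offline transition.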