On Tue, Apr 21, 2015 at 05:18:27PM +0200, Jiri Olsa wrote:
> On Tue, Apr 21, 2015 at 05:12:16PM +0200, Peter Zijlstra wrote:
> > On Tue, Apr 21, 2015 at 10:54:25AM +0200, Jiri Olsa wrote:
> > > @@ -2559,6 +2563,9 @@ static __initconst const struct x86_pmu core_pmu = {
> > >   .guest_get_msrs         = core_guest_get_msrs,
> > >   .format_attrs           = intel_arch_formats_attr,
> > >   .events_sysfs_show      = intel_event_sysfs_show,
> > 
> > Could you also insert a little comment here how these methods are for
> > 'funny' 'hardware' ?
> 
> ook, will do v3

attached,
jirka


---
The core_pmu does not define cpu_* callbacks, which handles
allocation of 'struct cpu_hw_events::shared_regs' data,
initialization of debug store and PMU_FL_EXCL_CNTRS counters.

While this probably won't happen on bare metal, virtual CPU can
define x86_pmu.extra_regs together with PMU version 1 and thus
be using core_pmu -> using shared_regs data without it being
allocated. That could could leave to following panic:

BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff8152cd4f>] _spin_lock_irqsave+0x1f/0x40

SNIP

 [<ffffffff81024bd9>] __intel_shared_reg_get_constraints+0x69/0x1e0
 [<ffffffff81024deb>] intel_get_event_constraints+0x9b/0x180
 [<ffffffff8101e815>] x86_schedule_events+0x75/0x1d0
 [<ffffffff810586dc>] ? check_preempt_curr+0x7c/0x90
 [<ffffffff810649fe>] ? try_to_wake_up+0x24e/0x3e0
 [<ffffffff81064ba2>] ? default_wake_function+0x12/0x20
 [<ffffffff8109eb16>] ? autoremove_wake_function+0x16/0x40
 [<ffffffff810577e9>] ? __wake_up_common+0x59/0x90
 [<ffffffff811a9517>] ? __d_lookup+0xa7/0x150
 [<ffffffff8119db5f>] ? do_lookup+0x9f/0x230
 [<ffffffff811a993a>] ? dput+0x9a/0x150
 [<ffffffff8119c8f5>] ? path_to_nameidata+0x25/0x60
 [<ffffffff8119e90a>] ? __link_path_walk+0x7da/0x1000
 [<ffffffff8101d8f9>] ? x86_pmu_add+0xb9/0x170
 [<ffffffff8101d7a7>] x86_pmu_commit_txn+0x67/0xc0
 [<ffffffff811b07b0>] ? mntput_no_expire+0x30/0x110
 [<ffffffff8119c731>] ? path_put+0x31/0x40
 [<ffffffff8107c297>] ? current_fs_time+0x27/0x30
 [<ffffffff8117d170>] ? mem_cgroup_get_reclaim_stat_from_page+0x20/0x70
 [<ffffffff8111b7aa>] group_sched_in+0x13a/0x170
 [<ffffffff81014a29>] ? sched_clock+0x9/0x10
 [<ffffffff8111bac8>] ctx_sched_in+0x2e8/0x330
 [<ffffffff8111bb7b>] perf_event_sched_in+0x6b/0xb0
 [<ffffffff8111bc36>] perf_event_context_sched_in+0x76/0xc0
 [<ffffffff8111eb3b>] perf_event_comm+0x1bb/0x2e0
 [<ffffffff81195ee9>] set_task_comm+0x69/0x80
 [<ffffffff81195fe1>] setup_new_exec+0xe1/0x2e0
 [<ffffffff811ea68e>] load_elf_binary+0x3ce/0x1ab0

Adding cpu_(prepare|starting|dying) for core_pmu to have shared_regs
data allocated for core_pmu. AFAICS there's no harm to initialize
debug store and PMU_FL_EXCL_CNTRS either for core_pmu.

Signed-off-by: Jiri Olsa <[email protected]>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 66 +++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c 
b/arch/x86/kernel/cpu/perf_event_intel.c
index 9da2400c2ec3..8a4e21eb2087 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2533,34 +2533,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
        return x86_event_sysfs_show(page, config, event);
 }
 
-static __initconst const struct x86_pmu core_pmu = {
-       .name                   = "core",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = x86_pmu_disable_all,
-       .enable_all             = core_pmu_enable_all,
-       .enable                 = core_pmu_enable_event,
-       .disable                = x86_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
-       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
-       .event_map              = intel_pmu_event_map,
-       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
-       .apic                   = 1,
-       /*
-        * Intel PMCs cannot be accessed sanely above 32 bit width,
-        * so we install an artificial 1<<31 period regardless of
-        * the generic event period:
-        */
-       .max_period             = (1ULL << 31) - 1,
-       .get_event_constraints  = intel_get_event_constraints,
-       .put_event_constraints  = intel_put_event_constraints,
-       .event_constraints      = intel_core_event_constraints,
-       .guest_get_msrs         = core_guest_get_msrs,
-       .format_attrs           = intel_arch_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-};
-
 struct intel_shared_regs *allocate_shared_regs(int cpu)
 {
        struct intel_shared_regs *regs;
@@ -2743,6 +2715,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
        NULL,
 };
 
+static __initconst const struct x86_pmu core_pmu = {
+       .name                   = "core",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = x86_pmu_disable_all,
+       .enable_all             = core_pmu_enable_all,
+       .enable                 = core_pmu_enable_event,
+       .disable                = x86_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
+       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
+       .event_map              = intel_pmu_event_map,
+       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
+       /*
+        * Intel PMCs cannot be accessed sanely above 32 bit width,
+        * so we install an artificial 1<<31 period regardless of
+        * the generic event period:
+        */
+       .max_period             = (1ULL << 31) - 1,
+       .get_event_constraints  = intel_get_event_constraints,
+       .put_event_constraints  = intel_put_event_constraints,
+       .event_constraints      = intel_core_event_constraints,
+       .guest_get_msrs         = core_guest_get_msrs,
+       .format_attrs           = intel_arch_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+       /*
+        * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+        * together with PMU version 1 and thus be using core_pmu with
+        * shared_regs. We need following callbacks here to allocate
+        * it properly.
+        */
+       .cpu_prepare            = intel_pmu_cpu_prepare,
+       .cpu_starting           = intel_pmu_cpu_starting,
+       .cpu_dying              = intel_pmu_cpu_dying,
+};
+
 static __initconst const struct x86_pmu intel_pmu = {
        .name                   = "Intel",
        .handle_irq             = intel_pmu_handle_irq,
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to