Re: [PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

2015-09-15 Thread Alexander Shishkin
Takao Indoh  writes:

> On 2015/09/08 18:48, Alexander Shishkin wrote:
>> Takao Indoh  writes:
>> 
>>> +/* intel_pt */
>>> +static struct perf_event_attr pt_attr_pt = {
>>> +   .config = 0x400, /* bit10: TSCEn */
>> 
>> Doesn't it make sense to make these things configurable via sysfs or
>> whatnot?
>
> That makes sense, will do.
>
>> 
>>> +static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */
>> 
>> Same here.
>> 
>>> +static struct cpumask pt_log_cpu_mask;
>>> +
>>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_pt);
>>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_sched);
>>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_dummy);
>>> +
>>> +/* Saved registers on panic */
>>> +static DEFINE_PER_CPU(u64, saved_msr_ctl);
>>> +static DEFINE_PER_CPU(u64, saved_msr_status);
>>> +static DEFINE_PER_CPU(u64, saved_msr_output_base);
>>> +static DEFINE_PER_CPU(u64, saved_msr_output_mask);
>>> +
>>> +void save_intel_pt_registers(void)
>>> +{
>>> +   int cpu = smp_processor_id();
>>> +   u64 ctl;
>>> +
>>> +   if (!cpumask_test_cpu(cpu, &pt_log_cpu_mask))
>>> +   return;
>>> +
>>> +   /* Save RTIT_CTL register */
>>> +   rdmsrl(MSR_IA32_RTIT_CTL, ctl);
>>> +   per_cpu(saved_msr_ctl, cpu) = ctl;
>>> +
>>> +   /* Stop tracing */
>>> +   ctl &= ~RTIT_CTL_TRACEEN;
>>> +   wrmsrl(MSR_IA32_RTIT_CTL, ctl);
>>> +
>>> +   /* Save other registers */
>>> +   rdmsrl(MSR_IA32_RTIT_STATUS, per_cpu(saved_msr_status, cpu));
>>> +   rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, per_cpu(saved_msr_output_base, cpu));
>>> +   rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, per_cpu(saved_msr_output_mask, cpu));
>> 
>> I'd really like to keep the PT msr accesses confined to the intel_pt
>> driver. Maybe have a similar function there? That way you could also use
>> pt_config_start() instead of clearing TraceEn by hand.
>> 
>> Do you need these saved msr values for the crash tool? I'm guessing
>> you'd need the write pointer to figure out where the most recent data
>> is. But then again, if you go the perf_event_disable() path, it'll all
>> happen automatically in the driver. Or rather __perf_event_disable()
>> type of thing since this is strictly cpu-local. Or even
>> event::pmu::stop() would do the trick. The buffer's write head would
>> then be in this_cpu_ptr(&pt_ctx)->handle.head.
>
> Yes, what I need is the last position where Intel PT hardware wrote
> data. Once kernel panic occurs, basically we should minimize the access
> to kernel data or functions because they may be broken. That is why I
> touch msr directly in this patch. But I agree to limit the access to msr
> except intel_pt driver. Using pmu.stop() or pt_event_stop() looks good
> to me.

Ok, thanks!

Regards,
--
Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

2015-09-15 Thread Alexander Shishkin
Takao Indoh  writes:

> On 2015/09/08 18:48, Alexander Shishkin wrote:
>> Takao Indoh  writes:
>> 
>>> +/* intel_pt */
>>> +static struct perf_event_attr pt_attr_pt = {
>>> +   .config = 0x400, /* bit10: TSCEn */
>> 
>> Doesn't it make sense to make these things configurable via sysfs or
>> whatnot?
>
> That makes sense, will do.
>
>> 
>>> +static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */
>> 
>> Same here.
>> 
>>> +static struct cpumask pt_log_cpu_mask;
>>> +
>>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_pt);
>>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_sched);
>>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_dummy);
>>> +
>>> +/* Saved registers on panic */
>>> +static DEFINE_PER_CPU(u64, saved_msr_ctl);
>>> +static DEFINE_PER_CPU(u64, saved_msr_status);
>>> +static DEFINE_PER_CPU(u64, saved_msr_output_base);
>>> +static DEFINE_PER_CPU(u64, saved_msr_output_mask);
>>> +
>>> +void save_intel_pt_registers(void)
>>> +{
>>> +   int cpu = smp_processor_id();
>>> +   u64 ctl;
>>> +
>>> +   if (!cpumask_test_cpu(cpu, &pt_log_cpu_mask))
>>> +   return;
>>> +
>>> +   /* Save RTIT_CTL register */
>>> +   rdmsrl(MSR_IA32_RTIT_CTL, ctl);
>>> +   per_cpu(saved_msr_ctl, cpu) = ctl;
>>> +
>>> +   /* Stop tracing */
>>> +   ctl &= ~RTIT_CTL_TRACEEN;
>>> +   wrmsrl(MSR_IA32_RTIT_CTL, ctl);
>>> +
>>> +   /* Save other registers */
>>> +   rdmsrl(MSR_IA32_RTIT_STATUS, per_cpu(saved_msr_status, cpu));
>>> +   rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, per_cpu(saved_msr_output_base, cpu));
>>> +   rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, per_cpu(saved_msr_output_mask, cpu));
>> 
>> I'd really like to keep the PT msr accesses confined to the intel_pt
>> driver. Maybe have a similar function there? That way you could also use
>> pt_config_start() instead of clearing TraceEn by hand.
>> 
>> Do you need these saved msr values for the crash tool? I'm guessing
>> you'd need the write pointer to figure out where the most recent data
>> is. But then again, if you go the perf_event_disable() path, it'll all
>> happen automatically in the driver. Or rather __perf_event_disable()
>> type of thing since this is strictly cpu-local. Or even
>> event::pmu::stop() would do the trick. The buffer's write head would
>> then be in this_cpu_ptr(&pt_ctx)->handle.head.
>
> Yes, what I need is the last position where Intel PT hardware wrote
> data. Once kernel panic occurs, basically we should minimize the access
> to kernel data or functions because they may be broken. That is why I
> touch msr directly in this patch. But I agree to limit the access to msr
> except intel_pt driver. Using pmu.stop() or pt_event_stop() looks good
> to me.

Ok, thanks!

Regards,
--
Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

2015-09-08 Thread Takao Indoh
On 2015/09/08 18:48, Alexander Shishkin wrote:
> Takao Indoh  writes:
> 
>> +/* intel_pt */
>> +static struct perf_event_attr pt_attr_pt = {
>> +.config = 0x400, /* bit10: TSCEn */
> 
> Doesn't it make sense to make these things configurable via sysfs or
> whatnot?

That makes sense, will do.

> 
>> +static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */
> 
> Same here.
> 
>> +static struct cpumask pt_log_cpu_mask;
>> +
>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_pt);
>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_sched);
>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_dummy);
>> +
>> +/* Saved registers on panic */
>> +static DEFINE_PER_CPU(u64, saved_msr_ctl);
>> +static DEFINE_PER_CPU(u64, saved_msr_status);
>> +static DEFINE_PER_CPU(u64, saved_msr_output_base);
>> +static DEFINE_PER_CPU(u64, saved_msr_output_mask);
>> +
>> +void save_intel_pt_registers(void)
>> +{
>> +int cpu = smp_processor_id();
>> +u64 ctl;
>> +
>> +if (!cpumask_test_cpu(cpu, &pt_log_cpu_mask))
>> +return;
>> +
>> +/* Save RTIT_CTL register */
>> +rdmsrl(MSR_IA32_RTIT_CTL, ctl);
>> +per_cpu(saved_msr_ctl, cpu) = ctl;
>> +
>> +/* Stop tracing */
>> +ctl &= ~RTIT_CTL_TRACEEN;
>> +wrmsrl(MSR_IA32_RTIT_CTL, ctl);
>> +
>> +/* Save other registers */
>> +rdmsrl(MSR_IA32_RTIT_STATUS, per_cpu(saved_msr_status, cpu));
>> +rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, per_cpu(saved_msr_output_base, cpu));
>> +rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, per_cpu(saved_msr_output_mask, cpu));
> 
> I'd really like to keep the PT msr accesses confined to the intel_pt
> driver. Maybe have a similar function there? That way you could also use
> pt_config_start() instead of clearing TraceEn by hand.
> 
> Do you need these saved msr values for the crash tool? I'm guessing
> you'd need the write pointer to figure out where the most recent data
> is. But then again, if you go the perf_event_disable() path, it'll all
> happen automatically in the driver. Or rather __perf_event_disable()
> type of thing since this is strictly cpu-local. Or even
> event::pmu::stop() would do the trick. The buffer's write head would
> then be in this_cpu_ptr(&pt_ctx)->handle.head.

Yes, what I need is the last position where Intel PT hardware wrote
data. Once kernel panic occurs, basically we should minimize the access
to kernel data or functions because they may be broken. That is why I
touch msr directly in this patch. But I agree to limit the access to msr
except intel_pt driver. Using pmu.stop() or pt_event_stop() looks good
to me.

Thanks,
Takao Indoh


> 
> Thanks,
> --
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

2015-09-08 Thread Alexander Shishkin
Takao Indoh  writes:

> +/* intel_pt */
> +static struct perf_event_attr pt_attr_pt = {
> + .config = 0x400, /* bit10: TSCEn */

Doesn't it make sense to make these things configurable via sysfs or
whatnot?

> +static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */

Same here.

> +static struct cpumask pt_log_cpu_mask;
> +
> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_pt);
> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_sched);
> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_dummy);
> +
> +/* Saved registers on panic */
> +static DEFINE_PER_CPU(u64, saved_msr_ctl);
> +static DEFINE_PER_CPU(u64, saved_msr_status);
> +static DEFINE_PER_CPU(u64, saved_msr_output_base);
> +static DEFINE_PER_CPU(u64, saved_msr_output_mask);
> +
> +void save_intel_pt_registers(void)
> +{
> + int cpu = smp_processor_id();
> + u64 ctl;
> +
> + if (!cpumask_test_cpu(cpu, &pt_log_cpu_mask))
> + return;
> +
> + /* Save RTIT_CTL register */
> + rdmsrl(MSR_IA32_RTIT_CTL, ctl);
> + per_cpu(saved_msr_ctl, cpu) = ctl;
> +
> + /* Stop tracing */
> + ctl &= ~RTIT_CTL_TRACEEN;
> + wrmsrl(MSR_IA32_RTIT_CTL, ctl);
> +
> + /* Save other registers */
> + rdmsrl(MSR_IA32_RTIT_STATUS, per_cpu(saved_msr_status, cpu));
> + rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, per_cpu(saved_msr_output_base, cpu));
> + rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, per_cpu(saved_msr_output_mask, cpu));

I'd really like to keep the PT msr accesses confined to the intel_pt
driver. Maybe have a similar function there? That way you could also use
pt_config_start() instead of clearing TraceEn by hand.

Do you need these saved msr values for the crash tool? I'm guessing
you'd need the write pointer to figure out where the most recent data
is. But then again, if you go the perf_event_disable() path, it'll all
happen automatically in the driver. Or rather __perf_event_disable()
type of thing since this is strictly cpu-local. Or even
event::pmu::stop() would do the trick. The buffer's write head would
then be in this_cpu_ptr(&pt_ctx)->handle.head.

Thanks,
--
Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

2015-09-08 Thread Alexander Shishkin
Takao Indoh  writes:

> +/* intel_pt */
> +static struct perf_event_attr pt_attr_pt = {
> + .config = 0x400, /* bit10: TSCEn */

Doesn't it make sense to make these things configurable via sysfs or
whatnot?

> +static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */

Same here.

> +static struct cpumask pt_log_cpu_mask;
> +
> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_pt);
> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_sched);
> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_dummy);
> +
> +/* Saved registers on panic */
> +static DEFINE_PER_CPU(u64, saved_msr_ctl);
> +static DEFINE_PER_CPU(u64, saved_msr_status);
> +static DEFINE_PER_CPU(u64, saved_msr_output_base);
> +static DEFINE_PER_CPU(u64, saved_msr_output_mask);
> +
> +void save_intel_pt_registers(void)
> +{
> + int cpu = smp_processor_id();
> + u64 ctl;
> +
> + if (!cpumask_test_cpu(cpu, &pt_log_cpu_mask))
> + return;
> +
> + /* Save RTIT_CTL register */
> + rdmsrl(MSR_IA32_RTIT_CTL, ctl);
> + per_cpu(saved_msr_ctl, cpu) = ctl;
> +
> + /* Stop tracing */
> + ctl &= ~RTIT_CTL_TRACEEN;
> + wrmsrl(MSR_IA32_RTIT_CTL, ctl);
> +
> + /* Save other registers */
> + rdmsrl(MSR_IA32_RTIT_STATUS, per_cpu(saved_msr_status, cpu));
> + rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, per_cpu(saved_msr_output_base, cpu));
> + rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, per_cpu(saved_msr_output_mask, cpu));

I'd really like to keep the PT msr accesses confined to the intel_pt
driver. Maybe have a similar function there? That way you could also use
pt_config_start() instead of clearing TraceEn by hand.

Do you need these saved msr values for the crash tool? I'm guessing
you'd need the write pointer to figure out where the most recent data
is. But then again, if you go the perf_event_disable() path, it'll all
happen automatically in the driver. Or rather __perf_event_disable()
type of thing since this is strictly cpu-local. Or even
event::pmu::stop() would do the trick. The buffer's write head would
then be in this_cpu_ptr(&pt_ctx)->handle.head.

Thanks,
--
Alex
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

2015-09-08 Thread Takao Indoh
On 2015/09/08 18:48, Alexander Shishkin wrote:
> Takao Indoh  writes:
> 
>> +/* intel_pt */
>> +static struct perf_event_attr pt_attr_pt = {
>> +.config = 0x400, /* bit10: TSCEn */
> 
> Doesn't it make sense to make these things configurable via sysfs or
> whatnot?

That makes sense, will do.

> 
>> +static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */
> 
> Same here.
> 
>> +static struct cpumask pt_log_cpu_mask;
>> +
>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_pt);
>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_sched);
>> +static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_dummy);
>> +
>> +/* Saved registers on panic */
>> +static DEFINE_PER_CPU(u64, saved_msr_ctl);
>> +static DEFINE_PER_CPU(u64, saved_msr_status);
>> +static DEFINE_PER_CPU(u64, saved_msr_output_base);
>> +static DEFINE_PER_CPU(u64, saved_msr_output_mask);
>> +
>> +void save_intel_pt_registers(void)
>> +{
>> +int cpu = smp_processor_id();
>> +u64 ctl;
>> +
>> +if (!cpumask_test_cpu(cpu, &pt_log_cpu_mask))
>> +return;
>> +
>> +/* Save RTIT_CTL register */
>> +rdmsrl(MSR_IA32_RTIT_CTL, ctl);
>> +per_cpu(saved_msr_ctl, cpu) = ctl;
>> +
>> +/* Stop tracing */
>> +ctl &= ~RTIT_CTL_TRACEEN;
>> +wrmsrl(MSR_IA32_RTIT_CTL, ctl);
>> +
>> +/* Save other registers */
>> +rdmsrl(MSR_IA32_RTIT_STATUS, per_cpu(saved_msr_status, cpu));
>> +rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, per_cpu(saved_msr_output_base, cpu));
>> +rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, per_cpu(saved_msr_output_mask, cpu));
> 
> I'd really like to keep the PT msr accesses confined to the intel_pt
> driver. Maybe have a similar function there? That way you could also use
> pt_config_start() instead of clearing TraceEn by hand.
> 
> Do you need these saved msr values for the crash tool? I'm guessing
> you'd need the write pointer to figure out where the most recent data
> is. But then again, if you go the perf_event_disable() path, it'll all
> happen automatically in the driver. Or rather __perf_event_disable()
> type of thing since this is strictly cpu-local. Or even
> event::pmu::stop() would do the trick. The buffer's write head would
> then be in this_cpu_ptr(&pt_ctx)->handle.head.

Yes, what I need is the last position where Intel PT hardware wrote
data. Once kernel panic occurs, basically we should minimize the access
to kernel data or functions because they may be broken. That is why I
touch msr directly in this patch. But I agree to limit the access to msr
except intel_pt driver. Using pmu.stop() or pt_event_stop() looks good
to me.

Thanks,
Takao Indoh


> 
> Thanks,
> --
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

2015-09-07 Thread Takao Indoh
This patch provides Intel PT logging feature. When system boots with a
parameter "intel_pt_log", log buffers for Intel PT are allocated and
logging starts, then processor flow information is written in the log
buffer by hardware like flight recorder. This is very helpful to
investigate a cause of kernel panic.

The log buffer size is specified by the parameter
"intel_pt_log_buf_len=". This buffer is used as circular buffer,
therefore old events are overwritten by new events.

Signed-off-by: Takao Indoh 
---
 arch/x86/Kconfig  |   16 +++
 arch/x86/include/asm/intel_pt_log.h   |   13 ++
 arch/x86/kernel/cpu/Makefile  |2 +
 arch/x86/kernel/cpu/intel_pt_log.c|  178 +
 arch/x86/kernel/cpu/perf_event_intel_pt.c |6 +
 5 files changed, 215 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/include/asm/intel_pt_log.h
 create mode 100644 arch/x86/kernel/cpu/intel_pt_log.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f37010f..2b99ba2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1722,6 +1722,22 @@ config X86_INTEL_MPX
 
  If unsure, say N.
 
+config X86_INTEL_PT_LOG
+   prompt "Intel PT logger"
+   def_bool n
+   depends on PERF_EVENTS && CPU_SUP_INTEL
+   ---help---
+ Intel PT is a hardware feature that can capture information
+ about program execution flow. Once Intel PT is enabled, the
+ events which change program flow, like branch instructions,
+ exceptions, interruptions, traps and so on are logged in
+ the memory.
+
+ This option enables starting Intel PT logging feature at boot
+ time. When kernel panic occurs, Intel PT log buffer can be
+ retrieved from the crash dump file and used to reconstruct the
+ detailed flow that led to the panic.
+
 config EFI
bool "EFI runtime service support"
depends on ACPI
diff --git a/arch/x86/include/asm/intel_pt_log.h 
b/arch/x86/include/asm/intel_pt_log.h
new file mode 100644
index 000..cef63f7
--- /dev/null
+++ b/arch/x86/include/asm/intel_pt_log.h
@@ -0,0 +1,13 @@
+#ifndef __INTEL_PT_LOG_H__
+#define __INTEL_PT_LOG_H__
+
+#if defined(CONFIG_X86_INTEL_PT_LOG)
+
+#include 
+
+void pt_log_start(struct pmu *pmu);
+void save_intel_pt_registers(void);
+
+#endif
+
+#endif /* __INTEL_PT_LOG_H__ */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4eb065c..67c17f0 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)+= 
perf_event_intel_uncore.o \
   perf_event_intel_uncore_nhmex.o
 obj-$(CONFIG_CPU_SUP_INTEL)+= perf_event_msr.o
 obj-$(CONFIG_CPU_SUP_AMD)  += perf_event_msr.o
+
+obj-$(CONFIG_X86_INTEL_PT_LOG) += intel_pt_log.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/intel_pt_log.c 
b/arch/x86/kernel/cpu/intel_pt_log.c
new file mode 100644
index 000..eb345fd
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_pt_log.c
@@ -0,0 +1,178 @@
+/*
+ * Intel Processor Trace Logger
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+
+#define SAMPLE_TYPE_BASE \
+   (PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_TIME|PERF_SAMPLE_IDENTIFIER)
+#define SAMPLE_TYPE_PT \
+   (SAMPLE_TYPE_BASE|PERF_SAMPLE_CPU|PERF_SAMPLE_RAW)
+#define SAMPLE_TYPE_SCHED \
+   (SAMPLE_TYPE_BASE|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD|PERF_SAMPLE_RAW)
+#define SAMPLE_TYPE_DUMMY \
+   (SAMPLE_TYPE_BASE)
+
+/* intel_pt */
+static struct perf_event_attr pt_attr_pt = {
+   .config = 0x400, /* bit10: TSCEn */
+   .size   = sizeof(struct perf_event_attr),
+   .sample_type= SAMPLE_TYPE_PT,
+   .read_format= PERF_FORMAT_ID,
+   .inherit= 1,
+   .pinned = 1,
+   .sample_id_all  = 1,
+   .exclude_guest  = 1
+};
+
+/* sched:sched_switch */
+static struct perf_event_attr pt_attr_sched = {
+   .type   = PERF_TYPE_TRACEPOINT,
+   .size   = sizeof(struct perf_event_attr),
+   .sample_type= SAMPLE_TYPE_SCHED,
+   .read_format= PERF_FORMAT_ID,
+   .inherit= 1,
+   .sample_id_all  = 1,
+   .exclude_guest  = 1
+};
+
+/* dummy:u */
+static struct perf_event_attr pt_attr_dummy = {
+   .type   = PERF_TYPE_SOFTWARE,
+   .config = PERF_COUNT_SW_DUMMY,
+   .size   = sizeof(struct perf_event_attr),
+   .sample_type= SAMPLE_TYPE_DUMMY,
+   .read_format= PERF_FORMAT_ID,
+   .inherit= 1,
+   .exclude_kernel = 1,
+   .exclude_hv = 1,
+   .comm   = 1,
+   .task   = 1,
+   .sample_id_all  = 1,
+   .comm_exec  = 1
+};
+
+static int pt_log_enabled;
+static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */
+static struct cpumask pt_log_cpu_mask;

[PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

2015-09-07 Thread Takao Indoh
This patch provides Intel PT logging feature. When system boots with a
parameter "intel_pt_log", log buffers for Intel PT are allocated and
logging starts, then processor flow information is written in the log
buffer by hardware like flight recorder. This is very helpful to
investigate a cause of kernel panic.

The log buffer size is specified by the parameter
"intel_pt_log_buf_len=". This buffer is used as circular buffer,
therefore old events are overwritten by new events.

Signed-off-by: Takao Indoh 
---
 arch/x86/Kconfig  |   16 +++
 arch/x86/include/asm/intel_pt_log.h   |   13 ++
 arch/x86/kernel/cpu/Makefile  |2 +
 arch/x86/kernel/cpu/intel_pt_log.c|  178 +
 arch/x86/kernel/cpu/perf_event_intel_pt.c |6 +
 5 files changed, 215 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/include/asm/intel_pt_log.h
 create mode 100644 arch/x86/kernel/cpu/intel_pt_log.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f37010f..2b99ba2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1722,6 +1722,22 @@ config X86_INTEL_MPX
 
  If unsure, say N.
 
+config X86_INTEL_PT_LOG
+   prompt "Intel PT logger"
+   def_bool n
+   depends on PERF_EVENTS && CPU_SUP_INTEL
+   ---help---
+ Intel PT is a hardware feature that can capture information
+ about program execution flow. Once Intel PT is enabled, the
+ events which change program flow, like branch instructions,
+ exceptions, interruptions, traps and so on are logged in
+ the memory.
+
+ This option enables starting Intel PT logging feature at boot
+ time. When kernel panic occurs, Intel PT log buffer can be
+ retrieved from the crash dump file and used to reconstruct the
+ detailed flow that led to the panic.
+
 config EFI
bool "EFI runtime service support"
depends on ACPI
diff --git a/arch/x86/include/asm/intel_pt_log.h 
b/arch/x86/include/asm/intel_pt_log.h
new file mode 100644
index 000..cef63f7
--- /dev/null
+++ b/arch/x86/include/asm/intel_pt_log.h
@@ -0,0 +1,13 @@
+#ifndef __INTEL_PT_LOG_H__
+#define __INTEL_PT_LOG_H__
+
+#if defined(CONFIG_X86_INTEL_PT_LOG)
+
+#include 
+
+void pt_log_start(struct pmu *pmu);
+void save_intel_pt_registers(void);
+
+#endif
+
+#endif /* __INTEL_PT_LOG_H__ */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4eb065c..67c17f0 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)+= 
perf_event_intel_uncore.o \
   perf_event_intel_uncore_nhmex.o
 obj-$(CONFIG_CPU_SUP_INTEL)+= perf_event_msr.o
 obj-$(CONFIG_CPU_SUP_AMD)  += perf_event_msr.o
+
+obj-$(CONFIG_X86_INTEL_PT_LOG) += intel_pt_log.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/intel_pt_log.c 
b/arch/x86/kernel/cpu/intel_pt_log.c
new file mode 100644
index 000..eb345fd
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_pt_log.c
@@ -0,0 +1,178 @@
+/*
+ * Intel Processor Trace Logger
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+
+#define SAMPLE_TYPE_BASE \
+   (PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_TIME|PERF_SAMPLE_IDENTIFIER)
+#define SAMPLE_TYPE_PT \
+   (SAMPLE_TYPE_BASE|PERF_SAMPLE_CPU|PERF_SAMPLE_RAW)
+#define SAMPLE_TYPE_SCHED \
+   (SAMPLE_TYPE_BASE|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD|PERF_SAMPLE_RAW)
+#define SAMPLE_TYPE_DUMMY \
+   (SAMPLE_TYPE_BASE)
+
+/* intel_pt */
+static struct perf_event_attr pt_attr_pt = {
+   .config = 0x400, /* bit10: TSCEn */
+   .size   = sizeof(struct perf_event_attr),
+   .sample_type= SAMPLE_TYPE_PT,
+   .read_format= PERF_FORMAT_ID,
+   .inherit= 1,
+   .pinned = 1,
+   .sample_id_all  = 1,
+   .exclude_guest  = 1
+};
+
+/* sched:sched_switch */
+static struct perf_event_attr pt_attr_sched = {
+   .type   = PERF_TYPE_TRACEPOINT,
+   .size   = sizeof(struct perf_event_attr),
+   .sample_type= SAMPLE_TYPE_SCHED,
+   .read_format= PERF_FORMAT_ID,
+   .inherit= 1,
+   .sample_id_all  = 1,
+   .exclude_guest  = 1
+};
+
+/* dummy:u */
+static struct perf_event_attr pt_attr_dummy = {
+   .type   = PERF_TYPE_SOFTWARE,
+   .config = PERF_COUNT_SW_DUMMY,
+   .size   = sizeof(struct perf_event_attr),
+   .sample_type= SAMPLE_TYPE_DUMMY,
+   .read_format= PERF_FORMAT_ID,
+   .inherit= 1,
+   .exclude_kernel = 1,
+   .exclude_hv = 1,
+   .comm   = 1,
+   .task   = 1,
+   .sample_id_all  = 1,
+   .comm_exec  = 1
+};
+
+static int pt_log_enabled;
+static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */
+static