[PATCH v1 2/2] perf/kvm: perf-kvm-stat to report syscalls

2015-02-27 Thread Hemant Kumar
Some of the kvm_hv exits are due to hcalls. So, this patch adds necessary
support to display the number of hcalls grouped according to their type
(H_IPI, H_CONFER, etc) with perf kvm stat report --event=syscall.

The patch defines the reasons in kvm_trace_symbol_hcall. It adds
kvm_hv:kvm_hcall_enter tracepoint to be recorded when
perf kvm stat record is invoked. It defines the handler functions to
handle a kvm_hcall_enter event sample from the samples recorded in
perf.data.guest.

To reuse the pSeries hypervisor opcodes, the codes are removed from
arch/powerpc/include/asm/hvcall.h and added to a new .h file in
arch/powerpc/include/uapi/asm/hcall_codes.h.
Also the hcall_code to hcall_reason string mapping is moved from
arch/powerpc/kvm/trace_hv.h to a new file
arch/powerpc/include/uapi/asm/trace_hcall.h
so that perf in the userspace can use them.

A sample output :
# pgrep qemu
19378
60515

# ./perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

# ./perf kvm stat report -p 60515 --event=syscall


Analyze events for pid(s) 60515, all VCPUs:

   SYSCALL-EVENTSamples  Samples% Time%Min TimeMax Time 
Avg time

H_VIO_SIGNAL   103438.44%15.77%  0.36us  1.59us 
 0.44us ( +-   0.66% )
  H_SEND_CRQ65224.24%10.97%  0.39us  1.84us 
 0.49us ( +-   1.20% )
   H_IPI52319.44%62.05%  1.35us 19.70us 
 3.44us ( +-   2.88% )
 H_PUT_TERM_CHAR41115.28% 8.03%  0.38us  3.77us 
 0.57us ( +-   1.61% )
 H_GET_TERM_CHAR 50 1.86% 0.99%  0.40us  0.98us 
 0.57us ( +-   3.37% )
   H_EOI 20 0.74% 2.19%  2.22us  4.72us 
 3.17us ( +-   5.96% )

Total Samples:2690, Total events handled time:2896.94us.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/hvcall.h   |  120 --
 arch/powerpc/include/uapi/asm/hcall_codes.h |  123 +++
 arch/powerpc/include/uapi/asm/kvm_perf.h|4 +
 arch/powerpc/include/uapi/asm/trace_hcall.h |  122 +++
 arch/powerpc/kvm/trace_hv.h |  117 --
 tools/perf/arch/powerpc/util/kvm-stat.c |   61 +
 6 files changed, 313 insertions(+), 234 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/hcall_codes.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_hcall.h

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 85bc8c0..f810466 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -155,124 +155,8 @@
 /* Each control block has to be on a 4K boundary */
 #define H_CB_ALIGNMENT  4096
 
-/* pSeries hypervisor opcodes */
-#define H_REMOVE   0x04
-#define H_ENTER0x08
-#define H_READ 0x0c
-#define H_CLEAR_MOD0x10
-#define H_CLEAR_REF0x14
-#define H_PROTECT  0x18
-#define H_GET_TCE  0x1c
-#define H_PUT_TCE  0x20
-#define H_SET_SPRG00x24
-#define H_SET_DABR 0x28
-#define H_PAGE_INIT0x2c
-#define H_SET_ASR  0x30
-#define H_ASR_ON   0x34
-#define H_ASR_OFF  0x38
-#define H_LOGICAL_CI_LOAD  0x3c
-#define H_LOGICAL_CI_STORE 0x40
-#define H_LOGICAL_CACHE_LOAD   0x44
-#define H_LOGICAL_CACHE_STORE  0x48
-#define H_LOGICAL_ICBI 0x4c
-#define H_LOGICAL_DCBF 0x50
-#define H_GET_TERM_CHAR0x54
-#define H_PUT_TERM_CHAR0x58
-#define H_REAL_TO_LOGICAL  0x5c
-#define H_HYPERVISOR_DATA  0x60
-#define H_EOI  0x64
-#define H_CPPR 0x68
-#define H_IPI  0x6c
-#define H_IPOLL0x70
-#define H_XIRR 0x74
-#define H_PERFMON  0x7c
-#define H_MIGRATE_DMA  0x78
-#define H_REGISTER_VPA 0xDC
-#define H_CEDE 0xE0
-#define H_CONFER   0xE4
-#define H_PROD 0xE8
-#define H_GET_PPP  0xEC
-#define H_SET_PPP  0xF0
-#define H_PURR 0xF4
-#define H_PIC  0xF8
-#define H_REG_CRQ  0xFC
-#define H_FREE_CRQ 0x100
-#define H_VIO_SIGNAL   0x104
-#define H_SEND_CRQ 0x108
-#define H_COPY_RDMA0x110
-#define H_REGISTER_LOGICAL_LAN 0x114
-#define H_FREE_LOGICAL_LAN 0x118
-#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
-#define H_SEND_LOGICAL_LAN 0x120
-#define H_BULK_REMOVE  0x124
-#define H_MULTICAST_CTRL   0x130
-#define H_SET_XDABR0x134
-#define H_STUFF_TCE0x138
-#define H_PUT_TCE_INDIRECT 0x13C
-#define

[PATCH v1 0/2] perf/kvm: perf-kvm-stat on powerpc

2015-02-27 Thread Hemant Kumar
perf kvm stat record/report which can be used to analyze KVM related statistics
isn't enabled on powerpc. This patchset enables perf kvm stat on powerpc.
The first patch enables perf kvm stat record and report to report kvm exits.

record enables recording of the tracepoints: kvm_hv:kvm_guest_enter and
kvm_hv:kvm_guest_exit in the first patch and kvm_hv:kvm_hcall_enter and
kvm_hv:kvm_hcall_exit in the second patch.

This command can be used to record kvm events on the host:
# perf kvm stat record -a

To report the kvm guest related exit events, use:
# perf kvm stat report
or
# perf kvm stat report --event=vmexit

This should show the exit events along with the exit reasons.

The second patch in this series adds support to show the hcall events too with:
# perf kvm stat report --event=syscall

---

Hemant Kumar (1):
  perf/kvm: perf-kvm-stat to report syscalls

Srikar Dronamraju (1):
  perf/kvm: Enable perf-kvm-stat record/report on powerpc


 arch/powerpc/include/asm/hvcall.h|  120 -
 arch/powerpc/include/uapi/asm/hcall_codes.h  |  123 ++
 arch/powerpc/include/uapi/asm/kvm_perf.h |   19 
 arch/powerpc/include/uapi/asm/trace_book3s.h |   33 +++
 arch/powerpc/include/uapi/asm/trace_hcall.h  |  122 ++
 arch/powerpc/kvm/trace_book3s.h  |   32 ---
 arch/powerpc/kvm/trace_hv.h  |  119 -
 arch/powerpc/kvm/trace_pr.h  |2 
 tools/perf/arch/powerpc/Makefile |1 
 tools/perf/arch/powerpc/util/Build   |1 
 tools/perf/arch/powerpc/util/kvm-stat.c  |   94 
 11 files changed, 398 insertions(+), 268 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/hcall_codes.h
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_book3s.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_hcall.h
 delete mode 100644 arch/powerpc/kvm/trace_book3s.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

--

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v1 1/2] perf/kvm: Enable perf-kvm-stat record/report on powerpc

2015-02-27 Thread Hemant Kumar
From: Srikar Dronamraju sri...@linux.vnet.ibm.com

perf kvm stat record/report isn't supported on powerpc.
This patch enables perf to record kvm events (kvm_hv:kvm_guest_enter and
kvm_hv:kvm_guest_exit)
and to display the stats related to the events.
When perf kvm stat record -a is invoked, the kvm_hv related tracepoints
kvm_hv:kvm_guest_enter (defined as KVM_ENTRY_TRACE) and kvm_hv:kvm_guest_exit
(defined as KVM_EXIT_TRACE) are enabled. All these data are dumped to
perf.data.guest file.
After recording, use perf kvm stat report to view the vm exit related
stats, which show how many times the VM exited from guest to host/hypervisor
mode.
All these exits are grouped as per their reasons to exit. The exit reasons
are defined in kvm_trace_symbol_exit.

The reasons related to kvm_exits, hcalls, etc were previously defined in
arch/powerpc/kvm/trace_book3s.h. To reuse all the reasons defined there, this
patch moves the exit reasons to arch/powerpc/include/uapi/asm/trace_book3s.h.

This patch defines the tracepoint events kvm_hv:kvm_guest_exit and
kvm_hv:kvm_guest_enter to be sampled and registers the exit events' 
operations.

Here is a sample o/p:

# pgrep qemu
19378
60515

# perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

# perf kvm stat report -p 60515


Analyze events for pid(s) 60515, all VCPUs:

 VM-EXITSamples  Samples% Time%Min TimeMax Time 
Avg time

  H_DATA_STORAGE   500635.30% 0.13%  1.94us 49.46us 
12.37us ( +-   0.52% )
  HV_DECREMENTER   445731.43% 0.02%  0.72us 16.14us 
 1.91us ( +-   0.96% )
 SYSCALL   269018.97% 0.10%  2.84us528.24us 
18.29us ( +-   3.75% )
  RETURN_TO_HOST   178912.61%99.76%  1.58us 672791.91us  
27470.23us ( +-   3.00% )
EXTERNAL240 1.69% 0.00%  0.69us 10.67us 
 1.33us ( +-   5.34% )

Total Samples:14182, Total events handled time:49264158.30us.

Signed-off-by: Srikar Dronamraju sri...@linux.vnet.ibm.com
Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/uapi/asm/kvm_perf.h |   15 
 arch/powerpc/include/uapi/asm/trace_book3s.h |   33 ++
 arch/powerpc/kvm/trace_book3s.h  |   32 -
 arch/powerpc/kvm/trace_hv.h  |2 +-
 arch/powerpc/kvm/trace_pr.h  |2 +-
 tools/perf/arch/powerpc/Makefile |1 +
 tools/perf/arch/powerpc/util/Build   |1 +
 tools/perf/arch/powerpc/util/kvm-stat.c  |   33 ++
 8 files changed, 85 insertions(+), 34 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_book3s.h
 delete mode 100644 arch/powerpc/kvm/trace_book3s.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..30fa670
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/trace_book3s.h
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_H */
diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
new file mode 100644
index 000..1e79e0e
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -0,0 +1,33 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit  \
+   {0x0, RETURN_TO_HOST},\
+   {0x100, SYSTEM_RESET},\
+   {0x200, MACHINE_CHECK},   \
+   {0x300, DATA_STORAGE},\
+   {0x380, DATA_SEGMENT},\
+   {0x400, INST_STORAGE},\
+   {0x480, INST_SEGMENT},\
+   {0x500, EXTERNAL},\
+   {0x501, EXTERNAL_LEVEL},  \
+   {0x502, EXTERNAL_HV}, \
+   {0x600, ALIGNMENT},   \
+   {0x700, PROGRAM}, \
+   {0x800, FP_UNAVAIL},  \
+   {0x900, DECREMENTER}, \
+   {0x980, HV_DECREMENTER

Re: [PATCH v1 0/2] perf/kvm: perf-kvm-stat on powerpc

2015-03-24 Thread Hemant Kumar

Hi Michael,

These patches were posted a month back. We don't have any review 
comments to handle at this time. Can you pull these patches to your tree?
Please, do let me know if you want me to rebase these patches to a 
different tree (like Arnaldo's/tip etc).


On 02/27/2015 03:13 PM, Hemant Kumar wrote:

perf kvm stat record/report which can be used to analyze KVM related statistics
isn't enabled on powerpc. This patchset enables perf kvm stat on powerpc.
The first patch enables perf kvm stat record and report to report kvm exits.

record enables recording of the tracepoints: kvm_hv:kvm_guest_enter and
kvm_hv:kvm_guest_exit in the first patch and kvm_hv:kvm_hcall_enter and
kvm_hv:kvm_hcall_exit in the second patch.

This command can be used to record kvm events on the host:
# perf kvm stat record -a

To report the kvm guest related exit events, use:
# perf kvm stat report
or
# perf kvm stat report --event=vmexit

This should show the exit events along with the exit reasons.

The second patch in this series adds support to show the hcall events too with:
# perf kvm stat report --event=syscall

---

Hemant Kumar (1):
   perf/kvm: perf-kvm-stat to report syscalls

Srikar Dronamraju (1):
   perf/kvm: Enable perf-kvm-stat record/report on powerpc


  arch/powerpc/include/asm/hvcall.h|  120 -
  arch/powerpc/include/uapi/asm/hcall_codes.h  |  123 ++
  arch/powerpc/include/uapi/asm/kvm_perf.h |   19 
  arch/powerpc/include/uapi/asm/trace_book3s.h |   33 +++
  arch/powerpc/include/uapi/asm/trace_hcall.h  |  122 ++
  arch/powerpc/kvm/trace_book3s.h  |   32 ---
  arch/powerpc/kvm/trace_hv.h  |  119 -
  arch/powerpc/kvm/trace_pr.h  |2
  tools/perf/arch/powerpc/Makefile |1
  tools/perf/arch/powerpc/util/Build   |1
  tools/perf/arch/powerpc/util/kvm-stat.c  |   94 
  11 files changed, 398 insertions(+), 268 deletions(-)
  create mode 100644 arch/powerpc/include/uapi/asm/hcall_codes.h
  create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
  create mode 100644 arch/powerpc/include/uapi/asm/trace_book3s.h
  create mode 100644 arch/powerpc/include/uapi/asm/trace_hcall.h
  delete mode 100644 arch/powerpc/kvm/trace_book3s.h
  create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

--

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 2/5] kvm/powerpc: Add exit reason for return code 0x0

2015-04-20 Thread Hemant Kumar
This patch adds an exit reason RETURN_TO_HOST for the return code
0x0.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/uapi/asm/trace_book3s.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
index f647ce0..8635005 100644
--- a/arch/powerpc/include/uapi/asm/trace_book3s.h
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -6,6 +6,7 @@
  */
 
 #define kvm_trace_symbol_exit \
+   {0x0,   RETURN_TO_HOST}, \
{0x100, SYSTEM_RESET}, \
{0x200, MACHINE_CHECK}, \
{0x300, DATA_STORAGE}, \
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 5/5] perf/kvm: HCALL events analysis

2015-04-20 Thread Hemant Kumar
This patch adds KVM hypervisor call analysis to perf for powerpc.
 - Trace hcall events :
  perf kvm stat record

 - Show the results :
  perf kvm stat report --event=hcall

The results show the number of hypervisor calls from the guest grouped
into their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints kvm_hv:kvm_hcall_enter
and kvm_hv:kvm_hcall_exit. It uses the pSeries hypervisor codes
exported through uapi to classify the hcalls into their respective reasons.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515 --event=hcall
Analyze events for pid(s) 60515, all VCPUs:

 HCALL-EVENTSamples  Samples% Time%Min TimeMax Time 
Avg time

H_VIO_SIGNAL   103438.44%15.77%  0.36us  1.59us 
 0.44us ( +-   0.66% )
  H_SEND_CRQ65224.24%10.97%  0.39us  1.84us 
 0.49us ( +-   1.20% )
   H_IPI52319.44%62.05%  1.35us 19.70us 
 3.44us ( +-   2.88% )
 H_PUT_TERM_CHAR41115.28% 8.03%  0.38us  3.77us 
 0.57us ( +-   1.61% )
 H_GET_TERM_CHAR 50 1.86% 0.99%  0.40us  0.98us 
 0.57us ( +-   3.37% )
   H_EOI 20 0.74% 2.19%  2.22us  4.72us 
 3.17us ( +-   5.96% )

Total Samples:2690, Total events handled time:2896.94us.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/uapi/asm/kvm_perf.h |  4 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  | 61 
 2 files changed, 65 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
index 30fa670..440902e 100644
--- a/arch/powerpc/include/uapi/asm/kvm_perf.h
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -3,6 +3,7 @@
 
 #include asm/trace_book3s.h
 #include asm/kvm.h
+#include asm/trace_hcall.h
 
 #define DECODE_STR_LEN 20
 
@@ -11,5 +12,8 @@
 #define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
 #define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
 #define KVM_EXIT_REASON trap
+#define KVM_HCALL_ENTRY_TRACE kvm_hv:kvm_hcall_enter
+#define KVM_HCALL_EXIT_TRACE kvm_hv:kvm_hcall_exit
+#define KVM_HCALL_REASON req
 
 #endif /* _ASM_POWERPC_KVM_PERF_H */
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index 62cdcc1..685201c 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -1,7 +1,9 @@
 #include ../../util/kvm-stat.h
 #include asm/kvm_perf.h
+#include ../../util/debug.h
 
 define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
 
 static struct kvm_events_ops exit_events = {
.is_begin_event = exit_event_begin,
@@ -10,14 +12,73 @@ static struct kvm_events_ops exit_events = {
.name = VM-EXIT
 };
 
+static void hcall_event_get_key(struct perf_evsel *evsel,
+   struct perf_sample *sample,
+   struct event_key *key)
+{
+   key-info = 0;
+   key-key = perf_evsel__intval(evsel, sample, KVM_HCALL_REASON);
+}
+
+static const char *get_exit_reason(u64 exit_code)
+{
+   struct exit_reasons_table *tbl = hcall_reasons;
+
+   while (tbl-reason != NULL) {
+   if (tbl-exit_code == exit_code)
+   return tbl-reason;
+   tbl++;
+   }
+
+   pr_err(Unknown kvm hcall exit code: %lld\n,
+  (unsigned long long)exit_code);
+   return UNKNOWN;
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+   struct perf_sample *sample __maybe_unused,
+   struct event_key *key __maybe_unused)
+{
+   return (!strcmp(evsel-name, KVM_HCALL_EXIT_TRACE));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+   if (!strcmp(evsel-name, KVM_HCALL_ENTRY_TRACE)) {
+   hcall_event_get_key(evsel, sample, key);
+   return true;
+   }
+
+return false;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+  struct event_key *key,
+  char *decode)
+{
+   const char *hcall_reason = get_exit_reason(key-key);
+
+   scnprintf(decode, DECODE_STR_LEN, %s, hcall_reason);
+}
+
+static struct kvm_events_ops hcall_events = {
+   .is_begin_event = hcall_event_begin,
+   .is_end_event = hcall_event_end,
+   .decode_key = hcall_event_decode_key,
+   .name = HCALL-EVENT,
+};
+
 const char *const kvm_events_tp

[PATCH v2 1/5] kvm/powerpc: Export exit reasons as uapi

2015-04-20 Thread Hemant Kumar
To analyze the kvm exits with perf, we will need to map the exit codes
with the exit reasons. Such a mapping exists today in
trace_book3s.h. But it's not exported to tools like perf.

This patch moves these kvm exit reasons and their mapping from
arch/powerpc/kvm/trace_book3s.h to
arch/powerpc/include/uapi/asm/trace_book3s.h.

We will also need to change the path of trace_book3s.h included in files
trace_hv.h and trace_pr.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/uapi/asm/trace_book3s.h | 32 
 arch/powerpc/kvm/trace_book3s.h  | 32 
 arch/powerpc/kvm/trace_hv.h  |  2 +-
 arch/powerpc/kvm/trace_pr.h  |  2 +-
 4 files changed, 34 insertions(+), 34 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/trace_book3s.h
 delete mode 100644 arch/powerpc/kvm/trace_book3s.h

diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
new file mode 100644
index 000..f647ce0
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x100, SYSTEM_RESET}, \
+   {0x200, MACHINE_CHECK}, \
+   {0x300, DATA_STORAGE}, \
+   {0x380, DATA_SEGMENT}, \
+   {0x400, INST_STORAGE}, \
+   {0x480, INST_SEGMENT}, \
+   {0x500, EXTERNAL}, \
+   {0x501, EXTERNAL_LEVEL}, \
+   {0x502, EXTERNAL_HV}, \
+   {0x600, ALIGNMENT}, \
+   {0x700, PROGRAM}, \
+   {0x800, FP_UNAVAIL}, \
+   {0x900, DECREMENTER}, \
+   {0x980, HV_DECREMENTER}, \
+   {0xc00, SYSCALL}, \
+   {0xd00, TRACE}, \
+   {0xe00, H_DATA_STORAGE}, \
+   {0xe20, H_INST_STORAGE}, \
+   {0xe40, H_EMUL_ASSIST}, \
+   {0xf00, PERFMON}, \
+   {0xf20, ALTIVEC}, \
+   {0xf40, VSX}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
deleted file mode 100644
index f647ce0..000
--- a/arch/powerpc/kvm/trace_book3s.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#if !defined(_TRACE_KVM_BOOK3S_H)
-#define _TRACE_KVM_BOOK3S_H
-
-/*
- * Common defines used by the trace macros in trace_pr.h and trace_hv.h
- */
-
-#define kvm_trace_symbol_exit \
-   {0x100, SYSTEM_RESET}, \
-   {0x200, MACHINE_CHECK}, \
-   {0x300, DATA_STORAGE}, \
-   {0x380, DATA_SEGMENT}, \
-   {0x400, INST_STORAGE}, \
-   {0x480, INST_SEGMENT}, \
-   {0x500, EXTERNAL}, \
-   {0x501, EXTERNAL_LEVEL}, \
-   {0x502, EXTERNAL_HV}, \
-   {0x600, ALIGNMENT}, \
-   {0x700, PROGRAM}, \
-   {0x800, FP_UNAVAIL}, \
-   {0x900, DECREMENTER}, \
-   {0x980, HV_DECREMENTER}, \
-   {0xc00, SYSCALL}, \
-   {0xd00, TRACE}, \
-   {0xe00, H_DATA_STORAGE}, \
-   {0xe20, H_INST_STORAGE}, \
-   {0xe40, H_EMUL_ASSIST}, \
-   {0xf00, PERFMON}, \
-   {0xf20, ALTIVEC}, \
-   {0xf40, VSX}
-
-#endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 33d9daf..02d0a07 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -2,7 +2,7 @@
 #define _TRACE_KVM_HV_H
 
 #include linux/tracepoint.h
-#include trace_book3s.h
+#include uapi/asm/trace_book3s.h
 #include asm/hvcall.h
 #include asm/kvm_asm.h
 
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index 810507c..a9850c6 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -3,7 +3,7 @@
 #define _TRACE_KVM_PR_H
 
 #include linux/tracepoint.h
-#include trace_book3s.h
+#include uapi/asm/trace_book3s.h
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_pr
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 0/5] KVM events analysis on powerpc with perf

2015-04-20 Thread Hemant Kumar
Hello!
This patchset enables perf kvm stat record/report on powerpc,
which can be used to analyze certain KVM events : KVM exits and
hypervisor calls. The statistics can be shown individually for
each running VM in the host and hence, can be useful in giving
an idea of the performance of a VM on a certain workload.

Example usages are shown in each of the patches individually.
Here is a sample output :
Analyze events for pid(s) 60515, all VCPUs:

  VM-EXITSamples  Samples% Time%Min TimeMax Time
 Avg time

   H_DATA_STORAGE   500635.30% 0.13%  1.94us 49.46us 
12.37us ( +-   0.52% )
   HV_DECREMENTER   445731.43% 0.02%  0.72us 16.14us  
1.91us ( +-   0.96% )
  SYSCALL   269018.97% 0.10%  2.84us528.24us 
18.29us ( +-   3.75% )
   RETURN_TO_HOST   178912.61%99.76%  1.58us 672791.91us  
27470.23us ( +-   3.00% )
 EXTERNAL240 1.69% 0.00%  0.69us 10.67us  
1.33us ( +-   5.34% )

Total Samples:14182, Total events handled time:49264158.30us.

The above example shows how many kvm exits have happened
during a certain period of time. Along with the total number of exits,
it also groups all the exits based on their reasons. Frequency for
individual exit reasons is also shown.

This patchset makes use of kvm_hv tracepoints and enables perf kvm
stat record to trace on them. After recording, perf kvm stat report
does all the post processing of parsing the events captured and
classifying them according to their exit reasons (which are already
available in trace_book3s.h). A similar method is used for hcall
analysis.

Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 3/5] perf/kvm: KVM exit events analysis

2015-04-20 Thread Hemant Kumar
From: Srikar Dronamraju sri...@linux.vnet.ibm.com

This patch adds KVM exit event analysis support to perf for powerpc.

 - Trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid pid

 - Show the results :
  perf kvm stat report

The results show the number of exits (from the guest context to
host/hypervisor context) grouped into their respective exit reasons with
their frequency.

This patch makes use of the guest exit reasons available in
trace_book3s.h. It records on two already available tracepoints :
kvm_hv:kvm_guest_exit and kvm_hv:kvm_guest_enter.

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515
Analyze events for pid(s) 60515, all VCPUs:

   VM-EXITSamples  Samples% Time%Min Time Max
Time Avg time

H_DATA_STORAGE   500635.30% 0.13%  1.94us 49.46us 
12.37us ( +-   0.52% )
HV_DECREMENTER   445731.43% 0.02%  0.72us 16.14us  
1.91us ( +-   0.96% )
   SYSCALL   269018.97% 0.10%  2.84us528.24us 
18.29us ( +-   3.75% )
RETURN_TO_HOST   178912.61%99.76%  1.58us 672791.91us  
27470.23us ( +-   3.00% )
  EXTERNAL240 1.69% 0.00%0.69us 10.67us  
1.33us ( +-   5.34% )

Total Samples:14182, Total events handled time:49264158.30us.

Signed-off-by: Srikar Dronamraju sri...@linux.vnet.ibm.com
Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/uapi/asm/kvm_perf.h | 15 +++
 tools/perf/arch/powerpc/Makefile |  1 +
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/kvm-stat.c  | 33 
 4 files changed, 50 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..30fa670
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/trace_book3s.h
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_H */
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..21322e0 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 0af6e9b..dd47b5e 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,4 +1,5 @@
 libperf-y += header.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 000..62cdcc1
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -0,0 +1,33 @@
+#include ../../util/kvm-stat.h
+#include asm/kvm_perf.h
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+
+static struct kvm_events_ops exit_events = {
+   .is_begin_event = exit_event_begin,
+   .is_end_event = exit_event_end,
+   .decode_key = exit_event_decode_key,
+   .name = VM-EXIT
+};
+
+const char *const kvm_events_tp[] = {
+   kvm_hv:kvm_guest_exit,
+   kvm_hv:kvm_guest_enter,
+   NULL,
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+   { .name = vmexit, .ops = exit_events },
+   { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+   NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+   kvm-exit_reasons = hv_exit_reasons;
+   kvm-exit_reasons_isa = HV;
+   return 0;
+}
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 4/5] kvm/powerpc: Move HCALL reason codes to uapi

2015-04-20 Thread Hemant Kumar
For tools like perf to analyze the KVM events like hcalls, we need the
hypervisor calls and their codes to be exported through uapi.

This patch moves most of the pSeries hcall codes from
arch/powerpc/include/asm/hvcall.h to
arch/powerpc/include/uapi/asm/hcall_codes.h.
It also moves the mapping hcall_code-to-hcall_reason from
arch/powerpc/kvm/trace_hv.h to
arch/powerpc/include/uapi/asm/trace_hcall.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/hvcall.h   | 120 +--
 arch/powerpc/include/uapi/asm/hcall_codes.h | 123 
 arch/powerpc/include/uapi/asm/trace_hcall.h | 122 +++
 arch/powerpc/kvm/trace_hv.h | 117 +-
 4 files changed, 248 insertions(+), 234 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/hcall_codes.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_hcall.h

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 85bc8c0..799677d 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -155,124 +155,8 @@
 /* Each control block has to be on a 4K boundary */
 #define H_CB_ALIGNMENT  4096
 
-/* pSeries hypervisor opcodes */
-#define H_REMOVE   0x04
-#define H_ENTER0x08
-#define H_READ 0x0c
-#define H_CLEAR_MOD0x10
-#define H_CLEAR_REF0x14
-#define H_PROTECT  0x18
-#define H_GET_TCE  0x1c
-#define H_PUT_TCE  0x20
-#define H_SET_SPRG00x24
-#define H_SET_DABR 0x28
-#define H_PAGE_INIT0x2c
-#define H_SET_ASR  0x30
-#define H_ASR_ON   0x34
-#define H_ASR_OFF  0x38
-#define H_LOGICAL_CI_LOAD  0x3c
-#define H_LOGICAL_CI_STORE 0x40
-#define H_LOGICAL_CACHE_LOAD   0x44
-#define H_LOGICAL_CACHE_STORE  0x48
-#define H_LOGICAL_ICBI 0x4c
-#define H_LOGICAL_DCBF 0x50
-#define H_GET_TERM_CHAR0x54
-#define H_PUT_TERM_CHAR0x58
-#define H_REAL_TO_LOGICAL  0x5c
-#define H_HYPERVISOR_DATA  0x60
-#define H_EOI  0x64
-#define H_CPPR 0x68
-#define H_IPI  0x6c
-#define H_IPOLL0x70
-#define H_XIRR 0x74
-#define H_PERFMON  0x7c
-#define H_MIGRATE_DMA  0x78
-#define H_REGISTER_VPA 0xDC
-#define H_CEDE 0xE0
-#define H_CONFER   0xE4
-#define H_PROD 0xE8
-#define H_GET_PPP  0xEC
-#define H_SET_PPP  0xF0
-#define H_PURR 0xF4
-#define H_PIC  0xF8
-#define H_REG_CRQ  0xFC
-#define H_FREE_CRQ 0x100
-#define H_VIO_SIGNAL   0x104
-#define H_SEND_CRQ 0x108
-#define H_COPY_RDMA0x110
-#define H_REGISTER_LOGICAL_LAN 0x114
-#define H_FREE_LOGICAL_LAN 0x118
-#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
-#define H_SEND_LOGICAL_LAN 0x120
-#define H_BULK_REMOVE  0x124
-#define H_MULTICAST_CTRL   0x130
-#define H_SET_XDABR0x134
-#define H_STUFF_TCE0x138
-#define H_PUT_TCE_INDIRECT 0x13C
-#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
-#define H_VTERM_PARTNER_INFO   0x150
-#define H_REGISTER_VTERM   0x154
-#define H_FREE_VTERM   0x158
-#define H_RESET_EVENTS  0x15C
-#define H_ALLOC_RESOURCE0x160
-#define H_FREE_RESOURCE 0x164
-#define H_MODIFY_QP 0x168
-#define H_QUERY_QP  0x16C
-#define H_REREGISTER_PMR0x170
-#define H_REGISTER_SMR  0x174
-#define H_QUERY_MR  0x178
-#define H_QUERY_MW  0x17C
-#define H_QUERY_HCA 0x180
-#define H_QUERY_PORT0x184
-#define H_MODIFY_PORT   0x188
-#define H_DEFINE_AQP1   0x18C
-#define H_GET_TRACE_BUFFER  0x190
-#define H_DEFINE_AQP0   0x194
-#define H_RESIZE_MR 0x198
-#define H_ATTACH_MCQP   0x19C
-#define H_DETACH_MCQP   0x1A0
-#define H_CREATE_RPT0x1A4
-#define H_REMOVE_RPT0x1A8
-#define H_REGISTER_RPAGES   0x1AC
-#define H_DISABLE_AND_GETC  0x1B0
-#define H_ERROR_DATA0x1B4
-#define H_GET_HCA_INFO  0x1B8
-#define H_GET_PERF_COUNT0x1BC
-#define H_MANAGE_TRACE  0x1C0
-#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
-#define H_QUERY_INT_STATE   0x1E4
-#define H_POLL_PENDING 0x1D8
-#define H_ILLAN_ATTRIBUTES 0x244
-#define H_MODIFY_HEA_QP0x250
-#define H_QUERY_HEA_QP 0x254
-#define H_QUERY_HEA0x258
-#define H_QUERY_HEA_PORT   0x25C
-#define H_MODIFY_HEA_PORT  0x260
-#define H_REG_BCMC 0x264
-#define H_DEREG_BCMC   0x268
-#define H_REGISTER_HEA_RPAGES  0x26C
-#define H_DISABLE_AND_GET_HEA  0x270
-#define

[PATCH v3 1/3] kvm/powerpc: Export kvm exit reasons

2015-05-07 Thread Hemant Kumar
To analyze the kvm exits with perf, we will need to map the exit codes
with the exit reasons. Such a mapping exists today in trace_book3s.h.
Currently it's not exported to perf.

This patch moves these kvm exit reasons and their mapping from
arch/powerpc/kvm/trace_book3s.h to
arch/powerpc/include/uapi/asm/trace_book3s.h.

Accordingly change the include files in trace_hv.h and trace_pr.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes :
- Original patchset split into 2 patchsets now: for perf and powerpc
  side changes.

 arch/powerpc/include/uapi/asm/trace_book3s.h | 32 
 arch/powerpc/kvm/trace_book3s.h  | 32 
 arch/powerpc/kvm/trace_hv.h  |  2 +-
 arch/powerpc/kvm/trace_pr.h  |  2 +-
 4 files changed, 34 insertions(+), 34 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/trace_book3s.h
 delete mode 100644 arch/powerpc/kvm/trace_book3s.h

diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
new file mode 100644
index 000..f647ce0
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x100, SYSTEM_RESET}, \
+   {0x200, MACHINE_CHECK}, \
+   {0x300, DATA_STORAGE}, \
+   {0x380, DATA_SEGMENT}, \
+   {0x400, INST_STORAGE}, \
+   {0x480, INST_SEGMENT}, \
+   {0x500, EXTERNAL}, \
+   {0x501, EXTERNAL_LEVEL}, \
+   {0x502, EXTERNAL_HV}, \
+   {0x600, ALIGNMENT}, \
+   {0x700, PROGRAM}, \
+   {0x800, FP_UNAVAIL}, \
+   {0x900, DECREMENTER}, \
+   {0x980, HV_DECREMENTER}, \
+   {0xc00, SYSCALL}, \
+   {0xd00, TRACE}, \
+   {0xe00, H_DATA_STORAGE}, \
+   {0xe20, H_INST_STORAGE}, \
+   {0xe40, H_EMUL_ASSIST}, \
+   {0xf00, PERFMON}, \
+   {0xf20, ALTIVEC}, \
+   {0xf40, VSX}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
deleted file mode 100644
index f647ce0..000
--- a/arch/powerpc/kvm/trace_book3s.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#if !defined(_TRACE_KVM_BOOK3S_H)
-#define _TRACE_KVM_BOOK3S_H
-
-/*
- * Common defines used by the trace macros in trace_pr.h and trace_hv.h
- */
-
-#define kvm_trace_symbol_exit \
-   {0x100, SYSTEM_RESET}, \
-   {0x200, MACHINE_CHECK}, \
-   {0x300, DATA_STORAGE}, \
-   {0x380, DATA_SEGMENT}, \
-   {0x400, INST_STORAGE}, \
-   {0x480, INST_SEGMENT}, \
-   {0x500, EXTERNAL}, \
-   {0x501, EXTERNAL_LEVEL}, \
-   {0x502, EXTERNAL_HV}, \
-   {0x600, ALIGNMENT}, \
-   {0x700, PROGRAM}, \
-   {0x800, FP_UNAVAIL}, \
-   {0x900, DECREMENTER}, \
-   {0x980, HV_DECREMENTER}, \
-   {0xc00, SYSCALL}, \
-   {0xd00, TRACE}, \
-   {0xe00, H_DATA_STORAGE}, \
-   {0xe20, H_INST_STORAGE}, \
-   {0xe40, H_EMUL_ASSIST}, \
-   {0xf00, PERFMON}, \
-   {0xf20, ALTIVEC}, \
-   {0xf40, VSX}
-
-#endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 33d9daf..02d0a07 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -2,7 +2,7 @@
 #define _TRACE_KVM_HV_H
 
 #include linux/tracepoint.h
-#include trace_book3s.h
+#include uapi/asm/trace_book3s.h
 #include asm/hvcall.h
 #include asm/kvm_asm.h
 
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index 810507c..a9850c6 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -3,7 +3,7 @@
 #define _TRACE_KVM_PR_H
 
 #include linux/tracepoint.h
-#include trace_book3s.h
+#include uapi/asm/trace_book3s.h
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_pr
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 2/2] perf/kvm: Support HCALL events

2015-05-07 Thread Hemant Kumar
powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints kvm_hv:kvm_hcall_enter
and kvm_hv:kvm_hcall_exit. It uses the pSeries hypervisor codes
exported through uapi to classify the hcalls into their respective reasons.

Note : This patch has a dependency on "kvm/powerpc: Export HCALL reason
codes", which exports HCALL reasons through uapi.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515 --event=hcall
Analyze events for pid(s) 60515, all VCPUs:

         HCALL-EVENT    Samples  Samples%     Time%    Min Time    Max Time         Avg time

        H_VIO_SIGNAL       1034    38.44%    15.77%      0.36us      1.59us      0.44us ( +-   0.66% )
          H_SEND_CRQ        652    24.24%    10.97%      0.39us      1.84us      0.49us ( +-   1.20% )
               H_IPI        523    19.44%    62.05%      1.35us     19.70us      3.44us ( +-   2.88% )
     H_PUT_TERM_CHAR        411    15.28%     8.03%      0.38us      3.77us      0.57us ( +-   1.61% )
     H_GET_TERM_CHAR         50     1.86%     0.99%      0.40us      0.98us      0.57us ( +-   3.37% )
               H_EOI         20     0.74%     2.19%      2.22us      4.72us      3.17us ( +-   5.96% )

Total Samples:2690, Total events handled time:2896.94us.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Patch has a dependency on https://patchwork.ozlabs.org/patch/469841/
which exports the HCALL reason codes to perf.

 arch/powerpc/include/uapi/asm/kvm_perf.h |  4 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  | 61 
 2 files changed, 65 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
index 30fa670..440902e 100644
--- a/arch/powerpc/include/uapi/asm/kvm_perf.h
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -3,6 +3,7 @@
 
 #include asm/trace_book3s.h
 #include asm/kvm.h
+#include asm/trace_hcall.h
 
 #define DECODE_STR_LEN 20
 
@@ -11,5 +12,8 @@
 #define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
 #define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
 #define KVM_EXIT_REASON trap
+#define KVM_HCALL_ENTRY_TRACE kvm_hv:kvm_hcall_enter
+#define KVM_HCALL_EXIT_TRACE kvm_hv:kvm_hcall_exit
+#define KVM_HCALL_REASON req
 
 #endif /* _ASM_POWERPC_KVM_PERF_H */
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index 62cdcc1..685201c 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -1,7 +1,9 @@
 #include ../../util/kvm-stat.h
 #include asm/kvm_perf.h
+#include ../../util/debug.h
 
 define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
 
 static struct kvm_events_ops exit_events = {
.is_begin_event = exit_event_begin,
@@ -10,14 +12,73 @@ static struct kvm_events_ops exit_events = {
.name = VM-EXIT
 };
 
+static void hcall_event_get_key(struct perf_evsel *evsel,
+   struct perf_sample *sample,
+   struct event_key *key)
+{
+   key-info = 0;
+   key-key = perf_evsel__intval(evsel, sample, KVM_HCALL_REASON);
+}
+
+static const char *get_exit_reason(u64 exit_code)
+{
+   struct exit_reasons_table *tbl = hcall_reasons;
+
+   while (tbl-reason != NULL) {
+   if (tbl-exit_code == exit_code)
+   return tbl-reason;
+   tbl++;
+   }
+
+   pr_err(Unknown kvm hcall exit code: %lld\n,
+  (unsigned long long)exit_code);
+   return UNKNOWN;
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+   struct perf_sample *sample __maybe_unused,
+   struct event_key *key __maybe_unused)
+{
+   return (!strcmp(evsel-name, KVM_HCALL_EXIT_TRACE));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+   if (!strcmp(evsel-name, KVM_HCALL_ENTRY_TRACE)) {
+   hcall_event_get_key(evsel, sample, key);
+   return true;
+   }
+
+return false;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+  struct event_key *key,
+  char *decode)
+{
+   const char *hcall_reason = get_exit_reason(key-key);
+
+   scnprintf(decode

[PATCH v3 3/3] kvm/powerpc: Export HCALL reason codes

2015-05-07 Thread Hemant Kumar
For perf to analyze the KVM events like hcalls, we need the
hypervisor calls and their codes to be exported through uapi.

This patch moves most of the pSeries hcall codes from
arch/powerpc/include/asm/hvcall.h to
arch/powerpc/include/uapi/asm/hcall_codes.h.
It also moves the mapping hcall_code-to-hcall_reason from
arch/powerpc/kvm/trace_hv.h to
arch/powerpc/include/uapi/asm/trace_hcall.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/hvcall.h   | 120 +--
 arch/powerpc/include/uapi/asm/hcall_codes.h | 123 
 arch/powerpc/include/uapi/asm/trace_hcall.h | 122 +++
 arch/powerpc/kvm/trace_hv.h | 117 +-
 4 files changed, 248 insertions(+), 234 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/hcall_codes.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_hcall.h

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 85bc8c0..799677d 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -155,124 +155,8 @@
 /* Each control block has to be on a 4K boundary */
 #define H_CB_ALIGNMENT  4096
 
-/* pSeries hypervisor opcodes */
-#define H_REMOVE   0x04
-#define H_ENTER0x08
-#define H_READ 0x0c
-#define H_CLEAR_MOD0x10
-#define H_CLEAR_REF0x14
-#define H_PROTECT  0x18
-#define H_GET_TCE  0x1c
-#define H_PUT_TCE  0x20
-#define H_SET_SPRG00x24
-#define H_SET_DABR 0x28
-#define H_PAGE_INIT0x2c
-#define H_SET_ASR  0x30
-#define H_ASR_ON   0x34
-#define H_ASR_OFF  0x38
-#define H_LOGICAL_CI_LOAD  0x3c
-#define H_LOGICAL_CI_STORE 0x40
-#define H_LOGICAL_CACHE_LOAD   0x44
-#define H_LOGICAL_CACHE_STORE  0x48
-#define H_LOGICAL_ICBI 0x4c
-#define H_LOGICAL_DCBF 0x50
-#define H_GET_TERM_CHAR0x54
-#define H_PUT_TERM_CHAR0x58
-#define H_REAL_TO_LOGICAL  0x5c
-#define H_HYPERVISOR_DATA  0x60
-#define H_EOI  0x64
-#define H_CPPR 0x68
-#define H_IPI  0x6c
-#define H_IPOLL0x70
-#define H_XIRR 0x74
-#define H_PERFMON  0x7c
-#define H_MIGRATE_DMA  0x78
-#define H_REGISTER_VPA 0xDC
-#define H_CEDE 0xE0
-#define H_CONFER   0xE4
-#define H_PROD 0xE8
-#define H_GET_PPP  0xEC
-#define H_SET_PPP  0xF0
-#define H_PURR 0xF4
-#define H_PIC  0xF8
-#define H_REG_CRQ  0xFC
-#define H_FREE_CRQ 0x100
-#define H_VIO_SIGNAL   0x104
-#define H_SEND_CRQ 0x108
-#define H_COPY_RDMA0x110
-#define H_REGISTER_LOGICAL_LAN 0x114
-#define H_FREE_LOGICAL_LAN 0x118
-#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
-#define H_SEND_LOGICAL_LAN 0x120
-#define H_BULK_REMOVE  0x124
-#define H_MULTICAST_CTRL   0x130
-#define H_SET_XDABR0x134
-#define H_STUFF_TCE0x138
-#define H_PUT_TCE_INDIRECT 0x13C
-#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
-#define H_VTERM_PARTNER_INFO   0x150
-#define H_REGISTER_VTERM   0x154
-#define H_FREE_VTERM   0x158
-#define H_RESET_EVENTS  0x15C
-#define H_ALLOC_RESOURCE0x160
-#define H_FREE_RESOURCE 0x164
-#define H_MODIFY_QP 0x168
-#define H_QUERY_QP  0x16C
-#define H_REREGISTER_PMR0x170
-#define H_REGISTER_SMR  0x174
-#define H_QUERY_MR  0x178
-#define H_QUERY_MW  0x17C
-#define H_QUERY_HCA 0x180
-#define H_QUERY_PORT0x184
-#define H_MODIFY_PORT   0x188
-#define H_DEFINE_AQP1   0x18C
-#define H_GET_TRACE_BUFFER  0x190
-#define H_DEFINE_AQP0   0x194
-#define H_RESIZE_MR 0x198
-#define H_ATTACH_MCQP   0x19C
-#define H_DETACH_MCQP   0x1A0
-#define H_CREATE_RPT0x1A4
-#define H_REMOVE_RPT0x1A8
-#define H_REGISTER_RPAGES   0x1AC
-#define H_DISABLE_AND_GETC  0x1B0
-#define H_ERROR_DATA0x1B4
-#define H_GET_HCA_INFO  0x1B8
-#define H_GET_PERF_COUNT0x1BC
-#define H_MANAGE_TRACE  0x1C0
-#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
-#define H_QUERY_INT_STATE   0x1E4
-#define H_POLL_PENDING 0x1D8
-#define H_ILLAN_ATTRIBUTES 0x244
-#define H_MODIFY_HEA_QP0x250
-#define H_QUERY_HEA_QP 0x254
-#define H_QUERY_HEA0x258
-#define H_QUERY_HEA_PORT   0x25C
-#define H_MODIFY_HEA_PORT  0x260
-#define H_REG_BCMC 0x264
-#define H_DEREG_BCMC   0x268
-#define H_REGISTER_HEA_RPAGES  0x26C
-#define H_DISABLE_AND_GET_HEA  0x270
-#define

[PATCH v3 2/3] kvm/powerpc: Add exit reason for return code 0x0

2015-05-07 Thread Hemant Kumar
This patch adds an exit reason RETURN_TO_HOST for the return code
0x0.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/uapi/asm/trace_book3s.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
index f647ce0..8635005 100644
--- a/arch/powerpc/include/uapi/asm/trace_book3s.h
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -6,6 +6,7 @@
  */
 
 #define kvm_trace_symbol_exit \
+   {0x0,   RETURN_TO_HOST}, \
{0x100, SYSTEM_RESET}, \
{0x200, MACHINE_CHECK}, \
{0x300, DATA_STORAGE}, \
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 1/2] perf/kvm: Port perf kvm to powerpc

2015-05-07 Thread Hemant Kumar
From: Srikar Dronamraju sri...@linux.vnet.ibm.com

perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid pid

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

This patch makes use of the guest exit reasons available in
trace_book3s.h. It records on two already available tracepoints :
kvm_hv:kvm_guest_exit and kvm_hv:kvm_guest_enter.

Note : This patch has a dependency on the patch "kvm/powerpc: Export
kvm exit reasons", which exports the KVM exit reasons through the uapi.

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515
Analyze events for pid(s) 60515, all VCPUs:

             VM-EXIT    Samples  Samples%     Time%    Min Time    Max Time         Avg time

      H_DATA_STORAGE       5006    35.30%     0.13%      1.94us     49.46us     12.37us ( +-   0.52% )
      HV_DECREMENTER       4457    31.43%     0.02%      0.72us     16.14us      1.91us ( +-   0.96% )
             SYSCALL       2690    18.97%     0.10%      2.84us    528.24us     18.29us ( +-   3.75% )
      RETURN_TO_HOST       1789    12.61%    99.76%      1.58us 672791.91us  27470.23us ( +-   3.00% )
            EXTERNAL        240     1.69%     0.00%      0.69us     10.67us      1.33us ( +-   5.34% )

Total Samples:14182, Total events handled time:49264158.30us.

Signed-off-by: Srikar Dronamraju sri...@linux.vnet.ibm.com
Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Patch has a dependency on : https://patchwork.ozlabs.org/patch/469839/
which exports the exit reasons to perf through uapi.

Changes:
- Original series split into two patchsets now : perf and powerpc
  side changes.

 arch/powerpc/include/uapi/asm/kvm_perf.h | 15 +++
 tools/perf/arch/powerpc/Makefile |  1 +
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/kvm-stat.c  | 33 
 4 files changed, 50 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..30fa670
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/trace_book3s.h
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_H */
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..21322e0 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 0af6e9b..dd47b5e 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,4 +1,5 @@
 libperf-y += header.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 000..62cdcc1
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -0,0 +1,33 @@
+#include ../../util/kvm-stat.h
+#include asm/kvm_perf.h
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+
+static struct kvm_events_ops exit_events = {
+   .is_begin_event = exit_event_begin,
+   .is_end_event = exit_event_end,
+   .decode_key = exit_event_decode_key,
+   .name = VM-EXIT
+};
+
+const char *const kvm_events_tp[] = {
+   kvm_hv:kvm_guest_exit,
+   kvm_hv:kvm_guest_enter,
+   NULL,
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+   { .name = vmexit, .ops = exit_events },
+   { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+   NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+   kvm-exit_reasons = hv_exit_reasons;
+   kvm-exit_reasons_isa = HV;
+   return 0;
+}
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 1/2] perf/kvm: Port perf kvm to powerpc

2015-05-08 Thread Hemant Kumar


On 05/08/2015 09:58 AM, Ingo Molnar wrote:

* Hemant Kumar hem...@linux.vnet.ibm.com wrote:


  # perf kvm stat report -p 60515
Analyze events for pid(s) 60515, all VCPUs:

VM-EXITSamples  Samples% Time%Min Time Max
Time Avg time

H_DATA_STORAGE   500635.30% 0.13%  1.94us 49.46us 
12.37us ( +-   0.52% )
HV_DECREMENTER   445731.43% 0.02%  0.72us 16.14us  
1.91us ( +-   0.96% )
SYSCALL   269018.97% 0.10%  2.84us528.24us 
18.29us ( +-   3.75% )
RETURN_TO_HOST   178912.61%99.76%  1.58us 672791.91us  
27470.23us ( +-   3.00% )
   EXTERNAL240 1.69% 0.00%0.69us 10.67us  
1.33us ( +-   5.34% )

Where is the last line misaligned? Copy & paste error or does perf kvm
produce it in such a way?


It's a copy-paste error. Thanks for pointing this out.

Shall I resend the patches with the correct alignment of the o/p?


Thanks,

Ingo



--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 1/2] perf/kvm: Port perf kvm to powerpc

2015-05-12 Thread Hemant Kumar

Hi Scott,

On 05/12/2015 03:38 AM, Scott Wood wrote:

On Fri, 2015-05-08 at 06:37 +0530, Hemant Kumar wrote:

From: Srikar Dronamraju sri...@linux.vnet.ibm.com

perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

  - To trace KVM events :
   perf kvm stat record
   If many guests are running, we can track for a specific guest by using
   --pid as in : perf kvm stat record --pid pid

  - To see the results :
   perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

This patch makes use of the guest exit reasons available in
trace_book3s.h. It records on two already available tracepoints :
kvm_hv:kvm_guest_exit and kvm_hv:kvm_guest_enter.

Note : This patch has a dependency on the patch kvm/powerpc: Export
kvm exit reasons which exports the KVM exit reasons through the uapi.

Here is a sample o/p:
  # pgrep qemu
19378
60515

2 Guests are running on the host.

  # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

  # perf kvm stat report -p 60515
Analyze events for pid(s) 60515, all VCPUs:

VM-EXITSamples  Samples% Time%Min Time Max
Time Avg time

H_DATA_STORAGE   500635.30% 0.13%  1.94us 49.46us 
12.37us ( +-   0.52% )
HV_DECREMENTER   445731.43% 0.02%  0.72us 16.14us  
1.91us ( +-   0.96% )
SYSCALL   269018.97% 0.10%  2.84us528.24us 
18.29us ( +-   3.75% )
RETURN_TO_HOST   178912.61%99.76%  1.58us 672791.91us  
27470.23us ( +-   3.00% )
   EXTERNAL240 1.69% 0.00%0.69us 10.67us  
1.33us ( +-   5.34% )

Total Samples:14182, Total events handled time:49264158.30us.

Signed-off-by: Srikar Dronamraju sri...@linux.vnet.ibm.com
Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Patch has a dependency on : https://patchwork.ozlabs.org/patch/469839/
which exports the exit reasons to perf through uapi.

Changes:
- Original series split into two patchsets now : perf and powerpc
   side changes.

  arch/powerpc/include/uapi/asm/kvm_perf.h | 15 +++
  tools/perf/arch/powerpc/Makefile |  1 +
  tools/perf/arch/powerpc/util/Build   |  1 +
  tools/perf/arch/powerpc/util/kvm-stat.c  | 33 
  4 files changed, 50 insertions(+)
  create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
  create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..30fa670
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/trace_book3s.h
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_H */

Please make sure that anything book3s-specific is named that way.


Are you suggesting naming it something like _ASM_POWERPC_BOOK3S_PERF_H?


And shouldn't this be part of the arch/powerpc-side patchset?


It should. Thanks, will move this to arch/powerpc side patchset.


diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..21322e0 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,4 @@
  ifndef NO_DWARF
  PERF_HAVE_DWARF_REGS := 1
  endif
+HAVE_KVM_STAT_SUPPORT := 1

Does this stuff fail gracefully if used on a PPC target that doesn't
support this?


Yes, it does.


-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 3/3] kvm/powerpc: Export HCALL reason codes

2015-05-12 Thread Hemant Kumar


On 05/12/2015 03:44 AM, Scott Wood wrote:

On Fri, 2015-05-08 at 06:23 +0530, Hemant Kumar wrote:

For perf to analyze the KVM events like hcalls, we need the
hypervisor calls and their codes to be exported through uapi.

This patch moves most of the pSeries hcall codes from
arch/powerpc/include/asm/hvcall.h to
arch/powerpc/include/uapi/asm/hcall_codes.h.
It also moves the mapping hcall_code-to-hcall_reason from
arch/powerpc/kvm/trace_hv.h to
arch/powerpc/include/uapi/asm/trace_hcall.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
  arch/powerpc/include/asm/hvcall.h   | 120 +--
  arch/powerpc/include/uapi/asm/hcall_codes.h | 123 
  arch/powerpc/include/uapi/asm/trace_hcall.h | 122 +++
  arch/powerpc/kvm/trace_hv.h | 117 +-

When moving to uapi please add proper namespacing to indicate that this
is pseries specific.



Sure, will add that.


diff --git a/arch/powerpc/include/uapi/asm/trace_hcall.h 
b/arch/powerpc/include/uapi/asm/trace_hcall.h
new file mode 100644
index 000..00eac01
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/trace_hcall.h
@@ -0,0 +1,122 @@
+#ifndef _KVM_TRACE_HCALL_MAP_H
+#define _KVM_TRACE_HCALL_MAP_H
+
+#include hcall_codes.h
+
+#define kvm_trace_symbol_hcall\
+   {H_REMOVE,H_REMOVE},\
+   {H_ENTER,H_ENTER},  \
+   {H_READ,H_READ},\
+   {H_CLEAR_MOD,H_CLEAR_MOD},  \

This is a rather odd way of exposing an array to userspace...



Didn't get you here. Can you please elaborate?

I see some other files like arch/x86/include/uapi/asm/vmx.h exposing the 
reasons in a similar way.


Thanks for the review.

--
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC PATCH 0/1] perf/script: Ganged exits and VM topology

2015-05-14 Thread Hemant Kumar
In powerpc, if a thread running inside a guest needs to exit to the
host to serve interrupts like the external interrupt, or the hcall
interrupts, etc., all the threads running in that specific vcore
inside the guest exit to the host. These events are called ganged
exits.

Because of the ganged exits, the other threads (if any) doing useful
work need to exit to the host. They can serve as a parameter to relate
the performance of the VM with their topology.

Here are a couple of examples to correlate this performance metric
with the topology of a VM.

The following setup was used :
Setup 1a :
VM (with 4 vcpus and one core)
ebizzy running on 2 vcpus.
No other load on the other 2 vcpus.
Resultant throughput for ebizzy in this case : 24373 records/sec
Total gang exits : 1174

Setup 1b:
VM (with 4 vcpus and one core)
ebizzy running on 2 vcpus.
Spinloop (while 1) loop running on other 2 vcpus.
Resultant throughput for ebizzy in this case : 20373 records/sec
Total gang exits : 1676

Setup 1c:
VM (with 4 vcpus and one core)
ebizzy running on 2 vcpus.
ping -f running on other 2 vcpus.
Resultant throughput for ebizzy in this case : 7841 records/sec
Total gang exits : 871073

Due to an increase in number of the gang exits, performance of ebizzy
dropped.

To verify the degradation in performance of ebizzy with the other
workloads running on the same core, the same set of loads were run on
the host machine too, with SMT on:
In all the following setups, ebizzy was pinned to 2 cpus and for
setups where some other load is running, the loads were pinned to
the other cpus of the same core.

Setup 2a:
ebizzy alone.
Resultant throughput for ebizzy in this case : 25099 records/sec

Setup 2b:
ebizzy and a spin loop (while 1) running on other cpus of the same
core.
Resultant throughput for ebizzy in this case : 22818 records/sec

Setup 2c:
ebizzy and ping -f (to another machine in the same subnet).
Resultant throughput for ebizzy in this case : 17982 records/sec

We can see that the performance of ebizzy drops due to the
load running on the other threads of the same core.

The gang_exits can serve as a parameter to define the topology of a
VM so that the load running on the VM can give us a maximum
throughput.

Here is an example with redis benchmark :

A VM running on 1 core and having two threads.
Running redis benchmark on this VM gives this throughput:
SET: 30048.08 requests per second
GET: 31806.62 requests per second
INCR: 247524.75 requests per second
LPUSH: 30284.68 requests per second
LPOP: 34036.76 requests per second
SADD: 168634.06 requests per second
SPOP: 261096.61 requests per second
MSET (10 keys): 11107.41 requests per second

For the entire run of redis :
Total gang_exits = 1192893

To see if we can reduce the number of gang_exits and increase the
throughput of redis benchmark by trying out a different topology and
system configuration, the cores were split into subcores. Each subcore
now has 2 threads (SMT 2 mode).

So, the VM was started again with 2 subcores (with 1 thread each)
in SMT 1 mode. Running redis now gives this throughput :
SET: 36231.88 requests per second
GET: 57438.25 requests per second
INCR: 292397.66 requests per second
LPUSH: 38343.56 requests per second
LPOP: 53792.36 requests per second
SADD: 267379.66 requests per second
SPOP: 247524.75 requests per second
MSET (10 keys): 9922.60 requests per second

We see an increase in the performance of redis.
Total gang exits for this case : 0 (because of SMT 1)

The number of vcpus allocated to VM remained the same in both the
cases.

In the host, with the help of gang_exit numbers, we can change the
configuration of the host and the topology of the VM to increase the
throughput of the load (running on a VM).

If there is a single active thread on that core, none of the exits
should be counted in gang_exits.

Do have a look at the patch and let me know your feedback.

Thanks,

---
Hemant Kumar (1):
  perf/script: Python script to display the ganged exits count on powerpc

 tools/perf/scripts/python/gang_exits.py | 65 +
 1 file changed, 65 insertions(+)
 create mode 100644 tools/perf/scripts/python/gang_exits.py

-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC PATCH 1/1] perf/script: Script to display the ganged exits count on powerpc

2015-05-14 Thread Hemant Kumar
In powerpc, when a thread running in the guest context needs to exit to
the hypervisor to serve interrupts like the external interrupt, or the
hcall interrupt, etc, all the threads running in that specific vcore
inside the guest exit. These events can be classified as ganged exits,
meaning that they are forced exits. An exit is not counted as a ganged
exit only if the other vcpus have ceded.

This script post-processes the perf.data file, looking for two events:
kvm_hv:kvmppc_run_core and kvm_hv:kvm_guest_exit. For a
kvm_hv:kvmppc_run_core tracepoint event, it does the following:

- if it's an 'Entry', it gets the tgid and, for that tgid, initializes the
  gang-exit count and the cedes count.
- if it's an 'Exit', it gets the runnable thread count and subtracts the
  number of cedes from it, to see how many of the runnable threads in
  that core did not cede. If the difference is more than 1 (1 because we
  have to exclude the running thread itself), then it's a ganged exit.

For a kvm_hv:kvm_guest_exit event, it checks if the vcpu ceded. If it
ceded, then increment the counter for cedes.

Usage :
 # perf record -e kvm_hv:kvm_guest_exit -e kvm_hv:kvmppc_run_core -a sleep 10
[ perf record: Woken up 96 times to write data ]
[ perf record: Captured and wrote 26.198 MB perf.data (~1144590 samples)]

 # perf script -s gang_exits.py
Ganged exits summary

Ganged exits for process 14000 :535
Ganged exits for process 13988 :  25314
===

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 tools/perf/scripts/python/gang_exits.py | 65 +
 1 file changed, 65 insertions(+)
 create mode 100644 tools/perf/scripts/python/gang_exits.py

diff --git a/tools/perf/scripts/python/gang_exits.py 
b/tools/perf/scripts/python/gang_exits.py
new file mode 100644
index 000..011aa56
--- /dev/null
+++ b/tools/perf/scripts/python/gang_exits.py
@@ -0,0 +1,65 @@
+# gang-exits.py: Count the ganged exits of a VM
+#
+# In case of powerpc, When a thread running inside a guest needs to exit to
+# the hypervisor to serve interrupts like the external interrupt, or the hcall
+# interrupts, etc., all the threads running in that specific vcore
+# inside the guest exit to the host. These events are called as ganged exits.
+# These exits are forced. Only if the vcpus cede, then it/they won't be counted
+# as ganged exit(s).
+#
+# Usage :
+# So, if in powerpc, first we do :
+# perf record -e kvm_hv:kvm_guest_exit -e kvm_hv:kvmppc_run_core -aR sleep 
nsecs
+# Using the perf.data, we have to do :
+# perf script -s gang-exits
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+usage = perf script -s gang_exits.py\n;
+
+stats = {}
+pid_tgid = {}
+
+def trace_begin():
+   print Ganged exits summary
+
+def trace_end():
+   print_ganged_exits()
+
+def kvm_hv__kvm_guest_exit(event_name, context, common_cpu,
+   common_secs, common_nsecs, common_pid, common_comm,
+   vcpu_id, reason, nip, msr, ceded):
+
+   if common_pid in pid_tgid:
+   if ceded:   # vcpu ceded ?
+   stats[pid_tgid[common_pid]]['nr_cedes'] += ceded
+
+def kvm_hv__kvmppc_run_core(event_name, context, common_cpu,
+   common_secs, common_nsecs, common_pid, common_comm,
+   n_runnable, runner_vcpu, where, tgid):
+
+   if (where): # kvmppc_run_core: Exit
+   if tgid in stats:
+   forced = n_runnable - stats[tgid]['nr_cedes']
+   if (forced  1):
+   stats[tgid]['gang-exits'] += 1
+   else:   # kvmppc_run_core: Enter, init the counts
+   if tgid in stats:
+   stats[tgid]['nr_cedes'] = 0
+   else:
+   stats[tgid] = {'gang-exits': 0, 'nr_cedes': 0}
+   if common_pid not in pid_tgid:
+   pid_tgid[common_pid] = tgid
+
+def print_ganged_exits():
+   for i in stats.keys():
+   print \nGanged exits for process %d : %20d %(i, 
stats[i]['gang-exits'])
+
+   print ===
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 1/2] perf/kvm: Port perf kvm to powerpc

2015-05-20 Thread Hemant Kumar
From: Srikar Dronamraju sri...@linux.vnet.ibm.com

perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can trace a specific guest by using
  --pid, as in : perf kvm stat record --pid <pid>

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

This patch makes use of the guest exit reasons available in
trace_book3s.h. It records on two already available tracepoints :
kvm_hv:kvm_guest_exit and kvm_hv:kvm_guest_enter.

Note : This patch has a dependency on the patch "kvm/powerpc: Export
kvm exit reasons", which exports the KVM exit reasons through the uapi.

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515
Analyze events for pid(s) 60515, all VCPUs:

       VM-EXIT    Samples  Samples%     Time%    Min Time       Max Time       Avg time

H_DATA_STORAGE       5006    35.30%     0.13%      1.94us        49.46us        12.37us ( +-   0.52% )
HV_DECREMENTER       4457    31.43%     0.02%      0.72us        16.14us         1.91us ( +-   0.96% )
       SYSCALL       2690    18.97%     0.10%      2.84us       528.24us        18.29us ( +-   3.75% )
RETURN_TO_HOST       1789    12.61%    99.76%      1.58us    672791.91us     27470.23us ( +-   3.00% )
      EXTERNAL        240     1.69%     0.00%      0.69us        10.67us         1.33us ( +-   5.34% )

Total Samples:14182, Total events handled time:49264158.30us.

Signed-off-by: Srikar Dronamraju sri...@linux.vnet.ibm.com
Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes :
- Moved the uapi related changes to arch/powerpc patchset.

This patch has a dependency on :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg89485.html
which exports the kvm exit reasons.

 tools/perf/arch/powerpc/Makefile|  1 +
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/kvm-stat.c | 33 +
 3 files changed, 35 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..21322e0 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 000..24e06bf
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -0,0 +1,33 @@
+#include ../../util/kvm-stat.h
+#include asm/kvm_perf_book3s.h
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+
+static struct kvm_events_ops exit_events = {
+   .is_begin_event = exit_event_begin,
+   .is_end_event = exit_event_end,
+   .decode_key = exit_event_decode_key,
+   .name = VM-EXIT
+};
+
+const char *const kvm_events_tp[] = {
+   kvm_hv:kvm_guest_exit,
+   kvm_hv:kvm_guest_enter,
+   NULL,
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+   { .name = vmexit, .ops = exit_events },
+   { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+   NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+   kvm-exit_reasons = hv_exit_reasons;
+   kvm-exit_reasons_isa = HV;
+   return 0;
+}
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 2/2] perf/kvm: Support HCALL events

2015-05-20 Thread Hemant Kumar
powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
kvm_hv:kvm_hcall_enter and kvm_hv:kvm_hcall_exit. It uses the
pSeries hypervisor codes exported through uapi to classify the hcalls
into their respective reasons.

Note : This patch has a dependency on "kvm/powerpc: Export HCALL reason
codes", which exports HCALL reasons through uapi.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515 --event=hcall
Analyze events for pid(s) 60515, all VCPUs:

    HCALL-EVENT    Samples  Samples%     Time%    Min Time    Max Time    Avg time

   H_VIO_SIGNAL       1034    38.44%    15.77%      0.36us      1.59us      0.44us ( +-   0.66% )
     H_SEND_CRQ        652    24.24%    10.97%      0.39us      1.84us      0.49us ( +-   1.20% )
          H_IPI        523    19.44%    62.05%      1.35us     19.70us      3.44us ( +-   2.88% )
H_PUT_TERM_CHAR        411    15.28%     8.03%      0.38us      3.77us      0.57us ( +-   1.61% )
H_GET_TERM_CHAR         50     1.86%     0.99%      0.40us      0.98us      0.57us ( +-   3.37% )
          H_EOI         20     0.74%     2.19%      2.22us      4.72us      3.17us ( +-   5.96% )

Total Samples:2690, Total events handled time:2896.94us.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes:
- Moved the uapi related changes to arch/powerpc side patchset.

This patch has a dependency on :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg89487.html
which export hcall reasons through uapi.

 tools/perf/arch/powerpc/util/kvm-stat.c | 61 +
 1 file changed, 61 insertions(+)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index 24e06bf..0d3ea47 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -1,7 +1,9 @@
 #include ../../util/kvm-stat.h
 #include asm/kvm_perf_book3s.h
+#include ../../util/debug.h
 
 define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
 
 static struct kvm_events_ops exit_events = {
.is_begin_event = exit_event_begin,
@@ -10,14 +12,73 @@ static struct kvm_events_ops exit_events = {
.name = VM-EXIT
 };
 
+static void hcall_event_get_key(struct perf_evsel *evsel,
+   struct perf_sample *sample,
+   struct event_key *key)
+{
+   key-info = 0;
+   key-key = perf_evsel__intval(evsel, sample, KVM_HCALL_REASON);
+}
+
+static const char *get_exit_reason(u64 exit_code)
+{
+   struct exit_reasons_table *tbl = hcall_reasons;
+
+   while (tbl-reason != NULL) {
+   if (tbl-exit_code == exit_code)
+   return tbl-reason;
+   tbl++;
+   }
+
+   pr_err(Unknown kvm hcall exit code: %lld\n,
+  (unsigned long long)exit_code);
+   return UNKNOWN;
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+   struct perf_sample *sample __maybe_unused,
+   struct event_key *key __maybe_unused)
+{
+   return (!strcmp(evsel-name, KVM_HCALL_EXIT_TRACE));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+   if (!strcmp(evsel-name, KVM_HCALL_ENTRY_TRACE)) {
+   hcall_event_get_key(evsel, sample, key);
+   return true;
+   }
+
+return false;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+  struct event_key *key,
+  char *decode)
+{
+   const char *hcall_reason = get_exit_reason(key-key);
+
+   scnprintf(decode, DECODE_STR_LEN, %s, hcall_reason);
+}
+
+static struct kvm_events_ops hcall_events = {
+   .is_begin_event = hcall_event_begin,
+   .is_end_event = hcall_event_end,
+   .decode_key = hcall_event_decode_key,
+   .name = HCALL-EVENT,
+};
+
 const char *const kvm_events_tp[] = {
kvm_hv:kvm_guest_exit,
kvm_hv:kvm_guest_enter,
+   kvm_hv:kvm_hcall_enter,
+   kvm_hv:kvm_hcall_exit,
NULL,
 };
 
 struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = vmexit, .ops = exit_events },
+   { .name = hcall, .ops = hcall_events },
{ NULL, NULL

[PATCH v4 1/3] kvm/powerpc: Export kvm exit reasons

2015-05-20 Thread Hemant Kumar
To analyze the kvm exits with perf, we will need to map the exit codes
to the exit reasons. Such a mapping exists today in trace_book3s.h.
Currently it's not exported to perf.

This patch moves these kvm exit reasons and their mapping from
arch/powerpc/kvm/trace_book3s.h to
arch/powerpc/include/uapi/asm/trace_book3s.h.
Accordingly change the include files in trace_hv.h and trace_pr.h.

Also, add a file kvm_perf_book3s.h which defines the kvm tracepoints to
trace for kvm exit events. This is added to indicate that the
tracepoints are book3s specific. Generic kvm_perf.h then can just
include kvm_perf_book3s.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes :
- Moved the uapi related changes from the perf side patchset to this
  patchset.
- Made name space changes to indicate changes specific to book3s
  (Suggested by Scott Wood)

 arch/powerpc/include/uapi/asm/kvm_perf.h|  6 +
 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h | 15 
 arch/powerpc/include/uapi/asm/trace_book3s.h| 32 +
 arch/powerpc/kvm/trace_book3s.h | 32 -
 arch/powerpc/kvm/trace_hv.h |  2 +-
 arch/powerpc/kvm/trace_pr.h |  2 +-
 6 files changed, 55 insertions(+), 34 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_book3s.h
 delete mode 100644 arch/powerpc/kvm/trace_book3s.h

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..5ed2ff3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/kvm_perf_book3s.h
+
+#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h 
b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
new file mode 100644
index 000..735901f
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_KVM_PERF_BOOK3S_H
+#define _ASM_POWERPC_KVM_PERF_BOOK3S_H
+
+#include asm/trace_book3s.h
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_BOOK3S_H */
diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
new file mode 100644
index 000..f647ce0
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x100, SYSTEM_RESET}, \
+   {0x200, MACHINE_CHECK}, \
+   {0x300, DATA_STORAGE}, \
+   {0x380, DATA_SEGMENT}, \
+   {0x400, INST_STORAGE}, \
+   {0x480, INST_SEGMENT}, \
+   {0x500, EXTERNAL}, \
+   {0x501, EXTERNAL_LEVEL}, \
+   {0x502, EXTERNAL_HV}, \
+   {0x600, ALIGNMENT}, \
+   {0x700, PROGRAM}, \
+   {0x800, FP_UNAVAIL}, \
+   {0x900, DECREMENTER}, \
+   {0x980, HV_DECREMENTER}, \
+   {0xc00, SYSCALL}, \
+   {0xd00, TRACE}, \
+   {0xe00, H_DATA_STORAGE}, \
+   {0xe20, H_INST_STORAGE}, \
+   {0xe40, H_EMUL_ASSIST}, \
+   {0xf00, PERFMON}, \
+   {0xf20, ALTIVEC}, \
+   {0xf40, VSX}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
deleted file mode 100644
index f647ce0..000
--- a/arch/powerpc/kvm/trace_book3s.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#if !defined(_TRACE_KVM_BOOK3S_H)
-#define _TRACE_KVM_BOOK3S_H
-
-/*
- * Common defines used by the trace macros in trace_pr.h and trace_hv.h
- */
-
-#define kvm_trace_symbol_exit \
-   {0x100, SYSTEM_RESET}, \
-   {0x200, MACHINE_CHECK}, \
-   {0x300, DATA_STORAGE}, \
-   {0x380, DATA_SEGMENT}, \
-   {0x400, INST_STORAGE}, \
-   {0x480, INST_SEGMENT}, \
-   {0x500, EXTERNAL}, \
-   {0x501, EXTERNAL_LEVEL}, \
-   {0x502, EXTERNAL_HV}, \
-   {0x600, ALIGNMENT}, \
-   {0x700, PROGRAM}, \
-   {0x800, FP_UNAVAIL}, \
-   {0x900, DECREMENTER}, \
-   {0x980, HV_DECREMENTER}, \
-   {0xc00, SYSCALL}, \
-   {0xd00, TRACE}, \
-   {0xe00, H_DATA_STORAGE}, \
-   {0xe20, H_INST_STORAGE}, \
-   {0xe40, H_EMUL_ASSIST}, \
-   {0xf00, PERFMON}, \
-   {0xf20, ALTIVEC}, \
-   {0xf40, VSX}
-
-#endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 33d9daf..02d0a07 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -2,7 +2,7 @@
 #define _TRACE_KVM_HV_H
 
 #include linux/tracepoint.h
-#include trace_book3s.h
+#include uapi/asm/trace_book3s.h
 #include asm

Re: [PATCH v3 1/2] perf/kvm: Port perf kvm to powerpc

2015-05-20 Thread Hemant Kumar

Hi Scott,

On 05/13/2015 08:52 AM, Scott Wood wrote:

On Tue, 2015-05-12 at 21:34 +0530, Hemant Kumar wrote:

Hi Scott,

On 05/12/2015 03:38 AM, Scott Wood wrote:

On Fri, 2015-05-08 at 06:37 +0530, Hemant Kumar wrote:

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..30fa670
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/trace_book3s.h
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_H */

Please make sure that anything book3s-specific is named that way.

Are you suggesting to name it to something like _ASM_POWERPC_BOOK3S_PERF_H ?

My concern is seeing a generically named kvm_perf.h include a file
called trace_book3s.h which defines kvm_trace_symbol_hcall with
presumably book3s-specific content, as well as wondering how much of the
rest of the file would be applicable if booke PPC were to implement perf
kvm.

I don't know enough about perf kvm to answer that question, but I've
seen enough cases of book3s or pseries specific code that was apparently
written with the belief that no other ppc64 implementations exist, or
that no other ppc implementations would want to implement a certain
feature, to be suspicious.  Usually such cases can be dealt with after
the fact (albeit not as easily as if things were organized/namespaced
properly from the beginning), but this is uapi...

-Scott



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Tried to address your comments in v4 :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg89490.html
and
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg89485.html

--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 2/3] kvm/powerpc: Add exit reason for return code 0x0

2015-05-20 Thread Hemant Kumar
This patch adds an exit reason RETURN_TO_HOST for the return code
0x0. Note that this is not related to any interrupt vector address, but
this is added just to make sure that perf doesn't complain if and when a
kvm exit happens with a trap code as 0x0.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/uapi/asm/trace_book3s.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
index f647ce0..8635005 100644
--- a/arch/powerpc/include/uapi/asm/trace_book3s.h
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -6,6 +6,7 @@
  */
 
 #define kvm_trace_symbol_exit \
+   {0x0,   RETURN_TO_HOST}, \
{0x100, SYSTEM_RESET}, \
{0x200, MACHINE_CHECK}, \
{0x300, DATA_STORAGE}, \
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 3/3] kvm/powerpc: Export HCALL reason codes

2015-05-20 Thread Hemant Kumar
For perf to analyze the KVM events like hcalls, we need the
hypervisor calls and their codes to be exported through uapi.

This patch moves most of the pSeries hcall codes from
arch/powerpc/include/asm/hvcall.h to
arch/powerpc/include/uapi/asm/pseries_hcalls.h.
It also moves the mapping hcall_code-to-hcall_reason from
arch/powerpc/kvm/trace_hv.h to
arch/powerpc/include/uapi/asm/trace_hcall_pseries.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes :
- Made name space changes to indicate changes related to pseries
  (Suggested by Scott Wood)

 arch/powerpc/include/asm/hvcall.h  | 120 +---
 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h|   4 +
 arch/powerpc/include/uapi/asm/pseries_hcalls.h | 123 +
 .../powerpc/include/uapi/asm/trace_hcall_pseries.h | 122 
 arch/powerpc/kvm/trace_hv.h| 117 +---
 5 files changed, 252 insertions(+), 234 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/pseries_hcalls.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_hcall_pseries.h

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 85bc8c0..6e38210 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -155,124 +155,8 @@
 /* Each control block has to be on a 4K boundary */
 #define H_CB_ALIGNMENT  4096
 
-/* pSeries hypervisor opcodes */
-#define H_REMOVE   0x04
-#define H_ENTER0x08
-#define H_READ 0x0c
-#define H_CLEAR_MOD0x10
-#define H_CLEAR_REF0x14
-#define H_PROTECT  0x18
-#define H_GET_TCE  0x1c
-#define H_PUT_TCE  0x20
-#define H_SET_SPRG00x24
-#define H_SET_DABR 0x28
-#define H_PAGE_INIT0x2c
-#define H_SET_ASR  0x30
-#define H_ASR_ON   0x34
-#define H_ASR_OFF  0x38
-#define H_LOGICAL_CI_LOAD  0x3c
-#define H_LOGICAL_CI_STORE 0x40
-#define H_LOGICAL_CACHE_LOAD   0x44
-#define H_LOGICAL_CACHE_STORE  0x48
-#define H_LOGICAL_ICBI 0x4c
-#define H_LOGICAL_DCBF 0x50
-#define H_GET_TERM_CHAR0x54
-#define H_PUT_TERM_CHAR0x58
-#define H_REAL_TO_LOGICAL  0x5c
-#define H_HYPERVISOR_DATA  0x60
-#define H_EOI  0x64
-#define H_CPPR 0x68
-#define H_IPI  0x6c
-#define H_IPOLL0x70
-#define H_XIRR 0x74
-#define H_PERFMON  0x7c
-#define H_MIGRATE_DMA  0x78
-#define H_REGISTER_VPA 0xDC
-#define H_CEDE 0xE0
-#define H_CONFER   0xE4
-#define H_PROD 0xE8
-#define H_GET_PPP  0xEC
-#define H_SET_PPP  0xF0
-#define H_PURR 0xF4
-#define H_PIC  0xF8
-#define H_REG_CRQ  0xFC
-#define H_FREE_CRQ 0x100
-#define H_VIO_SIGNAL   0x104
-#define H_SEND_CRQ 0x108
-#define H_COPY_RDMA0x110
-#define H_REGISTER_LOGICAL_LAN 0x114
-#define H_FREE_LOGICAL_LAN 0x118
-#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
-#define H_SEND_LOGICAL_LAN 0x120
-#define H_BULK_REMOVE  0x124
-#define H_MULTICAST_CTRL   0x130
-#define H_SET_XDABR0x134
-#define H_STUFF_TCE0x138
-#define H_PUT_TCE_INDIRECT 0x13C
-#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
-#define H_VTERM_PARTNER_INFO   0x150
-#define H_REGISTER_VTERM   0x154
-#define H_FREE_VTERM   0x158
-#define H_RESET_EVENTS  0x15C
-#define H_ALLOC_RESOURCE0x160
-#define H_FREE_RESOURCE 0x164
-#define H_MODIFY_QP 0x168
-#define H_QUERY_QP  0x16C
-#define H_REREGISTER_PMR0x170
-#define H_REGISTER_SMR  0x174
-#define H_QUERY_MR  0x178
-#define H_QUERY_MW  0x17C
-#define H_QUERY_HCA 0x180
-#define H_QUERY_PORT0x184
-#define H_MODIFY_PORT   0x188
-#define H_DEFINE_AQP1   0x18C
-#define H_GET_TRACE_BUFFER  0x190
-#define H_DEFINE_AQP0   0x194
-#define H_RESIZE_MR 0x198
-#define H_ATTACH_MCQP   0x19C
-#define H_DETACH_MCQP   0x1A0
-#define H_CREATE_RPT0x1A4
-#define H_REMOVE_RPT0x1A8
-#define H_REGISTER_RPAGES   0x1AC
-#define H_DISABLE_AND_GETC  0x1B0
-#define H_ERROR_DATA0x1B4
-#define H_GET_HCA_INFO  0x1B8
-#define H_GET_PERF_COUNT0x1BC
-#define H_MANAGE_TRACE  0x1C0
-#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
-#define H_QUERY_INT_STATE   0x1E4
-#define H_POLL_PENDING 0x1D8
-#define H_ILLAN_ATTRIBUTES 0x244
-#define H_MODIFY_HEA_QP0x250
-#define H_QUERY_HEA_QP 0x254
-#define H_QUERY_HEA0x258
-#define H_QUERY_HEA_PORT   0x25C
-#define

Re: [RFC PATCH] perf/kvm: Guest Symbol Resolution for powerpc

2015-06-16 Thread Hemant Kumar

Hi David,

Thanks for the review.

On 06/16/2015 08:23 PM, David Ahern wrote:

On 6/15/15 8:50 PM, Hemant Kumar wrote:

+/*
+ * Get the instruction pointer from the tracepoint data
+ */
+u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *data)
+{
+u64 tp_ip = data-ip;
+int trap;
+
+if (!strcmp(KVMPPC_EXIT, evsel-name)) {
+trap = raw_field_value(evsel-tp_format, trap, 
data-raw_data);

+
+if (trap == HV_DECREMENTER)
+tp_ip = raw_field_value(evsel-tp_format, pc,
+data-raw_data);
+}
+return tp_ip;
+}


You can tie a handler to an event; see builtin-trace.c for example 
(evsel->handler = handler). Then have the sample handler call it (e.g., 
see trace__process_sample). Then you don't have to check event names 
on each pass like this and just do event based processing.



+
+/*
+ * Get the HV and PR bits and accordingly, determine the cpumode
+ */
+u8 arch__get_cpumode(union perf_event *event, struct perf_evsel *evsel,
+ struct perf_sample *data)
+{
+unsigned long hv, pr, msr;
+u8 cpumode = event-header.misc  PERF_RECORD_MISC_CPUMODE_MASK;
+
+if (strcmp(KVMPPC_EXIT, evsel-name))
+goto ret;
+
+if (data-raw_data)
+msr = raw_field_value(evsel-tp_format, msr, data-raw_data);
+else
+goto ret;
+
+hv = msr  ((long unsigned)1  (PPC_MAX - HV_BIT));
+pr = msr  ((long unsigned)1  (PPC_MAX - PR_BIT));
+
+if (!hv  pr)
+cpumode = PERF_RECORD_MISC_GUEST_USER;
+else
+cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+ret:
+return cpumode;
+}


Why isn't that set properly kernel side when the sample is generated?




Because this depends on the kernel tracepoint kvm_hv:kvm_guest_exit.
perf_prepare_sample() on the kernel side sets the event->header.misc
field to PERF_RECORD_MISC_KERNEL through perf_misc_flags(pt_regs). In
the case of tracepoints, which always get hit in the host kernel
context, perf_misc_flags() will always return PERF_RECORD_MISC_KERNEL.

IMHO we will rather have to set the cpumode in user space for this
tracepoint; we can't depend on the event->header.misc field in this
case.

What would you suggest?

--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH] perf/kvm: Guest Symbol Resolution for powerpc

2015-06-17 Thread Hemant Kumar

Hi Arnaldo,

On 06/16/2015 09:08 PM, Arnaldo Carvalho de Melo wrote:

Em Tue, Jun 16, 2015 at 08:20:53AM +0530, Hemant Kumar escreveu:

perf kvm {record|report} is used to record and report the performance
profile of any workload on a guest. From the host, we can collect
guest kernel statistics, which are useful in finding out any contention
in guest kernel symbols for a certain workload.

This feature is not available on powerpc because perf relies on the
cycles event (a PMU event) to profile the guest. However, for powerpc,
this can't be used from the host because the PMUs are controlled by the
guest rather than the host.

Due to this problem, we need a different approach to profile the
workload in the guest. There exists a tracepoint kvm_hv:kvm_guest_exit
in powerpc which is hit whenever any of the threads exit the guest
context. The guest instruction pointer dumped along with this
tracepoint data in the field pc, can be used as guest instruction
pointer while postprocessing the trace data to map this IP to symbol
from guest.kallsyms.

However, to have some kind of periodicity, we can't use all the kvm
exits, rather exits which are bound to happen in certain intervals.
HV_DECREMENTER Interrupt forces the threads to exit after an interval
of 10 ms.

This patch makes use of the kvm_guest_exit tracepoint and checks the
exit reason for any kvm exit. If it is HV_DECREMENTER, then the
instruction pointer dumped along with this tracepoint is retrieved and
mapped with the guest kallsyms.

This patch is a prototype asking for suggestions/comments as to whether
the approach is right, or whether there is a better way than this (like
using a different event to profile for, etc.) to profile the guest from
the host.

Thank You.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
  tools/perf/arch/powerpc/Makefile|  1 +
  tools/perf/arch/powerpc/util/parse-tp.c | 55 +
  tools/perf/builtin-report.c |  9 ++
  tools/perf/util/event.c |  7 -
  tools/perf/util/evsel.c |  7 +
  tools/perf/util/evsel.h |  4 +++
  tools/perf/util/session.c   |  7 +++--
  7 files changed, 86 insertions(+), 4 deletions(-)
  create mode 100644 tools/perf/arch/powerpc/util/parse-tp.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 6f7782b..992a0d5 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -4,3 +4,4 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o
  endif
  LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/parse-tp.o
diff --git a/tools/perf/arch/powerpc/util/parse-tp.c 
b/tools/perf/arch/powerpc/util/parse-tp.c
new file mode 100644
index 000..4c6e49c
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/parse-tp.c
@@ -0,0 +1,55 @@
+#include ../../util/evsel.h
+#include ../../util/trace-event.h
+#include ../../util/session.h
+
+#define KVMPPC_EXIT kvm_hv:kvm_guest_exit
+#define HV_DECREMENTER 2432
+#define HV_BIT 3
+#define PR_BIT 49
+#define PPC_MAX 63
+
+/*
+ * Get the instruction pointer from the tracepoint data
+ */
+u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *data)
+{
+   u64 tp_ip = data-ip;
+   int trap;
+
+   if (!strcmp(KVMPPC_EXIT, evsel-name)) {

Can't you cache this somewhere? I.e. something like
  
	static int kvmppc_exit = -1;


if (evsel-attr.type != PERF_TRACEPOINT)
goto out;

if (unlikely(kvmppc_exit == -1)) {
if (strcmp(KVMPPC_EXIT, evsel-name)))
goto out;

kvmppc_exit = evsel-attr.config;
} else (if kvmppc_exit != evsel-attr.config)
goto out;


Will try this.




+   trap = raw_field_value(evsel-tp_format, trap, data-raw_data);
+
+   if (trap == HV_DECREMENTER)
+   tp_ip = raw_field_value(evsel-tp_format, pc,
+   data-raw_data);

out:


+   return tp_ip;
+}


Also we have:

u64 perf_evsel__intval(struct perf_evsel *evsel,
   struct perf_sample *sample, const char *name);

So:

trap = perf_evsel__intval(evsel, sample, trap);

And:

tp_ip = perf_evsel__intval(evsel, sample, pc);

Makes it a bit shorter and allows for optimizations in how to find that
field by name made at the evsel code.


Thanks, missed perf_evsel__intval, will use this in the next iteration.


- Arnaldo


+
+/*
+ * Get the HV and PR bits and accordingly, determine the cpumode
+ */
+u8 arch__get_cpumode(union perf_event *event, struct perf_evsel *evsel,
+struct perf_sample *data)
+{
+   unsigned long hv, pr, msr;
+   u8 cpumode = event-header.misc  PERF_RECORD_MISC_CPUMODE_MASK;
+
+   if (strcmp(KVMPPC_EXIT, evsel-name))
+   goto ret;
+
+   if (data-raw_data

[RFC PATCH] perf/kvm: Guest Symbol Resolution for powerpc

2015-06-15 Thread Hemant Kumar
perf kvm {record|report} is used to record and report the performance
profile of any workload on a guest. From the host, we can collect
guest kernel statistics, which are useful in finding out any contention
in guest kernel symbols for a certain workload.

This feature is not available on powerpc because perf relies on the
cycles event (a PMU event) to profile the guest. However, for powerpc,
this can't be used from the host because the PMUs are controlled by the
guest rather than the host.

Due to this problem, we need a different approach to profile the
workload in the guest. There exists a tracepoint kvm_hv:kvm_guest_exit
in powerpc which is hit whenever any of the threads exit the guest
context. The guest instruction pointer dumped along with this
tracepoint data in the field pc, can be used as guest instruction
pointer while postprocessing the trace data to map this IP to symbol
from guest.kallsyms.

However, to have some kind of periodicity, we can't use all the kvm
exits, rather exits which are bound to happen in certain intervals.
HV_DECREMENTER Interrupt forces the threads to exit after an interval
of 10 ms.

This patch makes use of the kvm_guest_exit tracepoint and checks the
exit reason for any kvm exit. If it is HV_DECREMENTER, then the
instruction pointer dumped along with this tracepoint is retrieved and
mapped with the guest kallsyms.

This patch is a prototype asking for suggestions/comments as to whether
the approach is right, or whether there is a better way than this (like
using a different event to profile for, etc.) to profile the guest from
the host.

Thank You.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 tools/perf/arch/powerpc/Makefile|  1 +
 tools/perf/arch/powerpc/util/parse-tp.c | 55 +
 tools/perf/builtin-report.c |  9 ++
 tools/perf/util/event.c |  7 -
 tools/perf/util/evsel.c |  7 +
 tools/perf/util/evsel.h |  4 +++
 tools/perf/util/session.c   |  7 +++--
 7 files changed, 86 insertions(+), 4 deletions(-)
 create mode 100644 tools/perf/arch/powerpc/util/parse-tp.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 6f7782b..992a0d5 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -4,3 +4,4 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o
 endif
 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/parse-tp.o
diff --git a/tools/perf/arch/powerpc/util/parse-tp.c 
b/tools/perf/arch/powerpc/util/parse-tp.c
new file mode 100644
index 000..4c6e49c
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/parse-tp.c
@@ -0,0 +1,55 @@
+#include ../../util/evsel.h
+#include ../../util/trace-event.h
+#include ../../util/session.h
+
+#define KVMPPC_EXIT kvm_hv:kvm_guest_exit
+#define HV_DECREMENTER 2432
+#define HV_BIT 3
+#define PR_BIT 49
+#define PPC_MAX 63
+
+/*
+ * Get the instruction pointer from the tracepoint data
+ */
+u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *data)
+{
+   u64 tp_ip = data-ip;
+   int trap;
+
+   if (!strcmp(KVMPPC_EXIT, evsel-name)) {
+   trap = raw_field_value(evsel-tp_format, trap, 
data-raw_data);
+
+   if (trap == HV_DECREMENTER)
+   tp_ip = raw_field_value(evsel-tp_format, pc,
+   data-raw_data);
+   }
+   return tp_ip;
+}
+
+/*
+ * Get the HV and PR bits and accordingly, determine the cpumode
+ */
+u8 arch__get_cpumode(union perf_event *event, struct perf_evsel *evsel,
+struct perf_sample *data)
+{
+   unsigned long hv, pr, msr;
+   u8 cpumode = event-header.misc  PERF_RECORD_MISC_CPUMODE_MASK;
+
+   if (strcmp(KVMPPC_EXIT, evsel-name))
+   goto ret;
+
+   if (data-raw_data)
+   msr = raw_field_value(evsel-tp_format, msr, data-raw_data);
+   else
+   goto ret;
+
+   hv = msr  ((long unsigned)1  (PPC_MAX - HV_BIT));
+   pr = msr  ((long unsigned)1  (PPC_MAX - PR_BIT));
+
+   if (!hv  pr)
+   cpumode = PERF_RECORD_MISC_GUEST_USER;
+   else
+   cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+ret:
+   return cpumode;
+}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 072ae8a..e3fe5d0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -141,6 +141,13 @@ out:
return err;
 }
 
+u8 __weak arch__get_cpumode(union perf_event *event,
+   __maybe_unused struct perf_evsel *evsel,
+   __maybe_unused struct perf_sample *sample)
+{
+   return event-header.misc  PERF_RECORD_MISC_CPUMODE_MASK;
+}
+
 static int process_sample_event(struct perf_tool *tool,
union perf_event *event

Re: [PATCH RESEND v4 1/3] kvm/powerpc: Export kvm exit reasons

2015-06-15 Thread Hemant Kumar

Hi Paul,

On 06/15/2015 11:09 AM, Paul Mackerras wrote:

On Mon, Jun 15, 2015 at 10:26:07AM +0530, Hemant Kumar wrote:

To analyze the kvm exits with perf, we will need to map the exit codes
with the exit reasons. Such a mapping exists today in trace_book3s.h.
Currently it's not exported to perf.

This patch moves these kvm exit reasons and their mapping from
arch/powerpc/kvm/trace_book3s.h to
arch/powerpc/include/uapi/asm/trace_book3s.h.
Accordingly change the include files in trace_hv.h and trace_pr.h.

These are not really exit reasons so much as Power ISA interrupt
vectors, defined externally to the kernel (in the Power ISA document)
and not subject to change (at least, kernel developers can't change
them).  So I don't see why this needs to be exported from the
kernel.

Paul.



The exit reasons are needed in the perf userspace and we wanted to avoid
code duplication, so, if there are any changes, we won't need to update them
at both places.
However, we could add them to perf userspace itself separately and let perf
userspace use those.

What would you suggest?

--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RESEND v4 1/2] perf/kvm: Port perf kvm to powerpc

2015-06-14 Thread Hemant Kumar
From: Srikar Dronamraju sri...@linux.vnet.ibm.com

perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid pid

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

This patch makes use of the guest exit reasons available in
trace_book3s.h. It records on two already available tracepoints :
kvm_hv:kvm_guest_exit and kvm_hv:kvm_guest_enter.

Note : This patch has a dependency on the patch kvm/powerpc: Export
kvm exit reasons which exports the KVM exit reasons through the uapi.

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515
Analyze events for pid(s) 60515, all VCPUs:

         VM-EXIT    Samples  Samples%     Time%    Min Time      Max Time       Avg time

  H_DATA_STORAGE       5006    35.30%     0.13%      1.94us       49.46us       12.37us ( +-   0.52% )
  HV_DECREMENTER       4457    31.43%     0.02%      0.72us       16.14us        1.91us ( +-   0.96% )
         SYSCALL       2690    18.97%     0.10%      2.84us      528.24us       18.29us ( +-   3.75% )
  RETURN_TO_HOST       1789    12.61%    99.76%      1.58us   672791.91us    27470.23us ( +-   3.00% )
        EXTERNAL        240     1.69%     0.00%      0.69us       10.67us        1.33us ( +-   5.34% )

Total Samples:14182, Total events handled time:49264158.30us.

Signed-off-by: Srikar Dronamraju sri...@linux.vnet.ibm.com
Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Patch Resend :
- Added few more people to cc list.
- Rebased it to current tip.
Changes :
- Moved the uapi related changes to arch/powerpc patchset.

This patch has a dependency on :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg90506.html
which exports the kvm exit reasons.

 tools/perf/arch/powerpc/Makefile|  1 +
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/kvm-stat.c | 33 +
 3 files changed, 35 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..21322e0 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 000..24e06bf
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -0,0 +1,33 @@
+#include ../../util/kvm-stat.h
+#include asm/kvm_perf_book3s.h
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+
+static struct kvm_events_ops exit_events = {
+   .is_begin_event = exit_event_begin,
+   .is_end_event = exit_event_end,
+   .decode_key = exit_event_decode_key,
+   .name = VM-EXIT
+};
+
+const char *const kvm_events_tp[] = {
+   kvm_hv:kvm_guest_exit,
+   kvm_hv:kvm_guest_enter,
+   NULL,
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+   { .name = vmexit, .ops = exit_events },
+   { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+   NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+   kvm-exit_reasons = hv_exit_reasons;
+   kvm-exit_reasons_isa = HV;
+   return 0;
+}
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RESEND v4 2/2] perf/kvm: Support HCALL events

2015-06-14 Thread Hemant Kumar
powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
kvm_hv:kvm_hcall_enter and kvm_hv:kvm_hcall_exit. It uses the
pSeries hypervisor codes exported through uapi to classify the hcalls
into their respective reasons.

Note : This patch has a dependency on kvm/powerpc: Export HCALL reason
codes which exports HCALL reasons through uapi.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515 --event=hcall
Analyze events for pid(s) 60515, all VCPUs:

      HCALL-EVENT    Samples  Samples%     Time%    Min Time    Max Time     Avg time

     H_VIO_SIGNAL       1034    38.44%    15.77%      0.36us      1.59us      0.44us ( +-   0.66% )
       H_SEND_CRQ        652    24.24%    10.97%      0.39us      1.84us      0.49us ( +-   1.20% )
            H_IPI        523    19.44%    62.05%      1.35us     19.70us      3.44us ( +-   2.88% )
  H_PUT_TERM_CHAR        411    15.28%     8.03%      0.38us      3.77us      0.57us ( +-   1.61% )
  H_GET_TERM_CHAR         50     1.86%     0.99%      0.40us      0.98us      0.57us ( +-   3.37% )
            H_EOI         20     0.74%     2.19%      2.22us      4.72us      3.17us ( +-   5.96% )

Total Samples:2690, Total events handled time:2896.94us.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes:
- Moved the uapi related changes to arch/powerpc side patchset.

This patch has a dependency on :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg90507.html
which exports hcall reasons through uapi.

 tools/perf/arch/powerpc/util/kvm-stat.c | 61 +
 1 file changed, 61 insertions(+)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index 24e06bf..0d3ea47 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -1,7 +1,9 @@
 #include ../../util/kvm-stat.h
 #include asm/kvm_perf_book3s.h
+#include ../../util/debug.h
 
 define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
 
 static struct kvm_events_ops exit_events = {
.is_begin_event = exit_event_begin,
@@ -10,14 +12,73 @@ static struct kvm_events_ops exit_events = {
.name = VM-EXIT
 };
 
+static void hcall_event_get_key(struct perf_evsel *evsel,
+   struct perf_sample *sample,
+   struct event_key *key)
+{
+   key-info = 0;
+   key-key = perf_evsel__intval(evsel, sample, KVM_HCALL_REASON);
+}
+
+static const char *get_exit_reason(u64 exit_code)
+{
+   struct exit_reasons_table *tbl = hcall_reasons;
+
+   while (tbl-reason != NULL) {
+   if (tbl-exit_code == exit_code)
+   return tbl-reason;
+   tbl++;
+   }
+
+   pr_err(Unknown kvm hcall exit code: %lld\n,
+  (unsigned long long)exit_code);
+   return UNKNOWN;
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+   struct perf_sample *sample __maybe_unused,
+   struct event_key *key __maybe_unused)
+{
+   return (!strcmp(evsel-name, KVM_HCALL_EXIT_TRACE));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+   if (!strcmp(evsel-name, KVM_HCALL_ENTRY_TRACE)) {
+   hcall_event_get_key(evsel, sample, key);
+   return true;
+   }
+
+return false;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+  struct event_key *key,
+  char *decode)
+{
+   const char *hcall_reason = get_exit_reason(key-key);
+
+   scnprintf(decode, DECODE_STR_LEN, %s, hcall_reason);
+}
+
+static struct kvm_events_ops hcall_events = {
+   .is_begin_event = hcall_event_begin,
+   .is_end_event = hcall_event_end,
+   .decode_key = hcall_event_decode_key,
+   .name = HCALL-EVENT,
+};
+
 const char *const kvm_events_tp[] = {
kvm_hv:kvm_guest_exit,
kvm_hv:kvm_guest_enter,
+   kvm_hv:kvm_hcall_enter,
+   kvm_hv:kvm_hcall_exit,
NULL,
 };
 
 struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = vmexit, .ops = exit_events },
+   { .name = hcall, .ops = hcall_events },
{ NULL, NULL

[PATCH RESEND v4 1/3] kvm/powerpc: Export kvm exit reasons

2015-06-14 Thread Hemant Kumar
To analyze the kvm exits with perf, we will need to map the exit codes
with the exit reasons. Such a mapping exists today in trace_book3s.h.
Currently it's not exported to perf.

This patch moves these kvm exit reasons and their mapping from
arch/powerpc/kvm/trace_book3s.h to
arch/powerpc/include/uapi/asm/trace_book3s.h.
Accordingly change the include files in trace_hv.h and trace_pr.h.

Also, add a file kvm_perf_book3s.h which defines the kvm tracepoints to
trace for kvm exit events. This is added to indicate that the
tracepoints are book3s specific. Generic kvm_perf.h then can just
include kvm_perf_book3s.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Resend :
- Rebased to the latest tip.
- Added few people to cc list.

Changes :
- Moved the uapi related changes from the perf side patchset to this
  patchset.
- Made name space changes to indicate changes specific to book3s
  (Suggested by Scott Wood)

 arch/powerpc/include/uapi/asm/kvm_perf.h|  6 +
 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h | 15 
 arch/powerpc/include/uapi/asm/trace_book3s.h| 32 +
 arch/powerpc/kvm/trace_book3s.h | 32 -
 arch/powerpc/kvm/trace_hv.h |  2 +-
 arch/powerpc/kvm/trace_pr.h |  2 +-
 6 files changed, 55 insertions(+), 34 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_book3s.h
 delete mode 100644 arch/powerpc/kvm/trace_book3s.h

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..5ed2ff3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/kvm_perf_book3s.h
+
+#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h 
b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
new file mode 100644
index 000..735901f
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_KVM_PERF_BOOK3S_H
+#define _ASM_POWERPC_KVM_PERF_BOOK3S_H
+
+#include asm/trace_book3s.h
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_BOOK3S_H */
diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
new file mode 100644
index 000..f647ce0
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x100, SYSTEM_RESET}, \
+   {0x200, MACHINE_CHECK}, \
+   {0x300, DATA_STORAGE}, \
+   {0x380, DATA_SEGMENT}, \
+   {0x400, INST_STORAGE}, \
+   {0x480, INST_SEGMENT}, \
+   {0x500, EXTERNAL}, \
+   {0x501, EXTERNAL_LEVEL}, \
+   {0x502, EXTERNAL_HV}, \
+   {0x600, ALIGNMENT}, \
+   {0x700, PROGRAM}, \
+   {0x800, FP_UNAVAIL}, \
+   {0x900, DECREMENTER}, \
+   {0x980, HV_DECREMENTER}, \
+   {0xc00, SYSCALL}, \
+   {0xd00, TRACE}, \
+   {0xe00, H_DATA_STORAGE}, \
+   {0xe20, H_INST_STORAGE}, \
+   {0xe40, H_EMUL_ASSIST}, \
+   {0xf00, PERFMON}, \
+   {0xf20, ALTIVEC}, \
+   {0xf40, VSX}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
deleted file mode 100644
index f647ce0..000
--- a/arch/powerpc/kvm/trace_book3s.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#if !defined(_TRACE_KVM_BOOK3S_H)
-#define _TRACE_KVM_BOOK3S_H
-
-/*
- * Common defines used by the trace macros in trace_pr.h and trace_hv.h
- */
-
-#define kvm_trace_symbol_exit \
-   {0x100, SYSTEM_RESET}, \
-   {0x200, MACHINE_CHECK}, \
-   {0x300, DATA_STORAGE}, \
-   {0x380, DATA_SEGMENT}, \
-   {0x400, INST_STORAGE}, \
-   {0x480, INST_SEGMENT}, \
-   {0x500, EXTERNAL}, \
-   {0x501, EXTERNAL_LEVEL}, \
-   {0x502, EXTERNAL_HV}, \
-   {0x600, ALIGNMENT}, \
-   {0x700, PROGRAM}, \
-   {0x800, FP_UNAVAIL}, \
-   {0x900, DECREMENTER}, \
-   {0x980, HV_DECREMENTER}, \
-   {0xc00, SYSCALL}, \
-   {0xd00, TRACE}, \
-   {0xe00, H_DATA_STORAGE}, \
-   {0xe20, H_INST_STORAGE}, \
-   {0xe40, H_EMUL_ASSIST}, \
-   {0xf00, PERFMON}, \
-   {0xf20, ALTIVEC}, \
-   {0xf40, VSX}
-
-#endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 33d9daf..02d0a07 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -2,7 +2,7 @@
 #define _TRACE_KVM_HV_H
 
 #include linux/tracepoint.h
-#include

[PATCH RESEND v4 3/3] kvm/powerpc: Export HCALL reason codes

2015-06-14 Thread Hemant Kumar
For perf to analyze the KVM events like hcalls, we need the
hypervisor calls and their codes to be exported through uapi.

This patch moves most of the pSeries hcall codes from
arch/powerpc/include/asm/hvcall.h to
arch/powerpc/include/uapi/asm/pseries_hcalls.h.
It also moves the mapping hcall_code-to-hcall_reason from
arch/powerpc/kvm/trace_hv.h to
arch/powerpc/include/uapi/asm/trace_hcall_pseries.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes :
- Made name space changes to indicate changes related to pseries
  (Suggested by Scott Wood)

 arch/powerpc/include/asm/hvcall.h  | 120 +---
 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h|   4 +
 arch/powerpc/include/uapi/asm/pseries_hcalls.h | 123 +
 .../powerpc/include/uapi/asm/trace_hcall_pseries.h | 122 
 arch/powerpc/kvm/trace_hv.h| 117 +---
 5 files changed, 252 insertions(+), 234 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/pseries_hcalls.h
 create mode 100644 arch/powerpc/include/uapi/asm/trace_hcall_pseries.h

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 85bc8c0..6e38210 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -155,124 +155,8 @@
 /* Each control block has to be on a 4K boundary */
 #define H_CB_ALIGNMENT  4096
 
-/* pSeries hypervisor opcodes */
-#define H_REMOVE   0x04
-#define H_ENTER0x08
-#define H_READ 0x0c
-#define H_CLEAR_MOD0x10
-#define H_CLEAR_REF0x14
-#define H_PROTECT  0x18
-#define H_GET_TCE  0x1c
-#define H_PUT_TCE  0x20
-#define H_SET_SPRG00x24
-#define H_SET_DABR 0x28
-#define H_PAGE_INIT0x2c
-#define H_SET_ASR  0x30
-#define H_ASR_ON   0x34
-#define H_ASR_OFF  0x38
-#define H_LOGICAL_CI_LOAD  0x3c
-#define H_LOGICAL_CI_STORE 0x40
-#define H_LOGICAL_CACHE_LOAD   0x44
-#define H_LOGICAL_CACHE_STORE  0x48
-#define H_LOGICAL_ICBI 0x4c
-#define H_LOGICAL_DCBF 0x50
-#define H_GET_TERM_CHAR0x54
-#define H_PUT_TERM_CHAR0x58
-#define H_REAL_TO_LOGICAL  0x5c
-#define H_HYPERVISOR_DATA  0x60
-#define H_EOI  0x64
-#define H_CPPR 0x68
-#define H_IPI  0x6c
-#define H_IPOLL0x70
-#define H_XIRR 0x74
-#define H_PERFMON  0x7c
-#define H_MIGRATE_DMA  0x78
-#define H_REGISTER_VPA 0xDC
-#define H_CEDE 0xE0
-#define H_CONFER   0xE4
-#define H_PROD 0xE8
-#define H_GET_PPP  0xEC
-#define H_SET_PPP  0xF0
-#define H_PURR 0xF4
-#define H_PIC  0xF8
-#define H_REG_CRQ  0xFC
-#define H_FREE_CRQ 0x100
-#define H_VIO_SIGNAL   0x104
-#define H_SEND_CRQ 0x108
-#define H_COPY_RDMA0x110
-#define H_REGISTER_LOGICAL_LAN 0x114
-#define H_FREE_LOGICAL_LAN 0x118
-#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
-#define H_SEND_LOGICAL_LAN 0x120
-#define H_BULK_REMOVE  0x124
-#define H_MULTICAST_CTRL   0x130
-#define H_SET_XDABR0x134
-#define H_STUFF_TCE0x138
-#define H_PUT_TCE_INDIRECT 0x13C
-#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
-#define H_VTERM_PARTNER_INFO   0x150
-#define H_REGISTER_VTERM   0x154
-#define H_FREE_VTERM   0x158
-#define H_RESET_EVENTS  0x15C
-#define H_ALLOC_RESOURCE0x160
-#define H_FREE_RESOURCE 0x164
-#define H_MODIFY_QP 0x168
-#define H_QUERY_QP  0x16C
-#define H_REREGISTER_PMR0x170
-#define H_REGISTER_SMR  0x174
-#define H_QUERY_MR  0x178
-#define H_QUERY_MW  0x17C
-#define H_QUERY_HCA 0x180
-#define H_QUERY_PORT0x184
-#define H_MODIFY_PORT   0x188
-#define H_DEFINE_AQP1   0x18C
-#define H_GET_TRACE_BUFFER  0x190
-#define H_DEFINE_AQP0   0x194
-#define H_RESIZE_MR 0x198
-#define H_ATTACH_MCQP   0x19C
-#define H_DETACH_MCQP   0x1A0
-#define H_CREATE_RPT0x1A4
-#define H_REMOVE_RPT0x1A8
-#define H_REGISTER_RPAGES   0x1AC
-#define H_DISABLE_AND_GETC  0x1B0
-#define H_ERROR_DATA0x1B4
-#define H_GET_HCA_INFO  0x1B8
-#define H_GET_PERF_COUNT0x1BC
-#define H_MANAGE_TRACE  0x1C0
-#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
-#define H_QUERY_INT_STATE   0x1E4
-#define H_POLL_PENDING 0x1D8
-#define H_ILLAN_ATTRIBUTES 0x244
-#define H_MODIFY_HEA_QP0x250
-#define H_QUERY_HEA_QP 0x254
-#define H_QUERY_HEA0x258
-#define H_QUERY_HEA_PORT   0x25C
-#define

[PATCH RESEND v4 2/3] kvm/powerpc: Add exit reason for return code 0x0

2015-06-14 Thread Hemant Kumar
This patch adds an exit reason RETURN_TO_HOST for the return code
0x0. Note that this is not related to any interrupt vector address, but
this is added just to make sure that perf doesn't complain if and when a
kvm exit happens with a trap code as 0x0.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
 arch/powerpc/include/uapi/asm/trace_book3s.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/uapi/asm/trace_book3s.h 
b/arch/powerpc/include/uapi/asm/trace_book3s.h
index f647ce0..8635005 100644
--- a/arch/powerpc/include/uapi/asm/trace_book3s.h
+++ b/arch/powerpc/include/uapi/asm/trace_book3s.h
@@ -6,6 +6,7 @@
  */
 
 #define kvm_trace_symbol_exit \
+   {0x0,   RETURN_TO_HOST}, \
{0x100, SYSTEM_RESET}, \
{0x200, MACHINE_CHECK}, \
{0x300, DATA_STORAGE}, \
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 1/2] perf,kvm/ppc: Add kvm_perf.h for powerpc

2015-07-29 Thread Hemant Kumar

Hi Scott,

On 07/17/2015 01:40 AM, Scott Wood wrote:

On Thu, 2015-07-16 at 21:18 +0530, Hemant Kumar wrote:

To analyze the exit events with perf, we need kvm_perf.h to be added in
the arch/powerpc directory, where the kvm tracepoints needed to trace
the KVM exit events are defined.

This patch adds kvm_perf_book3s.h to indicate that the tracepoints are
book3s specific. Generic kvm_perf.h then can just include
kvm_perf_book3s.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes:
- Not exporting the exit reasons compared to previous patchset (suggested
by Paul)

  arch/powerpc/include/uapi/asm/kvm_perf.h|  6 ++
  arch/powerpc/include/uapi/asm/kvm_perf_book3s.h | 14 ++
  2 files changed, 20 insertions(+)
  create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
  create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..5ed2ff3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/kvm_perf_book3s.h
+
+#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
new file mode 100644
index 000..8c8d8c2
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_POWERPC_KVM_PERF_BOOK3S_H
+#define _ASM_POWERPC_KVM_PERF_BOOK3S_H
+
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_BOOK3S_H */

Again, why is book3s stuff being presented via uapi as generic
asm/kvm_perf.h with generic symbol names?

-Scott


Ok.

We can change the KVM_ENTRY_TRACE macro to something like
KVM_BOOK3S_ENTRY_TRACE and likewise for KVM_EXIT_TRACE
and KVM_EXIT_REASON and then, to resolve the issue of generic
macro names in the userspace side, we can handle it using __weak
modifier.

What would you suggest?


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 1/2] perf,kvm/ppc: Add kvm_perf.h for powerpc

2015-07-31 Thread Hemant Kumar


On 07/30/2015 03:52 AM, Scott Wood wrote:

On Wed, 2015-07-29 at 16:07 +0530, Hemant Kumar wrote:

Hi Scott,

On 07/17/2015 01:40 AM, Scott Wood wrote:

On Thu, 2015-07-16 at 21:18 +0530, Hemant Kumar wrote:

To analyze the exit events with perf, we need kvm_perf.h to be added in
the arch/powerpc directory, where the kvm tracepoints needed to trace
the KVM exit events are defined.

This patch adds kvm_perf_book3s.h to indicate that the tracepoints are
book3s specific. Generic kvm_perf.h then can just include
kvm_perf_book3s.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes:
- Not exporting the exit reasons compared to previous patchset
(suggested
by Paul)

   arch/powerpc/include/uapi/asm/kvm_perf.h|  6 ++
   arch/powerpc/include/uapi/asm/kvm_perf_book3s.h | 14 ++
   2 files changed, 20 insertions(+)
   create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
   create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..5ed2ff3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/kvm_perf_book3s.h
+
+#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
new file mode 100644
index 000..8c8d8c2
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_POWERPC_KVM_PERF_BOOK3S_H
+#define _ASM_POWERPC_KVM_PERF_BOOK3S_H
+
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_BOOK3S_H */

Again, why is book3s stuff being presented via uapi as generic
asm/kvm_perf.h with generic symbol names?

-Scott

Ok.

We can change the KVM_ENTRY_TRACE macro to something like
KVM_BOOK3S_ENTRY_TRACE and likewise for KVM_EXIT_TRACE
and KVM_EXIT_REASON

What about DECODE_STR_LEN and VCPU_ID?


DECODE_STR_LEN can be common; we can give it a big enough size if
we need to.
VCPU_ID, however, depends on a field in the tracepoint payload data which is
specific to that tracepoint. This field gives us the vcpu id and is used to
maintain the per-vcpu record. So, yes, since I can't find any such field as
vcpu_id in the kvm_exit tracepoint for book3e, we have to make this
specific to book3s.



Where is this API documented?


  and then, to resolve the issue of generic
macro names in the userspace side, we can handle it using __weak
modifier.

Does userspace get built differently for book3s versus book3e?  For now it'd
be fine for userspace to check for book3s and not use the feature if it's
book3e.  If and when book3e gains this feature, then userspace can be changed.


Well, I couldn't find any way to build user space differently for book3s and
book3e.

How about keeping this as it is, after changing the tracepoint macro names
in the uapi to book3s-specific ones? If and when booke decides to implement
this feature, a runtime check for event availability can be added then,
IMHO.


What do you think?


What would you suggest?

Another option would be to explain this interface so that we can figure out
if book3e would even want different values for these, and if not, move it to
asm/kvm.h.


Here is my understanding of the interface. We need to add handlers for
is_begin_event, is_end_event and decode_key for any event type
(for which we want to collect the stats).
The first two handlers check when the respective events started/ended,
and hence the time difference stats, event start/end times etc. are
calculated in these functions. To check if the event has started or ended,
they make use of the macros KVM_ENTRY_TRACE and KVM_EXIT_TRACE. These
macros are exported from the kernel as uapi. At least, that's how x86 and
s390 do it.
The decode_key handler is used to find out the reason for
that event (in case of book3s, it's the trap field of the kvm_hv:kvm_guest_exit
payload) in semantic terms. It maps an info of interest found in that
particular tracepoint's data to a name (string) through a
table, kvm_trace_symbol_exit. All the events are then classified into groups
based on this info.

So, for an exit event in case of book3s, kvm_hv:kvm_guest_exit has a trap
field which tells us the reason for a thread to exit the guest context by
encoding the trap code. We can map this trap code to the strings through the
kvm_trace_symbol_exit table and then classify all the exits into groups
based on this trap code.

--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 2/2] perf,kvm/ppc: Add hcall related info to kvm_perf.h

2015-07-16 Thread Hemant Kumar
To analyze the hcalls with perf, we need the hcall related tracepoints
information to be exported.

This patch adds hcall tracepoints kvm_hv:kvm_hcall_enter and
kvm_hv:kvm_hcall_exit to kvm_perf.h. So perf will now know which
tracepoints to look for if we are using perf kvm stat record to
collect guest hcall statistics.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes:
- Not exporting the hcall related codes and names through uapi compared to
  previous patch.

 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h 
b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
index 8c8d8c2..1378a8d 100644
--- a/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
+++ b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
@@ -11,4 +11,8 @@
 #define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
 #define KVM_EXIT_REASON trap
 
+#define KVM_HCALL_ENTRY_TRACE kvm_hv:kvm_hcall_enter
+#define KVM_HCALL_EXIT_TRACE kvm_hv:kvm_hcall_exit
+#define KVM_HCALL_REASON req
+
 #endif /* _ASM_POWERPC_KVM_PERF_BOOK3S_H */
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 1/2] perf/kvm: Port perf kvm stat to powerpc

2015-07-16 Thread Hemant Kumar
From: Srikar Dronamraju sri...@linux.vnet.ibm.com

perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid pid

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

To analyze the different exits, group them and present them (in a
slightly descriptive way) to the user, we need a mapping between the
exit code (dumped in the kvm_guest_exit tracepoint data) and to its
related Interrupt vector description (exit reason). This patch adds this
mapping in book3s_exits.h.

It records on two available KVM tracepoints :
kvm_hv:kvm_guest_exit and kvm_hv:kvm_guest_enter.

Note that this patch has a direct dependency on
perf,kvm/ppc: Add kvm_perf.h for powerpc which adds kvm_perf.h, where
the required kvm tracepoints are defined for perf kvm stat to be used.

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515
Analyze events for pid(s) 60515, all VCPUs:

   VM-EXITSamples  Samples% Time%Min Time Max
Time Avg time

H_DATA_STORAGE   500635.30% 0.13%  1.94us 49.46us 
12.37us ( +-   0.52% )
HV_DECREMENTER   445731.43% 0.02%  0.72us 16.14us  
1.91us ( +-   0.96% )
   SYSCALL   269018.97% 0.10%  2.84us528.24us 
18.29us ( +-   3.75% )
RETURN_TO_HOST   178912.61%99.76%  1.58us 672791.91us  
27470.23us ( +-   3.00% )
  EXTERNAL240 1.69% 0.00%  0.69us 10.67us  
1.33us ( +-   5.34% )

Total Samples:14182, Total events handled time:49264158.30us.

Signed-off-by: Srikar Dronamraju sri...@linux.vnet.ibm.com
Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
This patch has a direct dependency on:
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg91603.html

Changes :
- Added exit reasons definitions(unlikely to change) in the userspace side.

 tools/perf/arch/powerpc/Makefile|  1 +
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/book3s_exits.h | 33 +
 tools/perf/arch/powerpc/util/kvm-stat.c | 33 +
 4 files changed, 68 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_exits.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..21322e0 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_exits.h 
b/tools/perf/arch/powerpc/util/book3s_exits.h
new file mode 100644
index 000..94c58f4
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_exits.h
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_EXITS_H
+#define ARCH_PERF_BOOK3S_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x0,   RETURN_TO_HOST}, \
+   {0x100, SYSTEM_RESET}, \
+   {0x200, MACHINE_CHECK}, \
+   {0x300, DATA_STORAGE}, \
+   {0x380, DATA_SEGMENT}, \
+   {0x400, INST_STORAGE}, \
+   {0x480, INST_SEGMENT}, \
+   {0x500, EXTERNAL}, \
+   {0x501, EXTERNAL_LEVEL}, \
+   {0x502, EXTERNAL_HV}, \
+   {0x600, ALIGNMENT}, \
+   {0x700, PROGRAM}, \
+   {0x800, FP_UNAVAIL}, \
+   {0x900, DECREMENTER}, \
+   {0x980, HV_DECREMENTER}, \
+   {0xc00, SYSCALL}, \
+   {0xd00, TRACE}, \
+   {0xe00, H_DATA_STORAGE}, \
+   {0xe20, H_INST_STORAGE}, \
+   {0xe40, H_EMUL_ASSIST}, \
+   {0xf00, PERFMON}, \
+   {0xf20, ALTIVEC}, \
+   {0xf40, VSX}
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 000..d0e1930
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -0,0 +1,33 @@
+#include ../../util/kvm-stat.h
+#include book3s_exits.h
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+
+static

[PATCH v5 2/2] perf/kvm: Support HCALL events

2015-07-16 Thread Hemant Kumar
Powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm stat to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
kvm_hv:kvm_hcall_enter and kvm_hv:kvm_hcall_exit. To map the hcall
codes to their respective names, it needs a mapping. Such mapping is
added in this patch in book3s_hcalls.h.

Note that this patch has a dependency on 
perf,kvm/ppc: Add hcall related info to kvm_perf.h which adds the
hcall related tracepoints to kvm_perf.h to let perf kvm stat know
about these tracepoints.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624 samples) ]

 # perf kvm stat report -p 60515 --event=hcall
Analyze events for pid(s) 60515, all VCPUs:

 HCALL-EVENTSamples  Samples% Time%Min TimeMax Time 
Avg time

H_VIO_SIGNAL   103438.44%15.77%  0.36us  1.59us 
 0.44us ( +-   0.66% )
  H_SEND_CRQ65224.24%10.97%  0.39us  1.84us 
 0.49us ( +-   1.20% )
   H_IPI52319.44%62.05%  1.35us 19.70us 
 3.44us ( +-   2.88% )
 H_PUT_TERM_CHAR41115.28% 8.03%  0.38us  3.77us 
 0.57us ( +-   1.61% )
 H_GET_TERM_CHAR 50 1.86% 0.99%  0.40us  0.98us 
 0.57us ( +-   3.37% )
   H_EOI 20 0.74% 2.19%  2.22us  4.72us 
 3.17us ( +-   5.96% )

Total Samples:2690, Total events handled time:2896.94us.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
This patch has a direct dependency on :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg91605.html

Changes:
- Added definitions for hcall code to hcall reason mapping in the userspace 
side.

 tools/perf/arch/powerpc/util/book3s_hcalls.h | 123 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  |  64 ++
 2 files changed, 187 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hcalls.h

diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h 
b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 000..3d50def
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HCALLS_H
+#define ARCH_PERF_BOOK3S_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall name to reason mapping
+ */
+#define kvm_trace_symbol_hcall \
+   {0x4,H_REMOVE},   \
+   {0x8,H_ENTER},\
+   {0xc,H_READ}, \
+   {0x10,H_CLEAR_MOD},   \
+   {0x14,H_CLEAR_REF},   \
+   {0x18,H_PROTECT}, \
+   {0x1c,H_GET_TCE}, \
+   {0x20,H_PUT_TCE}, \
+   {0x24,H_SET_SPRG0},   \
+   {0x28,H_SET_DABR},\
+   {0x2c,H_PAGE_INIT},   \
+   {0x30,H_SET_ASR}, \
+   {0x34,H_ASR_ON},  \
+   {0x38,H_ASR_OFF}, \
+   {0x3c,H_LOGICAL_CI_LOAD}, \
+   {0x40,H_LOGICAL_CI_STORE},\
+   {0x44,H_LOGICAL_CACHE_LOAD},  \
+   {0x48,H_LOGICAL_CACHE_STORE}, \
+   {0x4c,H_LOGICAL_ICBI},\
+   {0x50,H_LOGICAL_DCBF},\
+   {0x54,H_GET_TERM_CHAR},   \
+   {0x58,H_PUT_TERM_CHAR},   \
+   {0x5c,H_REAL_TO_LOGICAL}, \
+   {0x60,H_HYPERVISOR_DATA}, \
+   {0x64,H_EOI}, \
+   {0x68,H_CPPR},\
+   {0x6c,H_IPI}, \
+   {0x70,H_IPOLL},   \
+   {0x74,H_XIRR},\
+   {0x78,H_MIGRATE_DMA}, \
+   {0x7c,H_PERFMON}, \
+   {0xdc,H_REGISTER_VPA},\
+   {0xe0,H_CEDE},\
+   {0xe4,H_CONFER},  \
+   {0xe8,H_PROD},\
+   {0xec,H_GET_PPP}, \
+   {0xf0,H_SET_PPP}, \
+   {0xf4,H_PURR},\
+   {0xf8,H_PIC

[PATCH v5 1/2] perf,kvm/ppc: Add kvm_perf.h for powerpc

2015-07-16 Thread Hemant Kumar
To analyze the exit events with perf, we need kvm_perf.h to be added in
the arch/powerpc directory, where the kvm tracepoints needed to trace
the KVM exit events are defined.

This patch adds kvm_perf_book3s.h to indicate that the tracepoints are
book3s specific. Generic kvm_perf.h then can just include
kvm_perf_book3s.h.

Signed-off-by: Hemant Kumar hem...@linux.vnet.ibm.com
---
Changes:
- Not exporting the exit reasons compared to previous patchset (suggested by 
Paul)

 arch/powerpc/include/uapi/asm/kvm_perf.h|  6 ++
 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h | 14 ++
 2 files changed, 20 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf_book3s.h

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..5ed2ff3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include asm/kvm_perf_book3s.h
+
+#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h 
b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
new file mode 100644
index 000..8c8d8c2
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf_book3s.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_POWERPC_KVM_PERF_BOOK3S_H
+#define _ASM_POWERPC_KVM_PERF_BOOK3S_H
+
+#include asm/kvm.h
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID vcpu_id
+
+#define KVM_ENTRY_TRACE kvm_hv:kvm_guest_enter
+#define KVM_EXIT_TRACE kvm_hv:kvm_guest_exit
+#define KVM_EXIT_REASON trap
+
+#endif /* _ASM_POWERPC_KVM_PERF_BOOK3S_H */
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v9 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2015-10-26 Thread Hemant Kumar

Hi Arnaldo,

Could you please take a look at this series and pull it?

---
Thanks,
Hemant

On 10/07/2015 07:55 AM, Hemant Kumar wrote:

It's better to remove the dependency on uapi/kvm_perf.h to allow dynamic
discovery of kvm events (if it's needed). To do this, some extern
variables have been introduced with which we can keep the generic
functions generic.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v8 to v9:
- Removed the macro definitions.
- Changed the access of kvm_entry_trace and kvm_exit_trace
- Removed unnecessary formatting.
v7 to v8:
- Removed unnecessary __unused_parameter modifiers.

  tools/perf/arch/s390/util/kvm-stat.c |  8 +++-
  tools/perf/arch/x86/util/kvm-stat.c  | 14 +++---
  tools/perf/builtin-kvm.c | 32 ++--
  tools/perf/util/kvm-stat.h   |  5 +
  4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..b85a94b 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -10,7 +10,7 @@
   */

  #include "../../util/kvm-stat.h"
-#include 
+#include 

  define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
  define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, 
sigp_order_codes);
  define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
  define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);

+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
  static void event_icpt_insn_get_key(struct perf_evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..babefda 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,5 +1,7 @@
  #include "../../util/kvm-stat.h"
-#include 
+#include 
+#include 
+#include 

  define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
  define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
  };

+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
  /*
   * For the mmio events, we treat:
   * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
@@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm 
__maybe_unused,
  struct event_key *key,
  char *decode)
  {
-   scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
+   scnprintf(decode, decode_str_len, "%#lx:%s",
  (unsigned long)key->key,
  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
  }
@@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat 
*kvm __maybe_unused,
struct event_key *key,
char *decode)
  {
-   scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
+   scnprintf(decode, decode_str_len, "%#llx:%s",
  (unsigned long long)key->key,
  key->info ? "POUT" : "PIN");
  }
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index fc1cffb..5104c7e 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -31,7 +31,6 @@
  #include 

  #ifdef HAVE_KVM_STAT_SUPPORT
-#include 
  #include "util/kvm-stat.h"

  void exit_event_get_key(struct perf_evsel *evsel,
@@ -39,12 +38,12 @@ void exit_event_get_key(struct perf_evsel *evsel,
struct event_key *key)
  {
key->info = 0;
-   key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+   key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
  }

  bool kvm_exit_event(struct perf_evsel *evsel)
  {
-   return !strcmp(evsel->name, KVM_EXIT_TRACE);
+   return !strcmp(evsel->name, kvm_exit_trace);
  }

  bool exit_event_begin(struct perf_evsel *evsel,
@@ -60,7 +59,7 @@ bool exit_event_begin(struct perf_evsel *evsel,

  bool kvm_entry_event(struct perf_evsel *evsel)
  {
-   return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+   return !strcmp(evsel->name, kvm_entry_trace);
  }

  bool exit_event_end(struct pe

Re: [PATCH v9 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2015-10-29 Thread Hemant Kumar



On 10/29/2015 02:17 AM, Alexander Yarygin wrote:

Hemant Kumar writes:


Hi David,


On 10/07/2015 09:41 PM, David Ahern wrote:

On 10/6/15 8:25 PM, Hemant Kumar wrote:

@@ -358,7 +357,12 @@ static bool handle_end_event(struct
perf_kvm_stat *kvm,
   time_diff = sample->time - time_begin;

   if (kvm->duration && time_diff > kvm->duration) {
-char decode[DECODE_STR_LEN];
+char *decode = zalloc(decode_str_len);

decode can still be a stack variable even with variable length.


Yeah, we can do that. But, I am not sure whether it's a standard way.


Well, I also vote for making them variable length arrays. I guess that
wouldn't be a problem because the "variable" here is actually a constant
compile time value, even if it's extern.

But if people are strongly against it, as an alternative I can suggest
to move the 'char *decode' variable to the perf_kvm_stat structure,
allocate it once e.g. in kvm_events_report() and just write to it via
decode_key(). If I'm not mistaken, we always write \0 trimmed strings,
so garbage after \0 shouldn't be a problem.


I agree. We can do that. But, since this is a small change (making
the variable a constant compile time value rather than the array being
a run time value), we can do that subsequently. For now, we can go
with the current patchset.


It's not a real problem anyway :)


Yeah.


For s390 parts:
Acked-by: Alexander Yarygin <yary...@linux.vnet.ibm.com>


Thanks a lot for testing and acking it. :)


-8<-


@@ -575,7 +581,7 @@ static void show_timeofday(void)

   static void print_result(struct perf_kvm_stat *kvm)
   {
-char decode[DECODE_STR_LEN];
+char *decode;

and a stack variable here too.


Same here.


David
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v9 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2015-10-08 Thread Hemant Kumar

Hi David,


On 10/07/2015 09:41 PM, David Ahern wrote:

On 10/6/15 8:25 PM, Hemant Kumar wrote:
@@ -358,7 +357,12 @@ static bool handle_end_event(struct 
perf_kvm_stat *kvm,

  time_diff = sample->time - time_begin;

  if (kvm->duration && time_diff > kvm->duration) {
-char decode[DECODE_STR_LEN];
+char *decode = zalloc(decode_str_len);


decode can still be a stack variable even with variable length.



Yeah, we can do that. But, I am not sure whether it's a standard way.


+
+if (!decode) {
+pr_err("Not enough memory\n");
+return false;
+}

  kvm->events_ops->decode_key(kvm, &event->key, decode);
  if (!skip_event(decode)) {
@@ -366,6 +370,7 @@ static bool handle_end_event(struct perf_kvm_stat 
*kvm,

   sample->time, sample->pid, vcpu_record->vcpu_id,
   decode, time_diff/1000);
  }
+free(decode);
  }

  return update_kvm_event(event, vcpu, time_diff);
@@ -386,7 +391,8 @@ struct vcpu_event_record *per_vcpu_record(struct 
thread *thread,


-8<-


@@ -575,7 +581,7 @@ static void show_timeofday(void)

  static void print_result(struct perf_kvm_stat *kvm)
  {
-char decode[DECODE_STR_LEN];
+char *decode;


and a stack variable here too.



Same here.


David
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2015-10-06 Thread Hemant Kumar
It's better to remove the dependency on uapi/kvm_perf.h to allow dynamic
discovery of kvm events (if it's needed). To do this, some extern
variables have been introduced with which we can keep the generic
functions generic.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v8 to v9:
- Removed the macro definitions.
- Changed the access of kvm_entry_trace and kvm_exit_trace
- Removed unnecessary formatting.
v7 to v8:
- Removed unnecessary __unused_parameter modifiers.

 tools/perf/arch/s390/util/kvm-stat.c |  8 +++-
 tools/perf/arch/x86/util/kvm-stat.c  | 14 +++---
 tools/perf/builtin-kvm.c | 32 ++--
 tools/perf/util/kvm-stat.h   |  5 +
 4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..b85a94b 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -10,7 +10,7 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include 
+#include 
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, 
sigp_order_codes);
 define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
 define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
 
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
 static void event_icpt_insn_get_key(struct perf_evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..babefda 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,5 +1,7 @@
 #include "../../util/kvm-stat.h"
-#include 
+#include 
+#include 
+#include 
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
 /*
  * For the mmio events, we treat:
  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
@@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm 
__maybe_unused,
  struct event_key *key,
  char *decode)
 {
-   scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
+   scnprintf(decode, decode_str_len, "%#lx:%s",
  (unsigned long)key->key,
  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
 }
@@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat 
*kvm __maybe_unused,
struct event_key *key,
char *decode)
 {
-   scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
+   scnprintf(decode, decode_str_len, "%#llx:%s",
  (unsigned long long)key->key,
  key->info ? "POUT" : "PIN");
 }
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index fc1cffb..5104c7e 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -31,7 +31,6 @@
 #include 
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include 
 #include "util/kvm-stat.h"
 
 void exit_event_get_key(struct perf_evsel *evsel,
@@ -39,12 +38,12 @@ void exit_event_get_key(struct perf_evsel *evsel,
struct event_key *key)
 {
key->info = 0;
-   key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+   key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
 }
 
 bool kvm_exit_event(struct perf_evsel *evsel)
 {
-   return !strcmp(evsel->name, KVM_EXIT_TRACE);
+   return !strcmp(evsel->name, kvm_exit_trace);
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -60,7 +59,7 @@ bool exit_event_begin(struct perf_evsel *evsel,
 
 bool kvm_entry_event(struct perf_evsel *evsel)
 {
-   return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+   return !strcmp(evsel->name, kvm_entry_trace);
 }
 
 bool exit_event_end(struct perf_evsel *evsel,
@@ -92,7 +91,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm,
const char *exit_reason = get_exit_reason(kvm, key->exit

[PATCH v9 3/4] perf,kvm/powerpc: Port perf kvm stat to powerpc

2015-10-06 Thread Hemant Kumar
perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid <pid>

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

Since different powerpc machines have different KVM tracepoints, this
patch discovers the available tracepoints dynamically and accordingly
looks for them. If any single tracepoint is not present, this support
won't be enabled for reporting. To record, this will fail if any of the
events we are looking to record isn't available.
Right now, it's only supported on PowerPC Book3S_HV architectures.

To analyze the different exits, group them and present them (in a slightly
descriptive way) to the user, we need a mapping between the "exit
code" (dumped in the kvm_guest_exit tracepoint data) and to its related
Interrupt vector description (exit reason). This patch adds this mapping
in book3s_hv_exits.h.

It records on two available KVM tracepoints for book3s_hv:
"kvm_hv:kvm_guest_exit" and "kvm_hv:kvm_guest_enter".

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515

Analyze events for pid(s) 60515, all VCPUs:

   VM-EXITSamples  Samples% Time%Min TimeMax Time 
Avg time

   SYSCALL   914163.67% 7.49%  1.26us   5782.39us  
9.87us ( +-   6.46% )
H_DATA_STORAGE   411428.66% 5.07%  1.72us   4597.68us 
14.84us ( +-  20.06% )
HV_DECREMENTER418 2.91% 4.26%  0.70us  30002.22us
122.58us ( +-  70.29% )
  EXTERNAL392 2.73% 0.06%  0.64us104.10us  
1.94us ( +-  18.83% )
RETURN_TO_HOST287 2.00%83.11%  1.53us 124240.15us   
3486.52us ( +-  16.81% )
H_INST_STORAGE  5 0.03% 0.00%  1.88us  3.73us  
2.39us ( +-  14.20% )

Total Samples:14357, Total events handled time:1203918.42us.

Signed-off-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v8 to v9:
- Moved the book3s specific setup into one function.
- Removed the macros (which were being used only once).
- Formatting changes.
v7 to v8:
- Fixed a perf kvm stat live bug.
v6 to v7:
- Removed dependency on uapi.
v4 to v5:
- Removed dependency on arch/powerpc/kvm/trace_book3s.h and added them in
the userspace side.
- No more arch side dependency.
v1 to v3:
- Split the patches for powerpc and perf

 tools/perf/arch/powerpc/Makefile   |   2 +
 tools/perf/arch/powerpc/util/Build |   1 +
 tools/perf/arch/powerpc/util/book3s_hv_exits.h |  33 
 tools/perf/arch/powerpc/util/kvm-stat.c| 100 +
 tools/perf/builtin-kvm.c   |  18 +
 tools/perf/util/kvm-stat.h |   1 +
 6 files changed, 155 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hv_exits.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..9f9cea3 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h 
b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 000..e68ba2d
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x0,   "RETURN_TO_HOST"}, \
+   {0x100, "SYSTEM_RESET"}, \
+   {0x200, "MACHINE_CHECK"}, \
+   {0x300, "DATA_STORAGE"}, \
+   {0x380, "DATA_SEGMENT"}, \
+   {0x400, "INST_STORAGE"}, \
+   {0x480, "INST_SEGMENT"}, \
+   {0x500, "EXTERNAL"}, \
+   {0x501, "EXTERNAL_LEVEL"}, \
+   {0x

[PATCH v9 2/4] perf,kvm/{x86,s390}: Remove const from kvm_events_tp

2015-10-06 Thread Hemant Kumar
This patch removes the "const" qualifier from kvm_events_tp declaration
to account for the fact that some architectures may need to update this
variable dynamically. For instance, powerpc will need to update this
variable dynamically depending on the machine type.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/arch/s390/util/kvm-stat.c | 2 +-
 tools/perf/arch/x86/util/kvm-stat.c  | 2 +-
 tools/perf/util/kvm-stat.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index b85a94b..ed57df2 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -79,7 +79,7 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_s390_sie_enter",
"kvm:kvm_s390_sie_exit",
"kvm:kvm_s390_intercept_instruction",
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index babefda..b63d4be 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -129,7 +129,7 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
"kvm:kvm_mmio",
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index dd55548..c965dc8 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -133,7 +133,7 @@ bool kvm_entry_event(struct perf_evsel *evsel);
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
 extern const char *vcpu_id_str;
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 4/4] perf,kvm/powerpc: Add support for HCALL reasons

2015-10-06 Thread Hemant Kumar
Powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm stat to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
"kvm_hv:kvm_hcall_enter" and "kvm_hv:kvm_hcall_exit". To map the hcall
codes to their respective names, it needs a mapping. Such mapping is
added in this patch in book3s_hcalls.h.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515 --event=hcall

Analyze events for all VMs, all VCPUs:

HCALL-EVENTSamples  Samples% Time%Min TimeMax Time 
Avg time

  H_IPI82266.08%88.10%  0.63us 11.38us  
2.05us ( +-   1.42% )
 H_SEND_CRQ14411.58% 3.77%  0.41us  0.88us  
0.50us ( +-   1.47% )
   H_VIO_SIGNAL118 9.49% 2.86%  0.37us  0.83us  
0.47us ( +-   1.43% )
H_PUT_TERM_CHAR 76 6.11% 2.07%  0.37us  0.90us  
0.52us ( +-   2.43% )
H_GET_TERM_CHAR 74 5.95% 2.23%  0.37us  1.70us  
0.58us ( +-   4.77% )
 H_RTAS  6 0.48% 0.85%  1.10us  9.25us  
2.70us ( +-  48.57% )
  H_PERFMON  4 0.32% 0.12%  0.41us  0.96us  
0.59us ( +-  20.92% )

Total Samples:1244, Total events handled time:1916.69us.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v8 to v9:
- Removed the macros (which were being used only once).
v6 to v7:
- Removed dependency on uapi.
v4 to v5:
- Removed dependency on arch/powerpc/include/asm/hvcall.h and added them
in userspace side.
- No more arch side dependency.
v1 to v2:
- Split the patches for powerpc and perf.

 tools/perf/arch/powerpc/util/book3s_hcalls.h | 123 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  |  65 +-
 2 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hcalls.h

diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h 
b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 000..0dd6b7f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+   {0x4, "H_REMOVE"},  \
+   {0x8, "H_ENTER"},   \
+   {0xc, "H_READ"},\
+   {0x10, "H_CLEAR_MOD"},  \
+   {0x14, "H_CLEAR_REF"},  \
+   {0x18, "H_PROTECT"},\
+   {0x1c, "H_GET_TCE"},\
+   {0x20, "H_PUT_TCE"},\
+   {0x24, "H_SET_SPRG0"},  \
+   {0x28, "H_SET_DABR"},   \
+   {0x2c, "H_PAGE_INIT"},  \
+   {0x30, "H_SET_ASR"},\
+   {0x34, "H_ASR_ON"}, \
+   {0x38, "H_ASR_OFF"},\
+   {0x3c, "H_LOGICAL_CI_LOAD"},\
+   {0x40, "H_LOGICAL_CI_STORE"},   \
+   {0x44, "H_LOGICAL_CACHE_LOAD"}, \
+   {0x48, "H_LOGICAL_CACHE_STORE"},\
+   {0x4c, "H_LOGICAL_ICBI"},   \
+   {0x50, "H_LOGICAL_DCBF"},   \
+   {0x54, "H_GET_TERM_CHAR"},  \
+   {0x58, "H_PUT_TERM_CHAR"},  \
+   {0x5c, "H_REAL_TO_LOGICAL"},\
+   {0x60, "H_HYPERVISOR_DATA"},\
+   {0x64, "H_EOI"},\
+   {0x68, "H_CPPR"},   \
+   {0x6c, "H_IPI"},\
+   {0x70, "H_IPOLL"},  \
+   {0x74, "H_XIRR"}, 

[PATCH v6 2/2] perf, kvm/powerpc: Add hcall related info to kvm_perf.h

2015-08-31 Thread Hemant Kumar
To analyze the hcalls with perf kvm stat, we need the hcall related
tracepoint information to be exported.

This patch adds hcall tracepoints "kvm_hv:kvm_hcall_enter" and
"kvm_hv:kvm_hcall_exit" to kvm_perf.h. So, perf will now know to look
for these tracepoints if "perf kvm stat record" is invoked to collect
guest hcall statistics.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/kvm_perf.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
index 68f105e..2af6290 100644
--- a/arch/powerpc/include/uapi/asm/kvm_perf.h
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -12,6 +12,10 @@
 #define KVM_EXIT_TRACE_HV "kvm_hv:kvm_guest_exit"
 #define KVM_EXIT_REASON_HV "trap"
 
+#define KVM_HCALL_ENTRY_TRACE_HV "kvm_hv:kvm_hcall_enter"
+#define KVM_HCALL_EXIT_TRACE_HV "kvm_hv:kvm_hcall_exit"
+#define KVM_HCALL_REASON_HV "req"
+
 /* This is to shut the compiler up */
 #define KVM_ENTRY_TRACE ""
 #define KVM_EXIT_TRACE ""
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v6 1/2] perf,kvm/powerpc: Add kvm_perf.h for powerpc

2015-08-31 Thread Hemant Kumar
To analyze the exit events with perf, we need to export the related
tracepoints through kvm_perf.h. kvm_perf.h is to be added in the
arch/powerpc directory, where the kvm tracepoints needed to trace the
KVM exit events are defined.

To indicate that the tracepoints are book3s_hv specific, suffix _HV has
been added to the tracepoint macros. Additionally, we also need to
define the generic macros without the suffix (albeit with null strings), because
the preprocessor looks for them in the generic code in builtin-kvm.c.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changes since v5:
- Moved back the tracepoint definitions from kvm_perf_book3s.h to kvm_perf.h

 arch/powerpc/include/uapi/asm/kvm_perf.h | 21 +
 1 file changed, 21 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..68f105e
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,21 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include 
+
+#define DECODE_STR_LEN 40
+
+#define VCPU_ID "vcpu_id"
+
+/* For Book3S_HV machines */
+#define KVM_ENTRY_TRACE_HV "kvm_hv:kvm_guest_enter"
+#define KVM_EXIT_TRACE_HV "kvm_hv:kvm_guest_exit"
+#define KVM_EXIT_REASON_HV "trap"
+
+/* This is to shut the compiler up */
+#define KVM_ENTRY_TRACE ""
+#define KVM_EXIT_TRACE ""
+#define KVM_EXIT_REASON ""
+
+
+#endif /* _ASM_POWERPC_KVM_PERF_H */
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v6 1/3] perf,kvm/powerpc: Remove const from kvm_events_tp

2015-08-31 Thread Hemant Kumar
This patch removes the "const" qualifier from kvm_events_tp declaration
to account for the fact that powerpc will need to update this variable
dynamically depending on the machine type.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/arch/s390/util/kvm-stat.c | 2 +-
 tools/perf/arch/x86/util/kvm-stat.c  | 2 +-
 tools/perf/util/kvm-stat.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..488a8c7 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -73,7 +73,7 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_s390_sie_enter",
"kvm:kvm_s390_sie_exit",
"kvm:kvm_s390_intercept_instruction",
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..11188d5 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -121,7 +121,7 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
"kvm:kvm_mmio",
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index ae825d4..6384672 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -133,7 +133,7 @@ bool kvm_entry_event(struct perf_evsel *evsel);
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
 
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v6 2/3] perf,kvm/powerpc: Port perf kvm stat to powerpc

2015-08-31 Thread Hemant Kumar
perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid <pid>

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

Since, different powerpc machines have different KVM tracepoints, this
patch discovers the machine type dynamically from /proc/cpuinfo's
"machine" tag  and accordingly sets kvm tracepoints. Right now, it only
supports Book3S_HV tracepoints.

To analyze the different exits, group them and present them (in a slightly
descriptive way) to the user, we need a mapping between the "exit
code" (dumped in the kvm_guest_exit tracepoint data) and its related
Interrupt vector description (exit reason). This patch adds this mapping
in book3s_hv_exits.h.

It records on two available KVM tracepoints :
"kvm_hv:kvm_guest_exit" and "kvm_hv:kvm_guest_enter" exported through
arch/powerpc/include/uapi/asm/kvm_perf.h.

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515

Analyze events for pid(s) 60515, all VCPUs:

       VM-EXIT    Samples  Samples%     Time%    Min Time      Max Time    Avg time

       SYSCALL       9141    63.67%     7.49%      1.26us     5782.39us      9.87us ( +-   6.46% )
H_DATA_STORAGE       4114    28.66%     5.07%      1.72us     4597.68us     14.84us ( +-  20.06% )
HV_DECREMENTER        418     2.91%     4.26%      0.70us    30002.22us    122.58us ( +-  70.29% )
      EXTERNAL        392     2.73%     0.06%      0.64us      104.10us      1.94us ( +-  18.83% )
RETURN_TO_HOST        287     2.00%    83.11%      1.53us   124240.15us   3486.52us ( +-  16.81% )
H_INST_STORAGE          5     0.03%     0.00%      1.88us        3.73us      2.39us ( +-  14.20% )

Total Samples:14357, Total events handled time:1203918.42us.

Signed-off-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
This patch has a direct dependency on :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg93620.html

Changes since v5 :
- Added a dynamic discovery check for machine type.
- Made the kvm tracepoints recording/reporting conditional on machine type.
  (Suggested by Scott Wood)

 tools/perf/arch/powerpc/Makefile   |   2 +
 tools/perf/arch/powerpc/util/Build |   1 +
 tools/perf/arch/powerpc/util/book3s_hv_exits.h |  33 ++
 tools/perf/arch/powerpc/util/kvm-stat.c| 151 +
 tools/perf/builtin-kvm.c   |  16 ++-
 tools/perf/util/kvm-stat.h |   1 +
 6 files changed, 201 insertions(+), 3 deletions(-)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hv_exits.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..9f9cea3 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h 
b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 000..e68ba2d
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x0,   "RETURN_TO_HOST"}, \
+   {0x100, "SYSTEM_RESET"}, \
+   {0x200, "MACHINE_CHECK"}, \
+   {0x300, "DATA_STORAGE"}, \
+   {0x380, "DATA_SEGMENT"}, \
+   {0x400, "INST_STORAGE"}, \
+   {0x480, "INST_SEGMENT"}, \
+   {0x500, "EXTERNAL"}, \
+   {0x501, "EXTERNAL_LEVEL"}, \
+   {0x502, "EXTERNAL_HV"}, \
+   {0x600, "ALIGNMENT"}, \
+   {0x700, "PROGRAM"}, \
+   {0x800, "FP_UNAVAIL"}, \
+   {0x900, "DECREMENTER"}, \
+   {0x980, "HV_DE

[PATCH v6 3/3] perf,kvm/powerpc: Add support for HCALL reasons

2015-08-31 Thread Hemant Kumar
Powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
"kvm_hv:kvm_hcall_enter" and "kvm_hv:kvm_hcall_exit". To map the hcall
codes to their respective names, it needs a mapping. Such mapping is
added in this patch in book3s_hcalls.h.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515 --event=hcall

Analyze events for all VMs, all VCPUs:

    HCALL-EVENT    Samples  Samples%     Time%    Min Time      Max Time    Avg time

          H_IPI        822    66.08%    88.10%      0.63us       11.38us      2.05us ( +-   1.42% )
     H_SEND_CRQ        144    11.58%     3.77%      0.41us        0.88us      0.50us ( +-   1.47% )
   H_VIO_SIGNAL        118     9.49%     2.86%      0.37us        0.83us      0.47us ( +-   1.43% )
H_PUT_TERM_CHAR         76     6.11%     2.07%      0.37us        0.90us      0.52us ( +-   2.43% )
H_GET_TERM_CHAR         74     5.95%     2.23%      0.37us        1.70us      0.58us ( +-   4.77% )
         H_RTAS          6     0.48%     0.85%      1.10us        9.25us      2.70us ( +-  48.57% )
      H_PERFMON          4     0.32%     0.12%      0.41us        0.96us      0.59us ( +-  20.92% )

Total Samples:1244, Total events handled time:1916.69us.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
This patch has a direct dependency on :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg93619.html

Changes since v5:
- hcall tracepoints recording/reporting will be based on machine type check.

 tools/perf/arch/powerpc/util/book3s_hcalls.h | 123 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  |  76 +++--
 2 files changed, 193 insertions(+), 6 deletions(-)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hcalls.h

diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h 
b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 000..0dd6b7f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+   {0x4, "H_REMOVE"},  \
+   {0x8, "H_ENTER"},   \
+   {0xc, "H_READ"},\
+   {0x10, "H_CLEAR_MOD"},  \
+   {0x14, "H_CLEAR_REF"},  \
+   {0x18, "H_PROTECT"},\
+   {0x1c, "H_GET_TCE"},\
+   {0x20, "H_PUT_TCE"},\
+   {0x24, "H_SET_SPRG0"},  \
+   {0x28, "H_SET_DABR"},   \
+   {0x2c, "H_PAGE_INIT"},  \
+   {0x30, "H_SET_ASR"},\
+   {0x34, "H_ASR_ON"}, \
+   {0x38, "H_ASR_OFF"},\
+   {0x3c, "H_LOGICAL_CI_LOAD"},\
+   {0x40, "H_LOGICAL_CI_STORE"},   \
+   {0x44, "H_LOGICAL_CACHE_LOAD"}, \
+   {0x48, "H_LOGICAL_CACHE_STORE"},\
+   {0x4c, "H_LOGICAL_ICBI"},   \
+   {0x50, "H_LOGICAL_DCBF"},   \
+   {0x54, "H_GET_TERM_CHAR"},  \
+   {0x58, "H_PUT_TERM_CHAR"},  \
+   {0x5c, "H_REAL_TO_LOGICAL"},\
+   {0x60, "H_HYPERVISOR_DATA"},\
+   {0x64, "H_EOI"},\
+   {0x68, "H_CPPR"},   \
+   {0x6c, "H_IPI"},\
+   {0x70, "H_IPOLL"},  \
+   {0x74, "H_XIRR"},   \
+   {0x78, "H_MIGRATE_DMA"

Re: [PATCH v6 1/2] perf,kvm/powerpc: Add kvm_perf.h for powerpc

2015-09-01 Thread Hemant Kumar

(cc'ing Michael Ellerman with this reply)

Hi Arnaldo,

On 09/01/2015 01:43 AM, Arnaldo Carvalho de Melo wrote:

Em Mon, Aug 31, 2015 at 12:18:00PM +0530, Hemant Kumar escreveu:

To analyze the exit events with perf, we need to export the related
tracepoints through kvm_perf.h. kvm_perf.h is to be added in the
arch/powerpc directory, where the kvm tracepoints needed to trace the
KVM exit events are defined.

To indicate that the tracepoints are book3s_hv specific, suffix _HV has
been added to the tracepoint macros. Additionally, we also need to
define the generic macros without the suffix (albeit with null strings), because
the preprocessor looks for them in the generic code in builtin-kvm.c.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>

humm, there are two patchkits, this one with two patches, the other with
3, this one for the kernel, the other one for tools/, but for the tools/
part to work, does this one need to be applied first?

Should I try to process the 5 together, applying these two first?


Yes, this patchset needs to be applied before applying the other patchset,
since there is a direct dependency on these two for the tooling part to
work.


I see there are no acks from powerpc arch maintainers, how should we
proceed here? If there are no problems with the arch bits, and if it is
just to enable the tooling part, again, should I process the 5 as just
one series?


The reason to split the earlier patchset into two was to separate the
tooling/perf/ and arch/powerpc/ side patches, as asked by Michael..

Here is the link to that discussion :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg86916.html

If Michael is ok with the patches, you can process all the 5 patches 
together. Michael?




- Arnaldo


---
Changes since v5:
- Moved back the tracepoint definitions from kvm_perf_book3s.h to kvm_perf.h

  arch/powerpc/include/uapi/asm/kvm_perf.h | 21 +
  1 file changed, 21 insertions(+)
  create mode 100644 arch/powerpc/include/uapi/asm/kvm_perf.h

diff --git a/arch/powerpc/include/uapi/asm/kvm_perf.h 
b/arch/powerpc/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000..68f105e
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,21 @@
+#ifndef _ASM_POWERPC_KVM_PERF_H
+#define _ASM_POWERPC_KVM_PERF_H
+
+#include 
+
+#define DECODE_STR_LEN 40
+
+#define VCPU_ID "vcpu_id"
+
+/* For Book3S_HV machines */
+#define KVM_ENTRY_TRACE_HV "kvm_hv:kvm_guest_enter"
+#define KVM_EXIT_TRACE_HV "kvm_hv:kvm_guest_exit"
+#define KVM_EXIT_REASON_HV "trap"
+
+/* This is to shut the compiler up */
+#define KVM_ENTRY_TRACE ""
+#define KVM_EXIT_TRACE ""
+#define KVM_EXIT_REASON ""
+
+
+#endif /* _ASM_POWERPC_KVM_PERF_H */
--
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v6 1/2] perf,kvm/powerpc: Add kvm_perf.h for powerpc

2015-09-07 Thread Hemant Kumar



On 09/07/2015 10:40 AM, Michael Ellerman wrote:

On Fri, 2015-09-04 at 17:51 -0300, Arnaldo Carvalho de Melo wrote:

Em Tue, Sep 01, 2015 at 12:18:47PM +0530, Hemant Kumar escreveu:

Should I try to process the 5 together, applying these two first?
  

Yes, this patchset needs to be applied before applying the other patchset,
since there is a direct dependency on these two for the tooling part to
work.
  

I see there are no acks from powerpc arch maintainers, how should we
proceed here? If there are no problems with the arch bits, and if it is
just to enable the tooling part, again, should I process the 5 as just
one series?
  

The reason to split the earlier patchset into two was to separate the
tooling/perf/ and arch/powerpc/ side patches, as asked by Michael..
  

Here is the link to that discussion :
http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg86916.html
  

If Michael is ok with the patches, you can process all the 5 patches
together. Michael?

Michael?

I'm not particularly happy with it.

Can we at least remove this hunk from the uapi header:

+/* This is to shut the compiler up */
+#define KVM_ENTRY_TRACE ""
+#define KVM_EXIT_TRACE ""
+#define KVM_EXIT_REASON ""



Agreed, I didn't like this too, but I kept this because of the generic
perf userspace code that looks for KVM_{ENTRY,EXIT}_TRACE and
KVM_EXIT_REASON. We can remove this and put this hunk in the
userspace side.

Arnaldo,
Can we remove the dependency on uapi altogether (also suggested
by Scott) because it doesn't seem to fulfill much purpose? Rather,
hardcode the events in the userspace completely (since, tracepoint
event names are unlikely to change)? This is similar to what x86 is
already doing in kvm-stat.c, where it's defining kvm_events_tp[] and
it's not using the macros, rather, the tracepoints directly. Macros are
only being used in builtin-kvm.c where the tracepoint names are
matched with KVM_{ENTRY,EXIT}_TRACE and when we are looking
for the key KVM_EXIT_REASON.

--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2015-09-30 Thread Hemant Kumar



On 09/28/2015 08:51 PM, David Ahern wrote:

On 9/28/15 9:16 AM, Scott Wood wrote:

On Mon, 2015-09-28 at 08:31 -0600, David Ahern wrote:

On 9/28/15 7:00 AM, Alexander Yarygin wrote:

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index fc1cffb..ef25fcf 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -31,20 +31,18 @@
   #include  


[SNIP]


@@ -60,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel,

   bool kvm_entry_event(struct perf_evsel *evsel)
   {
- return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+ return !strncmp(evsel->name, kvm_events_tp[0], 
strlen(evsel->name));

   }

   bool exit_event_end(struct perf_evsel *evsel,


I agree; don't rely on kvm_events_tp. Define KVM_ENTRY_TRACE and
KVM_EXIT_TRACE like x86.


If you mean defining them in uapi, that doesn't work for arches that 
have

multiple subarches that may have different trace events.  This patchset
doesn't actually implement dynamic support for the subarches, but it 
avoids
adding constants to uapi headers that only apply to one of the 
subarches.


I don't agree on relying on kvm_events_tp[0] and [1]. If you need that 
to be a runtime definition then change KVM_ENTRY_TRACE to const char 
*kvm_entry_trace and s390 and other arches can have code to set 
kvm_{entry,exit}_trace at runtime.




Yeah, will change them to kvm_{entry,exit}_trace, instead.

--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v8 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2015-09-30 Thread Hemant Kumar

Hi,

Thanks for the review.

On 09/28/2015 06:30 PM, Alexander Yarygin wrote:

Hemant Kumar <hem...@linux.vnet.ibm.com> writes:


It's better to remove the dependency on uapi/kvm_perf.h to allow dynamic
discovery of kvm events (if it's needed). To do this, some extern
variables have been introduced with which we can keep the generic
functions generic.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changes since v7:
- Removed __maybe_unused for some parameters which weren't needed.

  tools/perf/arch/s390/util/kvm-stat.c | 10 -
  tools/perf/arch/x86/util/kvm-stat.c  | 12 ++-
  tools/perf/builtin-kvm.c | 39 +++-
  tools/perf/util/kvm-stat.h   |  3 +++
  4 files changed, 48 insertions(+), 16 deletions(-)

Hello,


The patchset doesn't break s390 code (and at least build on x86), but I


Thanks for testing it on s390.


don't really like some things here (e.g. direct access to
kvm_events_tp), see below.

Thanks.

CC: David Ahern


diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..c2acb3e 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -10,7 +10,11 @@
   */

  #include "../../util/kvm-stat.h"
-#include 
+#include 
+
+#define DECODE_STR_LEN 40
+#define VCPU_ID "id"
+#define KVM_EXIT_REASON "icptcode"


I would probably drop them. There are no users besides newly
introduced const char *vcpu_id_str and decode_str_len etc anyway.


Right, will drop them.


  define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
  define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -83,6 +87,10 @@ const char * const kvm_events_tp[] = {
NULL,
  };

+const char *vcpu_id_str = VCPU_ID;
+const int decode_str_len = DECODE_STR_LEN;
+const char *exit_reason_code = KVM_EXIT_REASON;
+
  struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = &exit_events },
{ NULL, NULL },
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..2d0d43b5 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,5 +1,11 @@
  #include "../../util/kvm-stat.h"
-#include 
+#include 
+#include 
+#include 
+
+#define DECODE_STR_LEN 20
+#define VCPU_ID "vcpu_id"
+#define KVM_EXIT_REASON "exit_reason"

  define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
  define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -129,6 +135,10 @@ const char * const kvm_events_tp[] = {
NULL,
  };

+const char *vcpu_id_str = VCPU_ID;
+const int decode_str_len = DECODE_STR_LEN;
+const char *exit_reason_code = KVM_EXIT_REASON;
+
  struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = &exit_events },
{ .name = "mmio", .ops = &mmio_events },
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index fc1cffb..ef25fcf 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -31,20 +31,18 @@
  #include 

  #ifdef HAVE_KVM_STAT_SUPPORT
-#include 
  #include "util/kvm-stat.h"

-void exit_event_get_key(struct perf_evsel *evsel,
-   struct perf_sample *sample,
+void exit_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
struct event_key *key)
  {
key->info = 0;
-   key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+   key->key = perf_evsel__intval(evsel, sample, exit_reason_code);
  }

  bool kvm_exit_event(struct perf_evsel *evsel)
  {
-   return !strcmp(evsel->name, KVM_EXIT_TRACE);
+   return !strncmp(evsel->name, kvm_events_tp[1], strlen(evsel->name));
  }

Hmm, direct access to kvm_events_tp? Maybe add a getter for this or
something like extern char *kvm_exit_trace;?


Makes sense, will make them extern const kvm_{exit,entry}_trace and
assign them in their respective archs.


/* why strncmp? */


  bool exit_event_begin(struct perf_evsel *evsel,
@@ -60,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel,

  bool kvm_entry_event(struct perf_evsel *evsel)
  {
-   return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+   return !strncmp(evsel->name, kvm_events_tp[0], strlen(evsel->name));
  }

  bool exit_event_end(struct perf_evsel *evsel,
@@ -71,8 +69,8 @@ bool exit_event_end(struct perf_evsel *evsel,
  }

  static const char *get_exit_reason(struct perf_kvm_stat *kvm,
-  struct exit_reasons_table *tbl,
-  u64 exit_code)
+   struct exit_reasons_table *tbl,
+   u64 exit_code)
  {
while (tbl->reason != NULL) {
if (tbl->exit_code == exit_code)
@@ -92,7 +90,7 @@ void exit_event_decode_key(struct pe

[PATCH v7 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2015-09-24 Thread Hemant Kumar
It's better to remove the dependency on uapi/kvm_perf.h to allow dynamic
discovery of kvm events (if it's needed). To do this, some extern
variables have been introduced with which we can keep the generic
functions generic.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/arch/s390/util/kvm-stat.c | 10 +++-
 tools/perf/arch/x86/util/kvm-stat.c  | 12 +-
 tools/perf/builtin-kvm.c | 44 ++--
 tools/perf/util/kvm-stat.h   |  3 +++
 4 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..c2acb3e 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -10,7 +10,11 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include 
+#include 
+
+#define DECODE_STR_LEN 40
+#define VCPU_ID "id"
+#define KVM_EXIT_REASON "icptcode"
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -83,6 +87,10 @@ const char * const kvm_events_tp[] = {
NULL,
 };
 
+const char *vcpu_id_str = VCPU_ID;
+const int decode_str_len = DECODE_STR_LEN;
+const char *exit_reason_code = KVM_EXIT_REASON;
+
 struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = &exit_events },
{ NULL, NULL },
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..2d0d43b5 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,5 +1,11 @@
 #include "../../util/kvm-stat.h"
-#include 
+#include 
+#include 
+#include 
+
+#define DECODE_STR_LEN 20
+#define VCPU_ID "vcpu_id"
+#define KVM_EXIT_REASON "exit_reason"
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -129,6 +135,10 @@ const char * const kvm_events_tp[] = {
NULL,
 };
 
+const char *vcpu_id_str = VCPU_ID;
+const int decode_str_len = DECODE_STR_LEN;
+const char *exit_reason_code = KVM_EXIT_REASON;
+
 struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = &exit_events },
{ .name = "mmio", .ops = &mmio_events },
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index fc1cffb..dbb1b1e 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -31,20 +31,19 @@
 #include 
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include 
 #include "util/kvm-stat.h"
 
-void exit_event_get_key(struct perf_evsel *evsel,
-   struct perf_sample *sample,
-   struct event_key *key)
+void exit_event_get_key(struct perf_evsel *evsel __maybe_unused,
+  struct perf_sample *sample __maybe_unused,
+  struct event_key *key __maybe_unused)
 {
key->info = 0;
-   key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+   key->key = perf_evsel__intval(evsel, sample, exit_reason_code);
 }
 
-bool kvm_exit_event(struct perf_evsel *evsel)
+bool kvm_exit_event(struct perf_evsel *evsel __maybe_unused)
 {
-   return !strcmp(evsel->name, KVM_EXIT_TRACE);
+   return !strncmp(evsel->name, kvm_events_tp[1], strlen(evsel->name));
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -58,9 +57,9 @@ bool exit_event_begin(struct perf_evsel *evsel,
return false;
 }
 
-bool kvm_entry_event(struct perf_evsel *evsel)
+bool kvm_entry_event(struct perf_evsel *evsel __maybe_unused)
 {
-   return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+   return !strncmp(evsel->name, kvm_events_tp[0], strlen(evsel->name));
 }
 
 bool exit_event_end(struct perf_evsel *evsel,
@@ -71,8 +70,8 @@ bool exit_event_end(struct perf_evsel *evsel,
 }
 
 static const char *get_exit_reason(struct perf_kvm_stat *kvm,
-  struct exit_reasons_table *tbl,
-  u64 exit_code)
+   struct exit_reasons_table *tbl,
+   u64 exit_code)
 {
while (tbl->reason != NULL) {
if (tbl->exit_code == exit_code)
@@ -92,7 +91,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm,
const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
  key->key);
 
-   scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason);
+   scnprintf(decode, decode_str_len, "%s", exit_reason);
 }
 
 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
@@ -358,7 +357,11 @@ static bool handle_end_event(struct perf_kvm_stat *kvm,
time_diff = sample->time - time_begin;
 
if (kvm->duration && time_diff > kvm->duration)

[PATCH v7 2/4] perf,kvm/{x86,s390}: Remove const from kvm_events_tp

2015-09-24 Thread Hemant Kumar
This patch removes the "const" qualifier from kvm_events_tp declaration
to account for the fact that powerpc will need to update this variable
dynamically depending on the machine type.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/arch/s390/util/kvm-stat.c | 2 +-
 tools/perf/arch/x86/util/kvm-stat.c  | 2 +-
 tools/perf/util/kvm-stat.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index c2acb3e..575e8da5 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -77,7 +77,7 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_s390_sie_enter",
"kvm:kvm_s390_sie_exit",
"kvm:kvm_s390_intercept_instruction",
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 2d0d43b5..46d4e0c4 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -127,7 +127,7 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
"kvm:kvm_mmio",
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 59ed51c..fd9f40f 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -133,7 +133,7 @@ bool kvm_entry_event(struct perf_evsel *evsel);
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
 extern const char *vcpu_id_str;
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v7 3/4] perf,kvm/powerpc: Port perf kvm stat to powerpc

2015-09-24 Thread Hemant Kumar
perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid <pid>

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

Since different powerpc machines have different KVM tracepoints, this
patch discovers the available tracepoints dynamically and accordingly
looks for them. If any single tracepoint is not present, this support
won't be enabled for reporting. To record, this will fail if any of the
events we are looking to record isn't available.
Right now, it's only supported on PowerPC Book3S_HV architectures.

To analyze the different exits, group them and present them (in a slightly
descriptive way) to the user, we need a mapping between the "exit
code" (dumped in the kvm_guest_exit tracepoint data) and its related
Interrupt vector description (exit reason). This patch adds this mapping
in book3s_hv_exits.h.

It records on two available KVM tracepoints for book3s_hv:
"kvm_hv:kvm_guest_exit" and "kvm_hv:kvm_guest_enter".

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515

Analyze events for pid(s) 60515, all VCPUs:

   VM-EXITSamples  Samples% Time%Min TimeMax Time 
Avg time

   SYSCALL   914163.67% 7.49%  1.26us   5782.39us  
9.87us ( +-   6.46% )
H_DATA_STORAGE   411428.66% 5.07%  1.72us   4597.68us 
14.84us ( +-  20.06% )
HV_DECREMENTER418 2.91% 4.26%  0.70us  30002.22us
122.58us ( +-  70.29% )
  EXTERNAL392 2.73% 0.06%  0.64us104.10us  
1.94us ( +-  18.83% )
RETURN_TO_HOST287 2.00%83.11%  1.53us 124240.15us   
3486.52us ( +-  16.81% )
H_INST_STORAGE  5 0.03% 0.00%  1.88us  3.73us  
2.39us ( +-  14.20% )

Total Samples:14357, Total events handled time:1203918.42us.

Signed-off-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changes :
- Removed dependency on arch/uapi

 tools/perf/arch/powerpc/Makefile   |   2 +
 tools/perf/arch/powerpc/util/Build |   1 +
 tools/perf/arch/powerpc/util/book3s_hv_exits.h |  33 
 tools/perf/arch/powerpc/util/kvm-stat.c| 105 +
 tools/perf/builtin-kvm.c   |  12 +++
 tools/perf/util/kvm-stat.h |   1 +
 6 files changed, 154 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hv_exits.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..9f9cea3 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h 
b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 000..e68ba2d
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x0,   "RETURN_TO_HOST"}, \
+   {0x100, "SYSTEM_RESET"}, \
+   {0x200, "MACHINE_CHECK"}, \
+   {0x300, "DATA_STORAGE"}, \
+   {0x380, "DATA_SEGMENT"}, \
+   {0x400, "INST_STORAGE"}, \
+   {0x480, "INST_SEGMENT"}, \
+   {0x500, "EXTERNAL"}, \
+   {0x501, "EXTERNAL_LEVEL"}, \
+   {0x502, "EXTERNAL_HV"}, \
+   {0x600, "ALIGNMENT"}, \
+   {0x700, "PROGRAM"}, \
+   {0x800, "FP_UNAVAIL"}, \
+   {0x900, "DECREMENTER"}, \
+   {0x980, "HV_DECREMENTER"}, \
+   {0xc00, "SYSCALL"}, \
+   {0xd00, "TRACE"}, \
+   {0xe00, "H_DATA_STORAGE"}, \
+   {0xe20,

[PATCH] perf/probe: Search both .eh_frame and .debug_frame sections for probe location

2015-09-23 Thread Hemant Kumar
perf probe through debuginfo__find_probes() in util/probe-finder.c
checks for the functions' frame descriptions in either .eh_frame section
of an ELF or the .debug_frame. The check is based on whether either one
of these sections is present. But sometimes .eh_frame, even if present,
may not be complete and may miss some descriptions. For example, on
powerpc, this may happen :
 $ gcc -g bin.c -o bin

 $ objdump --dwarf ./bin
 <1><145>: Abbrev Number: 7 (DW_TAG_subprogram)
<146>   DW_AT_external: 1
<146>   DW_AT_name: (indirect string, offset: 0x9e): main
<14a>   DW_AT_decl_file   : 1
<14b>   DW_AT_decl_line   : 39
<14c>   DW_AT_prototyped  : 1
<14c>   DW_AT_type: <0x57>
<150>   DW_AT_low_pc  : 0x17b8

If the .eh_frame and .debug_frame are checked for the same binary, we
will find that, .eh_frame (although present) doesn't contain a
description for "main" function.
But, .debug_frame has a description :

00d8 0024  FDE cie= pc=17b8..1838
  DW_CFA_advance_loc: 16 to 17c8
  DW_CFA_def_cfa_offset: 144
  DW_CFA_offset_extended_sf: r65 at cfa+16
...

Due to this (since perf checks whether .eh_frame is present and goes on
searching for that address inside that frame), perf is unable to process
the probes :
 # perf probe -x ./bin main
Failed to get call frame on 0x17b8
  Error: Failed to add events.

To avoid this issue, we need to check both the sections (.eh_frame and
.debug_frame), which is done in this patch.

Note that, we can always force everything into both .eh_frame and
.debug_frame by :
 $ gcc bin.c -fasynchronous-unwind-tables  -fno-dwarf2-cfi-asm -g -o bin

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/util/probe-finder.c | 59 +-
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 2da65a7..7ce02b9 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1022,9 +1022,8 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global 
*gl, void *data)
return DWARF_CB_OK;
 }
 
-/* Find probe points from debuginfo */
-static int debuginfo__find_probes(struct debuginfo *dbg,
- struct probe_finder *pf)
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
+ struct probe_finder *pf)
 {
struct perf_probe_point *pp = >pev->point;
Dwarf_Off off, noff;
@@ -1032,27 +1031,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
Dwarf_Die *diep;
int ret = 0;
 
-#if _ELFUTILS_PREREQ(0, 142)
-   Elf *elf;
-   GElf_Ehdr ehdr;
-   GElf_Shdr shdr;
-
-   /* Get the call frame information from this dwarf */
-   elf = dwarf_getelf(dbg->dbg);
-   if (elf == NULL)
-   return -EINVAL;
-
-   if (gelf_getehdr(elf, ) == NULL)
-   return -EINVAL;
-
-   if (elf_section_by_name(elf, , , ".eh_frame", NULL) &&
-   shdr.sh_type == SHT_PROGBITS) {
-   pf->cfi = dwarf_getcfi_elf(elf);
-   } else {
-   pf->cfi = dwarf_getcfi(dbg->dbg);
-   }
-#endif
-
off = 0;
pf->lcache = intlist__new(NULL);
if (!pf->lcache)
@@ -1115,6 +1093,39 @@ found:
return ret;
 }
 
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+ struct probe_finder *pf)
+{
+   int ret = 0;
+
+#if _ELFUTILS_PREREQ(0, 142)
+   Elf *elf;
+   GElf_Ehdr ehdr;
+   GElf_Shdr shdr;
+
+   /* Get the call frame information from this dwarf */
+   elf = dwarf_getelf(dbg->dbg);
+   if (elf == NULL)
+   return -EINVAL;
+
+   if (gelf_getehdr(elf, ) == NULL)
+   return -EINVAL;
+
+   if (elf_section_by_name(elf, , , ".eh_frame", NULL) &&
+   shdr.sh_type == SHT_PROGBITS) {
+   pf->cfi = dwarf_getcfi_elf(elf);
+   ret = debuginfo__find_probe_location(dbg, pf);
+   if (ret >= 0)
+   return ret;
+   }
+   pf->cfi = dwarf_getcfi(dbg->dbg);
+#endif
+
+   ret = debuginfo__find_probe_location(dbg, pf);
+   return ret;
+}
+
 struct local_vars_finder {
struct probe_finder *pf;
struct perf_probe_arg *args;
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v8 2/4] perf,kvm/{x86,s390}: Remove const from kvm_events_tp

2015-09-25 Thread Hemant Kumar
This patch removes the "const" qualifier from kvm_events_tp declaration
to account for the fact that powerpc will need to update this variable
dynamically depending on the machine type.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/arch/s390/util/kvm-stat.c | 2 +-
 tools/perf/arch/x86/util/kvm-stat.c  | 2 +-
 tools/perf/util/kvm-stat.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index c2acb3e..575e8da5 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -77,7 +77,7 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_s390_sie_enter",
"kvm:kvm_s390_sie_exit",
"kvm:kvm_s390_intercept_instruction",
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 2d0d43b5..46d4e0c4 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -127,7 +127,7 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
"kvm:kvm_mmio",
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 59ed51c..fd9f40f 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -133,7 +133,7 @@ bool kvm_entry_event(struct perf_evsel *evsel);
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
 extern const char *vcpu_id_str;
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v8 4/4] perf,kvm/powerpc: Add support for HCALL reasons

2015-09-25 Thread Hemant Kumar
Powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm stat to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
"kvm_hv:kvm_hcall_enter" and "kvm_hv:kvm_hcall_exit". To map the hcall
codes to their respective names, it needs a mapping. Such mapping is
added in this patch in book3s_hcalls.h.

 # pgrep qemu
A sample output :
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515 --event=hcall

Analyze events for all VMs, all VCPUs:

HCALL-EVENTSamples  Samples% Time%Min TimeMax Time 
Avg time

  H_IPI82266.08%88.10%  0.63us 11.38us  
2.05us ( +-   1.42% )
 H_SEND_CRQ14411.58% 3.77%  0.41us  0.88us  
0.50us ( +-   1.47% )
   H_VIO_SIGNAL118 9.49% 2.86%  0.37us  0.83us  
0.47us ( +-   1.43% )
H_PUT_TERM_CHAR 76 6.11% 2.07%  0.37us  0.90us  
0.52us ( +-   2.43% )
H_GET_TERM_CHAR 74 5.95% 2.23%  0.37us  1.70us  
0.58us ( +-   4.77% )
 H_RTAS  6 0.48% 0.85%  1.10us  9.25us  
2.70us ( +-  48.57% )
  H_PERFMON  4 0.32% 0.12%  0.41us  0.96us  
0.59us ( +-  20.92% )

Total Samples:1244, Total events handled time:1916.69us.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/util/book3s_hcalls.h | 123 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  |  66 +-
 2 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hcalls.h

diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h 
b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 000..0dd6b7f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+   {0x4, "H_REMOVE"},  \
+   {0x8, "H_ENTER"},   \
+   {0xc, "H_READ"},\
+   {0x10, "H_CLEAR_MOD"},  \
+   {0x14, "H_CLEAR_REF"},  \
+   {0x18, "H_PROTECT"},\
+   {0x1c, "H_GET_TCE"},\
+   {0x20, "H_PUT_TCE"},\
+   {0x24, "H_SET_SPRG0"},  \
+   {0x28, "H_SET_DABR"},   \
+   {0x2c, "H_PAGE_INIT"},  \
+   {0x30, "H_SET_ASR"},\
+   {0x34, "H_ASR_ON"}, \
+   {0x38, "H_ASR_OFF"},\
+   {0x3c, "H_LOGICAL_CI_LOAD"},\
+   {0x40, "H_LOGICAL_CI_STORE"},   \
+   {0x44, "H_LOGICAL_CACHE_LOAD"}, \
+   {0x48, "H_LOGICAL_CACHE_STORE"},\
+   {0x4c, "H_LOGICAL_ICBI"},   \
+   {0x50, "H_LOGICAL_DCBF"},   \
+   {0x54, "H_GET_TERM_CHAR"},  \
+   {0x58, "H_PUT_TERM_CHAR"},  \
+   {0x5c, "H_REAL_TO_LOGICAL"},\
+   {0x60, "H_HYPERVISOR_DATA"},\
+   {0x64, "H_EOI"},\
+   {0x68, "H_CPPR"},   \
+   {0x6c, "H_IPI"},\
+   {0x70, "H_IPOLL"},  \
+   {0x74, "H_XIRR"},   \
+   {0x78, "H_MIGRATE_DMA"},\
+   {0x7c, "H_PERFMON"},\
+   {0xdc, "H_REGISTER_VPA"},   \
+ 

[PATCH v8 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2015-09-25 Thread Hemant Kumar
It's better to remove the dependency on uapi/kvm_perf.h to allow dynamic
discovery of kvm events (if it's needed). To do this, some extern
variables have been introduced with which we can keep the generic
functions generic.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changes since v7:
- Removed __maybe_unused for some parameters which weren't needed.

 tools/perf/arch/s390/util/kvm-stat.c | 10 -
 tools/perf/arch/x86/util/kvm-stat.c  | 12 ++-
 tools/perf/builtin-kvm.c | 39 +++-
 tools/perf/util/kvm-stat.h   |  3 +++
 4 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..c2acb3e 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -10,7 +10,11 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include 
+#include 
+
+#define DECODE_STR_LEN 40
+#define VCPU_ID "id"
+#define KVM_EXIT_REASON "icptcode"
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -83,6 +87,10 @@ const char * const kvm_events_tp[] = {
NULL,
 };
 
+const char *vcpu_id_str = VCPU_ID;
+const int decode_str_len = DECODE_STR_LEN;
+const char *exit_reason_code = KVM_EXIT_REASON;
+
 struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = _events },
{ NULL, NULL },
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..2d0d43b5 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,5 +1,11 @@
 #include "../../util/kvm-stat.h"
-#include 
+#include 
+#include 
+#include 
+
+#define DECODE_STR_LEN 20
+#define VCPU_ID "vcpu_id"
+#define KVM_EXIT_REASON "exit_reason"
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -129,6 +135,10 @@ const char * const kvm_events_tp[] = {
NULL,
 };
 
+const char *vcpu_id_str = VCPU_ID;
+const int decode_str_len = DECODE_STR_LEN;
+const char *exit_reason_code = KVM_EXIT_REASON;
+
 struct kvm_reg_events_ops kvm_reg_events_ops[] = {
{ .name = "vmexit", .ops = _events },
{ .name = "mmio", .ops = _events },
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index fc1cffb..ef25fcf 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -31,20 +31,18 @@
 #include 
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include 
 #include "util/kvm-stat.h"
 
-void exit_event_get_key(struct perf_evsel *evsel,
-   struct perf_sample *sample,
+void exit_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
struct event_key *key)
 {
key->info = 0;
-   key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+   key->key = perf_evsel__intval(evsel, sample, exit_reason_code);
 }
 
 bool kvm_exit_event(struct perf_evsel *evsel)
 {
-   return !strcmp(evsel->name, KVM_EXIT_TRACE);
+   return !strncmp(evsel->name, kvm_events_tp[1], strlen(evsel->name));
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -60,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel,
 
 bool kvm_entry_event(struct perf_evsel *evsel)
 {
-   return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+   return !strncmp(evsel->name, kvm_events_tp[0], strlen(evsel->name));
 }
 
 bool exit_event_end(struct perf_evsel *evsel,
@@ -71,8 +69,8 @@ bool exit_event_end(struct perf_evsel *evsel,
 }
 
 static const char *get_exit_reason(struct perf_kvm_stat *kvm,
-  struct exit_reasons_table *tbl,
-  u64 exit_code)
+   struct exit_reasons_table *tbl,
+   u64 exit_code)
 {
while (tbl->reason != NULL) {
if (tbl->exit_code == exit_code)
@@ -92,7 +90,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm,
const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
  key->key);
 
-   scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason);
+   scnprintf(decode, decode_str_len, "%s", exit_reason);
 }
 
 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
@@ -358,7 +356,12 @@ static bool handle_end_event(struct perf_kvm_stat *kvm,
time_diff = sample->time - time_begin;
 
if (kvm->duration && time_diff > kvm->duration) {
-   char decode[DECODE_STR_LEN];
+   char *decode = zalloc(decode_str_len);
+
+   if (!decode) {
+   pr_err("Not enough memory\n");
+

[PATCH v8 3/4] perf,kvm/powerpc: Port perf kvm stat to powerpc

2015-09-25 Thread Hemant Kumar
perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid <pid>

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

Since different powerpc machines have different KVM tracepoints, this
patch discovers the available tracepoints dynamically and accordingly
looks for them. If any single tracepoint is not present, this support
won't be enabled for reporting. To record, this will fail if any of the
events we are looking to record isn't available.
Right now, it's only supported on PowerPC Book3S_HV architectures.

To analyze the different exits, group them and present them (in a slightly
descriptive way) to the user, we need a mapping between the "exit
code" (dumped in the kvm_guest_exit tracepoint data) and its related
Interrupt vector description (exit reason). This patch adds this mapping
in book3s_hv_exits.h.

It records on two available KVM tracepoints for book3s_hv:
"kvm_hv:kvm_guest_exit" and "kvm_hv:kvm_guest_enter".

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515

Analyze events for pid(s) 60515, all VCPUs:

   VM-EXITSamples  Samples% Time%Min TimeMax Time 
Avg time

   SYSCALL   914163.67% 7.49%  1.26us   5782.39us  
9.87us ( +-   6.46% )
H_DATA_STORAGE   411428.66% 5.07%  1.72us   4597.68us 
14.84us ( +-  20.06% )
HV_DECREMENTER418 2.91% 4.26%  0.70us  30002.22us
122.58us ( +-  70.29% )
  EXTERNAL392 2.73% 0.06%  0.64us104.10us  
1.94us ( +-  18.83% )
RETURN_TO_HOST287 2.00%83.11%  1.53us 124240.15us   
3486.52us ( +-  16.81% )
H_INST_STORAGE  5 0.03% 0.00%  1.88us  3.73us  
2.39us ( +-  14.20% )

Total Samples:14357, Total events handled time:1203918.42us.

Signed-off-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changes since v7:
- Fixed a kvm stat live bug for ppc.

 tools/perf/arch/powerpc/Makefile   |   2 +
 tools/perf/arch/powerpc/util/Build |   1 +
 tools/perf/arch/powerpc/util/book3s_hv_exits.h |  33 
 tools/perf/arch/powerpc/util/kvm-stat.c| 105 +
 tools/perf/builtin-kvm.c   |  18 +
 tools/perf/util/kvm-stat.h |   1 +
 6 files changed, 160 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hv_exits.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..9f9cea3 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h 
b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 000..e68ba2d
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x0,   "RETURN_TO_HOST"}, \
+   {0x100, "SYSTEM_RESET"}, \
+   {0x200, "MACHINE_CHECK"}, \
+   {0x300, "DATA_STORAGE"}, \
+   {0x380, "DATA_SEGMENT"}, \
+   {0x400, "INST_STORAGE"}, \
+   {0x480, "INST_SEGMENT"}, \
+   {0x500, "EXTERNAL"}, \
+   {0x501, "EXTERNAL_LEVEL"}, \
+   {0x502, "EXTERNAL_HV"}, \
+   {0x600, "ALIGNMENT"}, \
+   {0x700, "PROGRAM"}, \
+   {0x800, "FP_UNAVAIL"}, \
+   {0x900, "DECREMENTER"}, \
+   {0x980, "HV_DECREMENTER"}, \
+   {0xc00, "SYSCALL"}, \
+   {0xd00, "TRACE"}, \
+   {0xe00, "H_DATA_STORAGE"}, \

[PATCH 4/4] perf,kvm/powerpc: Add support for HCALL reasons

2015-09-24 Thread Hemant Kumar
Powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm stat to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
"kvm_hv:kvm_hcall_enter" and "kvm_hv:kvm_hcall_exit". To map the hcall
codes to their respective names, it needs a mapping. Such mapping is
added in this patch in book3s_hcalls.h.

 # pgrep qemu
A sample output :
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515 --event=hcall

Analyze events for all VMs, all VCPUs:

HCALL-EVENTSamples  Samples% Time%Min TimeMax Time 
Avg time

  H_IPI82266.08%88.10%  0.63us 11.38us  
2.05us ( +-   1.42% )
 H_SEND_CRQ14411.58% 3.77%  0.41us  0.88us  
0.50us ( +-   1.47% )
   H_VIO_SIGNAL118 9.49% 2.86%  0.37us  0.83us  
0.47us ( +-   1.43% )
H_PUT_TERM_CHAR 76 6.11% 2.07%  0.37us  0.90us  
0.52us ( +-   2.43% )
H_GET_TERM_CHAR 74 5.95% 2.23%  0.37us  1.70us  
0.58us ( +-   4.77% )
 H_RTAS  6 0.48% 0.85%  1.10us  9.25us  
2.70us ( +-  48.57% )
  H_PERFMON  4 0.32% 0.12%  0.41us  0.96us  
0.59us ( +-  20.92% )

Total Samples:1244, Total events handled time:1916.69us.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/util/book3s_hcalls.h | 123 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  |  66 +-
 2 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hcalls.h

diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h 
b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 000..0dd6b7f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+   {0x4, "H_REMOVE"},  \
+   {0x8, "H_ENTER"},   \
+   {0xc, "H_READ"},\
+   {0x10, "H_CLEAR_MOD"},  \
+   {0x14, "H_CLEAR_REF"},  \
+   {0x18, "H_PROTECT"},\
+   {0x1c, "H_GET_TCE"},\
+   {0x20, "H_PUT_TCE"},\
+   {0x24, "H_SET_SPRG0"},  \
+   {0x28, "H_SET_DABR"},   \
+   {0x2c, "H_PAGE_INIT"},  \
+   {0x30, "H_SET_ASR"},\
+   {0x34, "H_ASR_ON"}, \
+   {0x38, "H_ASR_OFF"},\
+   {0x3c, "H_LOGICAL_CI_LOAD"},\
+   {0x40, "H_LOGICAL_CI_STORE"},   \
+   {0x44, "H_LOGICAL_CACHE_LOAD"}, \
+   {0x48, "H_LOGICAL_CACHE_STORE"},\
+   {0x4c, "H_LOGICAL_ICBI"},   \
+   {0x50, "H_LOGICAL_DCBF"},   \
+   {0x54, "H_GET_TERM_CHAR"},  \
+   {0x58, "H_PUT_TERM_CHAR"},  \
+   {0x5c, "H_REAL_TO_LOGICAL"},\
+   {0x60, "H_HYPERVISOR_DATA"},\
+   {0x64, "H_EOI"},\
+   {0x68, "H_CPPR"},   \
+   {0x6c, "H_IPI"},\
+   {0x70, "H_IPOLL"},  \
+   {0x74, "H_XIRR"},   \
+   {0x78, "H_MIGRATE_DMA"},\
+   {0x7c, "H_PERFMON"},\
+   {0xdc, "H_REGISTER_VPA"},   \
+ 

[PATCH v2] perf/probe: Search both .eh_frame and .debug_frame sections for probe location

2016-01-04 Thread Hemant Kumar
perf probe through debuginfo__find_probes() in util/probe-finder.c
checks for the functions' frame descriptions in either .eh_frame section
of an ELF or the .debug_frame. The check is based on whether either one
of these sections is present. Depending on distro, toolchain defaults,
architecture, build flags, etc., CFI might be found in either .eh_frame
and/or .debug_frame. Sometimes .eh_frame, even if present, may not be
complete and may miss some descriptions. Therefore, to be sure of finding
the CFI covering an address, we will always have to investigate both if
available.

For example, on powerpc, this may happen :
 $ gcc -g bin.c -o bin

 $ objdump --dwarf ./bin
 <1><145>: Abbrev Number: 7 (DW_TAG_subprogram)
<146>   DW_AT_external: 1
<146>   DW_AT_name: (indirect string, offset: 0x9e): main
<14a>   DW_AT_decl_file   : 1
<14b>   DW_AT_decl_line   : 39
<14c>   DW_AT_prototyped  : 1
<14c>   DW_AT_type: <0x57>
<150>   DW_AT_low_pc  : 0x17b8

If the .eh_frame and .debug_frame are checked for the same binary, we
will find that, .eh_frame (although present) doesn't contain a
description for "main" function.
But, .debug_frame has a description :

00d8 0024  FDE cie= pc=17b8..1838
  DW_CFA_advance_loc: 16 to 17c8
  DW_CFA_def_cfa_offset: 144
  DW_CFA_offset_extended_sf: r65 at cfa+16
...

Due to this (since, perf checks whether .eh_frame is present and goes on
searching for that address inside that frame), perf is unable to process
the probes :
 # perf probe -x ./bin main
Failed to get call frame on 0x17b8
  Error: Failed to add events.

To avoid this issue, we need to check both the sections (.eh_frame and
.debug_frame), which is done in this patch.

Note that, we can always force everything into both .eh_frame and
.debug_frame by :
 $ gcc bin.c -fasynchronous-unwind-tables  -fno-dwarf2-cfi-asm -g -o bin

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changes since v1:
- pf->cfi is now cached as pf->cfi_eh and pf->cfi_dbg depending on the
  source of CFI (Suggested by Mark Wielaard).

 tools/perf/util/probe-finder.c | 63 +-
 tools/perf/util/probe-finder.h |  5 +++-
 2 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 05012bb..71bf27e 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -685,9 +685,10 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct 
probe_finder *pf)
pf->fb_ops = NULL;
 #if _ELFUTILS_PREREQ(0, 142)
} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
-  pf->cfi != NULL) {
+  (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
Dwarf_Frame *frame;
-   if (dwarf_cfi_addrframe(pf->cfi, pf->addr, ) != 0 ||
+   if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, ) != 0 &&
+(dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, ) != 0)) 
||
dwarf_frame_cfa(frame, >fb_ops, ) != 0) {
pr_warning("Failed to get call frame on 0x%jx\n",
   (uintmax_t)pf->addr);
@@ -1013,8 +1014,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global 
*gl, void *data)
return DWARF_CB_OK;
 }
 
-/* Find probe points from debuginfo */
-static int debuginfo__find_probes(struct debuginfo *dbg,
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
  struct probe_finder *pf)
 {
struct perf_probe_point *pp = >pev->point;
@@ -1023,27 +1023,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
Dwarf_Die *diep;
int ret = 0;
 
-#if _ELFUTILS_PREREQ(0, 142)
-   Elf *elf;
-   GElf_Ehdr ehdr;
-   GElf_Shdr shdr;
-
-   /* Get the call frame information from this dwarf */
-   elf = dwarf_getelf(dbg->dbg);
-   if (elf == NULL)
-   return -EINVAL;
-
-   if (gelf_getehdr(elf, ) == NULL)
-   return -EINVAL;
-
-   if (elf_section_by_name(elf, , , ".eh_frame", NULL) &&
-   shdr.sh_type == SHT_PROGBITS) {
-   pf->cfi = dwarf_getcfi_elf(elf);
-   } else {
-   pf->cfi = dwarf_getcfi(dbg->dbg);
-   }
-#endif
-
off = 0;
pf->lcache = intlist__new(NULL);
if (!pf->lcache)
@@ -1106,6 +1085,40 @@ found:
return ret;
 }
 
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+ struct probe_finder *pf)
+{
+   int ret = 0;
+
+#if _ELFUTILS_PREREQ(0, 142)
+   Elf *elf;
+   GElf_Ehdr ehdr;
+  

[PATCH v10 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2016-01-03 Thread Hemant Kumar
It's better to remove the dependency on uapi/kvm_perf.h to allow dynamic
discovery of kvm events (if it's needed). To do this, some extern
variables have been introduced with which we can keep the generic
functions generic.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Acked-by: Alexander Yarygin <yary...@linux.vnet.ibm.com>
---
Changelog:
v9 to v10:
- Changed from dynamic to static declaration of "decode".
v8 to v9:
- Removed the macro definitions.
- Changed the access of kvm_entry_trace and kvm_exit_trace
- Removed unnecessary formatting.
v7 to v8:
- Removed unnecessary __unused_parameter modifiers.

 tools/perf/arch/s390/util/kvm-stat.c |  8 +++-
 tools/perf/arch/x86/util/kvm-stat.c  | 14 +++---
 tools/perf/builtin-kvm.c | 20 ++--
 tools/perf/util/kvm-stat.h   |  5 +
 4 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..b85a94b 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -10,7 +10,7 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include 
+#include 
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, 
sigp_order_codes);
 define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
 define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
 
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
 static void event_icpt_insn_get_key(struct perf_evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..babefda 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,5 +1,7 @@
 #include "../../util/kvm-stat.h"
-#include 
+#include 
+#include 
+#include 
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
 /*
  * For the mmio events, we treat:
  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
@@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm 
__maybe_unused,
  struct event_key *key,
  char *decode)
 {
-   scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
+   scnprintf(decode, decode_str_len, "%#lx:%s",
  (unsigned long)key->key,
  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
 }
@@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat 
*kvm __maybe_unused,
struct event_key *key,
char *decode)
 {
-   scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
+   scnprintf(decode, decode_str_len, "%#llx:%s",
  (unsigned long long)key->key,
  key->info ? "POUT" : "PIN");
 }
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index dd94b4c..9d250df 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -30,7 +30,6 @@
 #include 
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include 
 #include "util/kvm-stat.h"
 
 void exit_event_get_key(struct perf_evsel *evsel,
@@ -38,12 +37,12 @@ void exit_event_get_key(struct perf_evsel *evsel,
struct event_key *key)
 {
key->info = 0;
-   key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+   key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
 }
 
 bool kvm_exit_event(struct perf_evsel *evsel)
 {
-   return !strcmp(evsel->name, KVM_EXIT_TRACE);
+   return !strcmp(evsel->name, kvm_exit_trace);
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -59,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel,
 
 bool kvm_entry_event(struct perf_evsel *evsel)
 {
-   return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+   return !strcmp(evsel->name, kvm_entry_trace);
 }
 
 bool exit_event_end(struct perf_evsel *evsel,
@@ -91,7 +

[PATCH v10 2/4] perf,kvm/{x86,s390}: Remove const from kvm_events_tp

2016-01-03 Thread Hemant Kumar
This patch removes the "const" qualifier from kvm_events_tp declaration
to account for the fact that some architectures may need to update this
variable dynamically. For instance, powerpc will need to update this
variable dynamically depending on the machine type.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 tools/perf/arch/s390/util/kvm-stat.c | 2 +-
 tools/perf/arch/x86/util/kvm-stat.c  | 2 +-
 tools/perf/util/kvm-stat.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index b85a94b..ed57df2 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -79,7 +79,7 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_s390_sie_enter",
"kvm:kvm_s390_sie_exit",
"kvm:kvm_s390_intercept_instruction",
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index babefda..b63d4be 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -129,7 +129,7 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
"kvm:kvm_mmio",
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index dd55548..c965dc8 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -133,7 +133,7 @@ bool kvm_entry_event(struct perf_evsel *evsel);
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
 extern const char *vcpu_id_str;
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v9 4/4] perf,kvm/powerpc: Add support for HCALL reasons

2016-01-03 Thread Hemant Kumar
Powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm stat to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
"kvm_hv:kvm_hcall_enter" and "kvm_hv:kvm_hcall_exit". To map the hcall
codes to their respective names, it needs a mapping. Such mapping is
added in this patch in book3s_hcalls.h.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515 --event=hcall

Analyze events for all VMs, all VCPUs:

HCALL-EVENTSamples  Samples% Time%Min TimeMax Time 
Avg time

  H_IPI82266.08%88.10%  0.63us 11.38us  
2.05us ( +-   1.42% )
 H_SEND_CRQ14411.58% 3.77%  0.41us  0.88us  
0.50us ( +-   1.47% )
   H_VIO_SIGNAL118 9.49% 2.86%  0.37us  0.83us  
0.47us ( +-   1.43% )
H_PUT_TERM_CHAR 76 6.11% 2.07%  0.37us  0.90us  
0.52us ( +-   2.43% )
H_GET_TERM_CHAR 74 5.95% 2.23%  0.37us  1.70us  
0.58us ( +-   4.77% )
 H_RTAS  6 0.48% 0.85%  1.10us  9.25us  
2.70us ( +-  48.57% )
  H_PERFMON  4 0.32% 0.12%  0.41us  0.96us  
0.59us ( +-  20.92% )

Total Samples:1244, Total events handled time:1916.69us.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v8 to v9:
- Removed the macros (which were being used only once).
v6 to v7:
- Removed dependency on uapi.
v4 to v5:
- Removed dependency on arch/powerpc/include/asm/hvcall.h and added them
in userspace side.
- No more arch side dependency.
v1 to v2:
- Split the patches for powerpc and perf.

 tools/perf/arch/powerpc/util/book3s_hcalls.h | 123 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  |  65 +-
 2 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hcalls.h

diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h 
b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 000..0dd6b7f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+   {0x4, "H_REMOVE"},  \
+   {0x8, "H_ENTER"},   \
+   {0xc, "H_READ"},\
+   {0x10, "H_CLEAR_MOD"},  \
+   {0x14, "H_CLEAR_REF"},  \
+   {0x18, "H_PROTECT"},\
+   {0x1c, "H_GET_TCE"},\
+   {0x20, "H_PUT_TCE"},\
+   {0x24, "H_SET_SPRG0"},  \
+   {0x28, "H_SET_DABR"},   \
+   {0x2c, "H_PAGE_INIT"},  \
+   {0x30, "H_SET_ASR"},\
+   {0x34, "H_ASR_ON"}, \
+   {0x38, "H_ASR_OFF"},\
+   {0x3c, "H_LOGICAL_CI_LOAD"},\
+   {0x40, "H_LOGICAL_CI_STORE"},   \
+   {0x44, "H_LOGICAL_CACHE_LOAD"}, \
+   {0x48, "H_LOGICAL_CACHE_STORE"},\
+   {0x4c, "H_LOGICAL_ICBI"},   \
+   {0x50, "H_LOGICAL_DCBF"},   \
+   {0x54, "H_GET_TERM_CHAR"},  \
+   {0x58, "H_PUT_TERM_CHAR"},  \
+   {0x5c, "H_REAL_TO_LOGICAL"},\
+   {0x60, "H_HYPERVISOR_DATA"},\
+   {0x64, "H_EOI"},\
+   {0x68, "H_CPPR"},   \
+   {0x6c, "H_IPI"},\
+   {0x70, "H_IPOLL"},  \
+   {0x74, "H_XIRR"}, 

[PATCH v10 3/4] perf,kvm/powerpc: Port perf kvm stat to powerpc

2016-01-03 Thread Hemant Kumar
perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid <pid>

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

Since different powerpc machines have different KVM tracepoints, this
patch discovers the available tracepoints dynamically and accordingly
looks for them. If any single tracepoint is not present, this support
won't be enabled for reporting. Recording will fail if any of the
events we are looking to record isn't available.
Right now, it's only supported on PowerPC Book3S_HV architectures.

To analyze the different exits, group them and present them (in a slightly
descriptive way) to the user, we need a mapping between the "exit
code" (dumped in the kvm_guest_exit tracepoint data) and its related
interrupt vector description (exit reason). This patch adds this mapping
in book3s_hv_exits.h.

It records on two available KVM tracepoints for book3s_hv:
"kvm_hv:kvm_guest_exit" and "kvm_hv:kvm_guest_enter".

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515

Analyze events for pid(s) 60515, all VCPUs:

   VM-EXITSamples  Samples% Time%Min TimeMax Time 
Avg time

   SYSCALL   914163.67% 7.49%  1.26us   5782.39us  
9.87us ( +-   6.46% )
H_DATA_STORAGE   411428.66% 5.07%  1.72us   4597.68us 
14.84us ( +-  20.06% )
HV_DECREMENTER418 2.91% 4.26%  0.70us  30002.22us
122.58us ( +-  70.29% )
  EXTERNAL392 2.73% 0.06%  0.64us104.10us  
1.94us ( +-  18.83% )
RETURN_TO_HOST287 2.00%83.11%  1.53us 124240.15us   
3486.52us ( +-  16.81% )
H_INST_STORAGE  5 0.03% 0.00%  1.88us  3.73us  
2.39us ( +-  14.20% )

Total Samples:14357, Total events handled time:1203918.42us.

Signed-off-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v8 to v9:
- Moved the book3s specific setup into one function.
- Removed the macros (which were being used only once).
- Formatting changes.
v7 to v8:
- Fixed a perf kvm stat live bug.
v6 to v7:
- Removed dependency on uapi.
v4 to v5:
- Removed dependency on arch/powerpc/kvm/trace_book3s.h and added them in
the userspace side.
- No more arch side dependency.
v1 to v3:
- Split the patches for powerpc and perf

 tools/perf/arch/powerpc/Makefile   |   2 +
 tools/perf/arch/powerpc/util/Build |   1 +
 tools/perf/arch/powerpc/util/book3s_hv_exits.h |  33 
 tools/perf/arch/powerpc/util/kvm-stat.c| 100 +
 tools/perf/builtin-kvm.c   |  18 +
 tools/perf/util/kvm-stat.h |   1 +
 6 files changed, 155 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hv_exits.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..9f9cea3 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h 
b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 000..e68ba2d
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x0,   "RETURN_TO_HOST"}, \
+   {0x100, "SYSTEM_RESET"}, \
+   {0x200, "MACHINE_CHECK"}, \
+   {0x300, "DATA_STORAGE"}, \
+   {0x380, "DATA_SEGMENT"}, \
+   {0x400, "INST_STORAGE"}, \
+   {0x480, "INST_SEGMENT"}, \
+   {0x500, "EXTERNAL"}, \
+   {0x501, "EXTERNAL_LEVEL"}, \
+   {0x

Re: [PATCH v2] perf/probe: Search both .eh_frame and .debug_frame sections for probe location

2016-01-13 Thread Hemant Kumar



On 01/12/2016 04:34 PM, 平松雅巳 / HIRAMATU,MASAMI wrote:

Hi Hemant,


From: Hemant Kumar [mailto:hem...@linux.vnet.ibm.com]

perf probe through debuginfo__find_probes() in util/probe-finder.c
checks for the functions' frame descriptions in either .eh_frame section
of an ELF or the .debug_frame. The check is based on whether either one
of these sections is present. Depending on distro, toolchain defaults,
architecture, build flags, etc., CFI might be found in either .eh_frame
and/or .debug_frame. Sometimes, it may happen that, .eh_frame, even if
present, may not be complete and may miss some descriptions. Therefore,
to be sure, to find the CFI covering an address we will always have to
investigate both if available.

OK, so we'd better check both cfi's.
  [...]

+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+ struct probe_finder *pf)
+{
+   int ret = 0;
+
+#if _ELFUTILS_PREREQ(0, 142)
+   Elf *elf;
+   GElf_Ehdr ehdr;
+   GElf_Shdr shdr;
+
+   if (pf->cfi_eh || pf->cfi_dbg)
+   return debuginfo__find_probe_location(dbg, pf);
+
+   /* Get the call frame information from this dwarf */
+   elf = dwarf_getelf(dbg->dbg);
+   if (elf == NULL)
+   return -EINVAL;
+
+   if (gelf_getehdr(elf, ) == NULL)
+   return -EINVAL;
+
+   if (elf_section_by_name(elf, , , ".eh_frame", NULL) &&
+   shdr.sh_type == SHT_PROGBITS) {
+   pf->cfi_eh = dwarf_getcfi_elf(elf);
+   } else {
+   pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+   }

Hmm, if you want to check both of those cfi's, don't we have to do below?

if (elf_section_by_name(elf, , , ".eh_frame", NULL) &&
shdr.sh_type == SHT_PROGBITS)
pf->cfi_eh = dwarf_getcfi_elf(elf);

pf->cfi_dbg = dwarf_getcfi(dbg->dbg);

Then, both of pf->cfi_* will be filled (if the elf has ".eh_frame").

Thanks!


Ah, right. Fixed in v3.






--
Thanks,
Hemant Kumar

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3] perf/probe: Search both .eh_frame and .debug_frame sections for probe location

2016-01-13 Thread Hemant Kumar
perf probe through debuginfo__find_probes() in util/probe-finder.c
checks for the functions' frame descriptions in either .eh_frame section
of an ELF or the .debug_frame. The check is based on whether either one
of these sections is present. Depending on distro, toolchain defaults,
architecture, build flags, etc., CFI might be found in either .eh_frame
and/or .debug_frame. Sometimes, it may happen that, .eh_frame, even if
present, may not be complete and may miss some descriptions. Therefore,
to be sure, to find the CFI covering an address we will always have to
investigate both if available.

For e.g., in powerpc, this may happen :
 $ gcc -g bin.c -o bin

 $ objdump --dwarf ./bin
 <1><145>: Abbrev Number: 7 (DW_TAG_subprogram)
<146>   DW_AT_external: 1
<146>   DW_AT_name: (indirect string, offset: 0x9e): main
<14a>   DW_AT_decl_file   : 1
<14b>   DW_AT_decl_line   : 39
<14c>   DW_AT_prototyped  : 1
<14c>   DW_AT_type: <0x57>
<150>   DW_AT_low_pc  : 0x17b8

If the .eh_frame and .debug_frame are checked for the same binary, we
will find that, .eh_frame (although present) doesn't contain a
description for "main" function.
But, .debug_frame has a description :

00d8 0024  FDE cie= pc=17b8..1838
  DW_CFA_advance_loc: 16 to 17c8
  DW_CFA_def_cfa_offset: 144
  DW_CFA_offset_extended_sf: r65 at cfa+16
...

Due to this (since, perf checks whether .eh_frame is present and goes on
searching for that address inside that frame), perf is unable to process
the probes :
 # perf probe -x ./bin main
Failed to get call frame on 0x17b8
  Error: Failed to add events.

To avoid this issue, we need to check both the sections (.eh_frame and
.debug_frame), which is done in this patch.

Note that, we can always force everything into both .eh_frame and
.debug_frame by :
 $ gcc bin.c -fasynchronous-unwind-tables  -fno-dwarf2-cfi-asm -g -o bin

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changes since v2:
- Fixed an issue related to filling up both the CFIs (Suggested by Masami).

Changes since v1:
- pf->cfi is now cached as pf->cfi_eh and pf->cfi_dbg depending on the source 
of CFI
  (Suggested by Mark Wielaard).

 tools/perf/util/probe-finder.c | 62 +-
 tools/perf/util/probe-finder.h |  5 +++-
 2 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 05012bb..e4d0498 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -685,9 +685,10 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct 
probe_finder *pf)
pf->fb_ops = NULL;
 #if _ELFUTILS_PREREQ(0, 142)
} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
-  pf->cfi != NULL) {
+  (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
Dwarf_Frame *frame;
-   if (dwarf_cfi_addrframe(pf->cfi, pf->addr, ) != 0 ||
+   if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, ) != 0 &&
+(dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, ) != 0)) 
||
dwarf_frame_cfa(frame, >fb_ops, ) != 0) {
pr_warning("Failed to get call frame on 0x%jx\n",
   (uintmax_t)pf->addr);
@@ -1013,8 +1014,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global 
*gl, void *data)
return DWARF_CB_OK;
 }
 
-/* Find probe points from debuginfo */
-static int debuginfo__find_probes(struct debuginfo *dbg,
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
  struct probe_finder *pf)
 {
struct perf_probe_point *pp = >pev->point;
@@ -1023,27 +1023,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
Dwarf_Die *diep;
int ret = 0;
 
-#if _ELFUTILS_PREREQ(0, 142)
-   Elf *elf;
-   GElf_Ehdr ehdr;
-   GElf_Shdr shdr;
-
-   /* Get the call frame information from this dwarf */
-   elf = dwarf_getelf(dbg->dbg);
-   if (elf == NULL)
-   return -EINVAL;
-
-   if (gelf_getehdr(elf, ) == NULL)
-   return -EINVAL;
-
-   if (elf_section_by_name(elf, , , ".eh_frame", NULL) &&
-   shdr.sh_type == SHT_PROGBITS) {
-   pf->cfi = dwarf_getcfi_elf(elf);
-   } else {
-   pf->cfi = dwarf_getcfi(dbg->dbg);
-   }
-#endif
-
off = 0;
pf->lcache = intlist__new(NULL);
if (!pf->lcache)
@@ -1106,6 +1085,39 @@ found:
return ret;
 }
 
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+ struct pro

[PATCH v11 4/4] perf,kvm/powerpc: Add support for HCALL reasons

2016-01-27 Thread Hemant Kumar
Powerpc provides hcall events that also provide insights into guest
behaviour. Enhance perf kvm stat to record and analyze hcall events.

 - To trace hcall events :
  perf kvm stat record

 - To show the results :
  perf kvm stat report --event=hcall

The result shows the number of hypervisor calls from the guest grouped
by their respective reasons displayed with the frequency.

This patch makes use of two additional tracepoints
"kvm_hv:kvm_hcall_enter" and "kvm_hv:kvm_hcall_exit". To map the hcall
codes to their respective names, it needs a mapping. Such mapping is
added in this patch in book3s_hcalls.h.

A sample output :
 # pgrep qemu
19378
60515

2 VMs running.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515 --event=hcall

Analyze events for all VMs, all VCPUs:

HCALL-EVENTSamples  Samples% Time%Min TimeMax Time 
Avg time

  H_IPI82266.08%88.10%  0.63us 11.38us  
2.05us ( +-   1.42% )
 H_SEND_CRQ14411.58% 3.77%  0.41us  0.88us  
0.50us ( +-   1.47% )
   H_VIO_SIGNAL118 9.49% 2.86%  0.37us  0.83us  
0.47us ( +-   1.43% )
H_PUT_TERM_CHAR 76 6.11% 2.07%  0.37us  0.90us  
0.52us ( +-   2.43% )
H_GET_TERM_CHAR 74 5.95% 2.23%  0.37us  1.70us  
0.58us ( +-   4.77% )
 H_RTAS  6 0.48% 0.85%  1.10us  9.25us  
2.70us ( +-  48.57% )
  H_PERFMON  4 0.32% 0.12%  0.41us  0.96us  
0.59us ( +-  20.92% )

Total Samples:1244, Total events handled time:1916.69us.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v9 to v11:
- No Changes.
v8 to v9:
- Removed the macros (which were being used only once).
v6 to v7:
- Removed dependency on uapi.
v4 to v5:
- Removed dependency on arch/powerpc/include/asm/hvcall.h and added them
in userspace side.
- No more arch side dependency.
v1 to v2:
- Split the patches for powerpc and perf.

 tools/perf/arch/powerpc/util/book3s_hcalls.h | 123 +++
 tools/perf/arch/powerpc/util/kvm-stat.c  |  65 +-
 2 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hcalls.h

diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h 
b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 000..0dd6b7f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+   {0x4, "H_REMOVE"},  \
+   {0x8, "H_ENTER"},   \
+   {0xc, "H_READ"},\
+   {0x10, "H_CLEAR_MOD"},  \
+   {0x14, "H_CLEAR_REF"},  \
+   {0x18, "H_PROTECT"},\
+   {0x1c, "H_GET_TCE"},\
+   {0x20, "H_PUT_TCE"},\
+   {0x24, "H_SET_SPRG0"},  \
+   {0x28, "H_SET_DABR"},   \
+   {0x2c, "H_PAGE_INIT"},  \
+   {0x30, "H_SET_ASR"},\
+   {0x34, "H_ASR_ON"}, \
+   {0x38, "H_ASR_OFF"},\
+   {0x3c, "H_LOGICAL_CI_LOAD"},\
+   {0x40, "H_LOGICAL_CI_STORE"},   \
+   {0x44, "H_LOGICAL_CACHE_LOAD"}, \
+   {0x48, "H_LOGICAL_CACHE_STORE"},\
+   {0x4c, "H_LOGICAL_ICBI"},   \
+   {0x50, "H_LOGICAL_DCBF"},   \
+   {0x54, "H_GET_TERM_CHAR"},  \
+   {0x58, "H_PUT_TERM_CHAR"},  \
+   {0x5c, "H_REAL_TO_LOGICAL"},\
+   {0x60, "H_HYPERVISOR_DATA"},\
+   {0x64, "H_EOI"},\
+   {0x68, "H_CPPR"},   \
+   {0x6c, "H_IPI"},\
+   {0x70, "H_IPOLL"},

[PATCH v11 1/4] perf, kvm/{x86, s390}: Remove dependency on uapi/kvm_perf.h

2016-01-27 Thread Hemant Kumar
It's better to remove the dependency on uapi/kvm_perf.h to allow dynamic
discovery of kvm events (if it's needed). To do this, some extern
variables have been introduced with which we can keep the generic
functions generic.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Acked-by: Alexander Yarygin <yary...@linux.vnet.ibm.com>
---
Changelog:
v10 to v11:
- No changes.
v9 to v10:
- Changed from dynamic to static declaration of "decode".
v8 to v9:
- Removed the macro definitions.
- Changed the access of kvm_entry_trace and kvm_exit_trace
- Removed unnecessary formatting.
v7 to v8:
- Removed unnecessary __unused_parameter modifiers.

 tools/perf/arch/s390/util/kvm-stat.c |  8 +++-
 tools/perf/arch/x86/util/kvm-stat.c  | 14 +++---
 tools/perf/builtin-kvm.c | 20 ++--
 tools/perf/util/kvm-stat.h   |  5 +
 4 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..b85a94b 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -10,7 +10,7 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include 
+#include 
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, 
sigp_order_codes);
 define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
 define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
 
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
 static void event_icpt_insn_get_key(struct perf_evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..babefda 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,5 +1,7 @@
 #include "../../util/kvm-stat.h"
-#include 
+#include 
+#include 
+#include 
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
 /*
  * For the mmio events, we treat:
  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
@@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm 
__maybe_unused,
  struct event_key *key,
  char *decode)
 {
-   scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
+   scnprintf(decode, decode_str_len, "%#lx:%s",
  (unsigned long)key->key,
  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
 }
@@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat 
*kvm __maybe_unused,
struct event_key *key,
char *decode)
 {
-   scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
+   scnprintf(decode, decode_str_len, "%#llx:%s",
  (unsigned long long)key->key,
  key->info ? "POUT" : "PIN");
 }
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 4418d92..ab5645c 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -30,7 +30,6 @@
 #include 
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include 
 #include "util/kvm-stat.h"
 
 void exit_event_get_key(struct perf_evsel *evsel,
@@ -38,12 +37,12 @@ void exit_event_get_key(struct perf_evsel *evsel,
struct event_key *key)
 {
key->info = 0;
-   key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+   key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
 }
 
 bool kvm_exit_event(struct perf_evsel *evsel)
 {
-   return !strcmp(evsel->name, KVM_EXIT_TRACE);
+   return !strcmp(evsel->name, kvm_exit_trace);
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -59,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel,
 
 bool kvm_entry_event(struct perf_evsel *evsel)
 {
-   return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+   return !strcmp(evsel->name, kvm_entry_trace);
 }
 
 bool exit_event_end(struct perf_evsel *evs

[PATCH v11 3/4] perf,kvm/powerpc: Port perf kvm stat to powerpc

2016-01-27 Thread Hemant Kumar
perf kvm can be used to analyze guest exit reasons. This support already
exists in x86. Hence, porting it to powerpc.

 - To trace KVM events :
  perf kvm stat record
  If many guests are running, we can track for a specific guest by using
  --pid as in : perf kvm stat record --pid <pid>

 - To see the results :
  perf kvm stat report

The result shows the number of exits (from the guest context to
host/hypervisor context) grouped by their respective exit reasons with
their frequency.

Since different powerpc machines have different KVM tracepoints, this
patch discovers the available tracepoints dynamically and accordingly
looks for them. If any single tracepoint is not present, this support
won't be enabled for reporting. Recording will fail if any of the
events we are looking to record isn't available.
Right now, it's only supported on PowerPC Book3S_HV architectures.

To analyze the different exits, group them and present them (in a slightly
descriptive way) to the user, we need a mapping between the "exit
code" (dumped in the kvm_guest_exit tracepoint data) and its related
interrupt vector description (exit reason). This patch adds this mapping
in book3s_hv_exits.h.

It records on two available KVM tracepoints for book3s_hv:
"kvm_hv:kvm_guest_exit" and "kvm_hv:kvm_guest_enter".

Here is a sample o/p:
 # pgrep qemu
19378
60515

2 Guests are running on the host.

 # perf kvm stat record -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.153 MB perf.data.guest (39624
samples) ]

 # perf kvm stat report -p 60515

Analyze events for pid(s) 60515, all VCPUs:

   VM-EXITSamples  Samples% Time%Min TimeMax Time 
Avg time

   SYSCALL   914163.67% 7.49%  1.26us   5782.39us  
9.87us ( +-   6.46% )
H_DATA_STORAGE   411428.66% 5.07%  1.72us   4597.68us 
14.84us ( +-  20.06% )
HV_DECREMENTER418 2.91% 4.26%  0.70us  30002.22us
122.58us ( +-  70.29% )
  EXTERNAL392 2.73% 0.06%  0.64us104.10us  
1.94us ( +-  18.83% )
RETURN_TO_HOST287 2.00%83.11%  1.53us 124240.15us   
3486.52us ( +-  16.81% )
H_INST_STORAGE  5 0.03% 0.00%  1.88us  3.73us  
2.39us ( +-  14.20% )

Total Samples:14357, Total events handled time:1203918.42us.

Signed-off-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v10 to v11:
- Fixed a bug related to tracepoint_error when rebasing to v4.5-rc1.
v8 to v9:
- Moved the book3s specific setup into one function.
- Removed the macros (which were being used only once).
- Formatting changes.
v7 to v8:
- Fixed a perf kvm stat live bug.
v6 to v7:
- Removed dependency on uapi.
v4 to v5:
- Removed dependency on arch/powerpc/kvm/trace_book3s.h and added them in
the userspace side.
- No more arch side dependency.
v1 to v3:
- Split the patches for powerpc and perf

 tools/perf/arch/powerpc/Makefile   |   2 +
 tools/perf/arch/powerpc/util/Build |   1 +
 tools/perf/arch/powerpc/util/book3s_hv_exits.h |  33 
 tools/perf/arch/powerpc/util/kvm-stat.c| 107 +
 tools/perf/builtin-kvm.c   |  18 +
 tools/perf/util/kvm-stat.h |   1 +
 6 files changed, 162 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/book3s_hv_exits.h
 create mode 100644 tools/perf/arch/powerpc/util/kvm-stat.c

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..9f9cea3 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h 
b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 000..e68ba2d
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+   {0x0,   "RETURN_TO_HOST"}, \
+   {0x100, "SYSTEM_RESET"}, \
+   {0x200, "MACHINE_CHECK"}, \
+   {0x300, "DATA_STORAGE"}, \
+   {0x380, "DATA_SEGMENT"}, \
+   {0x400, "INST_STORAGE"}, \
+   {0x480, "INST_SEGMENT"}, \
+   {0x500, 

[PATCH v11 2/4] perf,kvm/{x86,s390}: Remove const from kvm_events_tp

2016-01-27 Thread Hemant Kumar
This patch removes the "const" qualifier from kvm_events_tp declaration
to account for the fact that some architectures may need to update this
variable dynamically. For instance, powerpc will need to update this
variable dynamically depending on the machine type.

Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v1 to v11:
- No Changes.

 tools/perf/arch/s390/util/kvm-stat.c | 2 +-
 tools/perf/arch/x86/util/kvm-stat.c  | 2 +-
 tools/perf/util/kvm-stat.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/s390/util/kvm-stat.c 
b/tools/perf/arch/s390/util/kvm-stat.c
index b85a94b..ed57df2 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -79,7 +79,7 @@ static struct kvm_events_ops exit_events = {
.name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_s390_sie_enter",
"kvm:kvm_s390_sie_exit",
"kvm:kvm_s390_intercept_instruction",
diff --git a/tools/perf/arch/x86/util/kvm-stat.c 
b/tools/perf/arch/x86/util/kvm-stat.c
index babefda..b63d4be 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -129,7 +129,7 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
"kvm:kvm_entry",
"kvm:kvm_exit",
"kvm:kvm_mmio",
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index dd55548..c965dc8 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -133,7 +133,7 @@ bool kvm_entry_event(struct perf_evsel *evsel);
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
 extern const char *vcpu_id_str;
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4] perf/probe: Search both .eh_frame and .debug_frame sections for probe location

2016-02-02 Thread Hemant Kumar
perf probe through debuginfo__find_probes() in util/probe-finder.c
checks for the functions' frame descriptions in either .eh_frame section
of an ELF or the .debug_frame. The check is based on whether either one
of these sections is present. Depending on distro, toolchain defaults,
architecture, build flags, etc., CFI might be found in either .eh_frame
and/or .debug_frame. Sometimes .eh_frame, even if present, may be
incomplete and may miss some descriptions. Therefore, to reliably find
the CFI covering an address, we always have to investigate both
sections, if available.

For example, on powerpc, this may happen :
 $ gcc -g bin.c -o bin

 $ objdump --dwarf ./bin
 <1><145>: Abbrev Number: 7 (DW_TAG_subprogram)
<146>   DW_AT_external: 1
<146>   DW_AT_name: (indirect string, offset: 0x9e): main
<14a>   DW_AT_decl_file   : 1
<14b>   DW_AT_decl_line   : 39
<14c>   DW_AT_prototyped  : 1
<14c>   DW_AT_type: <0x57>
<150>   DW_AT_low_pc  : 0x17b8

If the .eh_frame and .debug_frame are checked for the same binary, we
will find that, .eh_frame (although present) doesn't contain a
description for "main" function.
But, .debug_frame has a description :

00d8 0024  FDE cie= pc=17b8..1838
  DW_CFA_advance_loc: 16 to 17c8
  DW_CFA_def_cfa_offset: 144
  DW_CFA_offset_extended_sf: r65 at cfa+16
...

Because of this (perf checks whether .eh_frame is present and, if so,
searches only that section for the address), perf is unable to process
the probes :
 # perf probe -x ./bin main
Failed to get call frame on 0x17b8
  Error: Failed to add events.

To avoid this issue, we need to check both the sections (.eh_frame and
.debug_frame), which is done in this patch.

Note that, we can always force everything into both .eh_frame and
.debug_frame by :
 $ gcc bin.c -fasynchronous-unwind-tables  -fno-dwarf2-cfi-asm -g -o bin

Acked-by: Masami Hiramatsu <masami.hiramatsu...@hitachi.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changes since v3:
- Rebased it to v4.5-rc2.
Changes since v2:
- Fixed an issue related to filling up both the CFIs (Suggested by Masami).
Changes since v1:
- pf->cfi is now cached as pf->cfi_eh and pf->cfi_dbg depending on the source
  of CFI (Suggested by Mark Wielaard).

 tools/perf/util/probe-finder.c | 62 +-
 tools/perf/util/probe-finder.h |  5 +++-
 2 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 2be10fb..4ce5c5e 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -686,8 +686,9 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct 
probe_finder *pf)
pf->fb_ops = NULL;
 #if _ELFUTILS_PREREQ(0, 142)
} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
-  pf->cfi != NULL) {
-   if (dwarf_cfi_addrframe(pf->cfi, pf->addr, ) != 0 ||
+  (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
+   if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, ) != 0 &&
+(dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, ) != 0)) 
||
dwarf_frame_cfa(frame, >fb_ops, ) != 0) {
pr_warning("Failed to get call frame on 0x%jx\n",
   (uintmax_t)pf->addr);
@@ -1015,8 +1016,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global 
*gl, void *data)
return DWARF_CB_OK;
 }
 
-/* Find probe points from debuginfo */
-static int debuginfo__find_probes(struct debuginfo *dbg,
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
  struct probe_finder *pf)
 {
struct perf_probe_point *pp = >pev->point;
@@ -1025,27 +1025,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
Dwarf_Die *diep;
int ret = 0;
 
-#if _ELFUTILS_PREREQ(0, 142)
-   Elf *elf;
-   GElf_Ehdr ehdr;
-   GElf_Shdr shdr;
-
-   /* Get the call frame information from this dwarf */
-   elf = dwarf_getelf(dbg->dbg);
-   if (elf == NULL)
-   return -EINVAL;
-
-   if (gelf_getehdr(elf, ) == NULL)
-   return -EINVAL;
-
-   if (elf_section_by_name(elf, , , ".eh_frame", NULL) &&
-   shdr.sh_type == SHT_PROGBITS) {
-   pf->cfi = dwarf_getcfi_elf(elf);
-   } else {
-   pf->cfi = dwarf_getcfi(dbg->dbg);
-   }
-#endif
-
off = 0;
pf->lcache = intlist__new(NULL);
if (!pf->lcache)
@@ -1108,6 +1087,39 @@ found:
return ret;
 }
 
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,

[PATCH v4 00/10] IMC Instrumentation Support

2017-02-19 Thread Hemant Kumar
 compatible = "ibm,imc-counters-core";
events-prefix = "CPM_";
unit = "";
scale = "";
reg = <0x0 0x8>;
events = < _EVENTS >;
};
 
thread {
compatible = "ibm,imc-counters-core";
events-prefix = "CPM_";
unit = "";
scale = "";
reg = <0x0 0x8>;
events = < _EVENTS >;
};
}; 

From the device tree, the kernel parses the PMUs and their events'
information.

After parsing the IMC PMUs and their events, the PMUs and their
attributes are registered in the kernel.

This patchset (patches 9 and 10) configures the thread level IMC PMUs
to count for tasks, which gives us the thread level metric values per
task.

Example Usage :
 # perf list

  [...]
  nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/   [Kernel PMU event]
  nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0_LAST_SAMPLE/ [Kernel PMU event]
  [...]
  core_imc/CPM_NON_IDLE_INST/[Kernel PMU event]
  core_imc/CPM_NON_IDLE_PCYC/[Kernel PMU event]
  [...]
  thread_imc/CPM_NON_IDLE_INST/  [Kernel PMU event]
  thread_imc/CPM_NON_IDLE_PCYC/  [Kernel PMU event]

To see per chip data for nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/ :
 # perf stat -e "nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/" -a --per-socket

To see non-idle instructions for core 0 :
 # ./perf stat -e "core_imc/CPM_NON_IDLE_INST/" -C 0 -I 1000

To see non-idle instructions for a "make" :
 # ./perf stat -e "thread_imc/CPM_NON_IDLE_PCYC/" make

Comments/feedback/suggestions are welcome.

Changelog:
 v3 -> v4 :
 - Changed the events parser code to discover the PMU and events because
   of the changed format of the IMC DTS file (Patch 3).
 - Implemented the two TODOs to include core and thread IMC support with
   this patchset (Patches 7 through 10).
 - Changed the CPU hotplug code of Nest IMC PMUs to include a new state
   CPUHP_AP_PERF_POWERPC_NEST_ONLINE (Patch 6).
 v2 -> v3 :
 - Changed all references for IMA (In-Memory Accumulation) to IMC (In-Memory
   Collection).
 v1 -> v2 :
 - Account for the cases where a PMU can have a common scale and unit
   values for all its supported events (Patch 3/6).
 - Fixed a Build error (for maple_defconfig) by enabling imc_pmu.o
   only for CONFIG_PPC_POWERNV=y (Patch 4/6)
 - Read from the "event-name" property instead of "name" for an event
   node (Patch 3/6).

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>

Hemant Kumar (10):
  powerpc/powernv: Data structure and macros definitions
  powerpc/powernv: Autoload IMC device driver module
  powerpc/powernv: Detect supported IMC units and its events
  powerpc/perf: Add event attribute and group to IMC pmus
  powerpc/perf: Generic imc pmu event functions
  powerpc/perf: IMC pmu cpumask and cpu hotplug support
  powerpc/powernv: Core IMC events detection
  powerpc/perf: PMU functions for Core IMC and hotplugging
  powerpc/powernv: Thread IMC events detection
  powerpc/perf: Thread IMC PMU functions

 arch/powerpc/include/asm/imc-pmu.h |  83 +++
 arch/powerpc/include/asm/opal-api.h|  11 +-
 arch/powerpc/include/asm/opal.h|   5 +
 arch/powerpc/perf/Makefile |   6 +-
 arch/powerpc/perf/imc-pmu.c| 775 +
 arch/powerpc/platforms/powernv/Makefile|   2 +-
 arch/powerpc/platforms/powernv/opal-imc.c  | 553 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/platforms/powernv/opal.c  |  13 +
 include/linux/cpuhotplug.h |   2 +
 10 files changed, 1449 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/include/asm/imc-pmu.h
 create mode 100644 arch/powerpc/perf/imc-pmu.c
 create mode 100644 arch/powerpc/platforms/powernv/opal-imc.c

-- 
2.7.4



[PATCH v4 05/10] powerpc/perf: Generic imc pmu event functions

2017-02-19 Thread Hemant Kumar
Since the IMC counters' data are periodically fed to a memory location,
the functions to read/update, start/stop, add/del can be generic and can
be used by all IMC PMU units.

This patch adds a set of generic imc pmu related event functions to be
used by each imc pmu unit. Add code to set up the format attribute and to
register imc pmus. Add an event_init function for nest_imc events.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h|   1 +
 arch/powerpc/perf/imc-pmu.c   | 121 ++
 arch/powerpc/platforms/powernv/opal-imc.c |  30 +++-
 3 files changed, 148 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 3232322..7b58721 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -70,4 +70,5 @@ struct imc_pmu {
 
 #define UNKNOWN_DOMAIN -1
 
+int imc_get_domain(struct device_node *pmu_dev);
 #endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 7b6ce50..f6f1ef9 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -17,6 +17,116 @@
 struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 
+/* Needed for sanity check */
+extern u64 nest_max_offset;
+
+PMU_FORMAT_ATTR(event, "config:0-20");
+static struct attribute *imc_format_attrs[] = {
+   _attr_event.attr,
+   NULL,
+};
+
+static struct attribute_group imc_format_group = {
+   .name = "format",
+   .attrs = imc_format_attrs,
+};
+
+static int nest_imc_event_init(struct perf_event *event)
+{
+   int chip_id;
+   u32 config = event->attr.config;
+   struct perchip_nest_info *pcni;
+
+   if (event->attr.type != event->pmu->type)
+   return -ENOENT;
+
+   /* Sampling not supported */
+   if (event->hw.sample_period)
+   return -EINVAL;
+
+   /* unsupported modes and filters */
+   if (event->attr.exclude_user   ||
+   event->attr.exclude_kernel ||
+   event->attr.exclude_hv ||
+   event->attr.exclude_idle   ||
+   event->attr.exclude_host   ||
+   event->attr.exclude_guest)
+   return -EINVAL;
+
+   if (event->cpu < 0)
+   return -EINVAL;
+
+   /* Sanity check for config (event offset) */
+   if (config > nest_max_offset)
+   return -EINVAL;
+
+   chip_id = topology_physical_package_id(event->cpu);
+   pcni = _perchip_info[chip_id];
+   event->hw.event_base = pcni->vbase[config/PAGE_SIZE] +
+   (config & ~PAGE_MASK);
+
+   return 0;
+}
+
+static void imc_read_counter(struct perf_event *event)
+{
+   u64 *addr, data;
+
+   addr = (u64 *)event->hw.event_base;
+   data = __be64_to_cpu(*addr);
+   local64_set(>hw.prev_count, data);
+}
+
+static void imc_perf_event_update(struct perf_event *event)
+{
+   u64 counter_prev, counter_new, final_count, *addr;
+
+   addr = (u64 *)event->hw.event_base;
+   counter_prev = local64_read(>hw.prev_count);
+   counter_new = __be64_to_cpu(*addr);
+   final_count = counter_new - counter_prev;
+
+   local64_set(>hw.prev_count, counter_new);
+   local64_add(final_count, >count);
+}
+
+static void imc_event_start(struct perf_event *event, int flags)
+{
+   imc_read_counter(event);
+}
+
+static void imc_event_stop(struct perf_event *event, int flags)
+{
+   imc_perf_event_update(event);
+}
+
+static int imc_event_add(struct perf_event *event, int flags)
+{
+   if (flags & PERF_EF_START)
+   imc_event_start(event, flags);
+
+   return 0;
+}
+
+/* update_pmu_ops : Populate the appropriate operations for "pmu" */
+static int update_pmu_ops(struct imc_pmu *pmu)
+{
+   if (!pmu)
+   return -EINVAL;
+
+   pmu->pmu.task_ctx_nr = perf_invalid_context;
+   pmu->pmu.event_init = nest_imc_event_init;
+   pmu->pmu.add = imc_event_add;
+   pmu->pmu.del = imc_event_stop;
+   pmu->pmu.start = imc_event_start;
+   pmu->pmu.stop = imc_event_stop;
+ 

[PATCH v4 06/10] powerpc/perf: IMC pmu cpumask and cpu hotplug support

2017-02-19 Thread Hemant Kumar
Adds cpumask attribute to be used by each IMC pmu. Only one cpu (any
online CPU) from each chip for nest PMUs is designated to read counters.

On CPU hotplug, the dying CPU is checked to see whether it is one of the
designated CPUs; if so, the next online CPU from the same chip (for nest
units) is designated as the new CPU to read counters. For this purpose, we
introduce a new state : CPUHP_AP_PERF_POWERPC_NEST_ONLINE.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal-api.h|   3 +-
 arch/powerpc/include/asm/opal.h|   3 +
 arch/powerpc/perf/imc-pmu.c| 163 -
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 include/linux/cpuhotplug.h |   1 +
 5 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index a0aa285..e15fb20 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -168,7 +168,8 @@
 #define OPAL_INT_SET_MFRR  125
 #define OPAL_PCI_TCE_KILL  126
 #define OPAL_NMMU_SET_PTCR 127
-#define OPAL_LAST  127
+#define OPAL_NEST_IMC_COUNTERS_CONTROL 128
+#define OPAL_LAST  128
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 1ff03a6..d93d082 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -227,6 +227,9 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t 
kill_type,
  uint64_t dma_addr, uint32_t npages);
 int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
 
+int64_t opal_nest_imc_counters_control(uint64_t mode, uint64_t value1,
+   uint64_t value2, uint64_t value3);
+
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
   int depth, void *data);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f6f1ef9..e46ff6d 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -16,6 +16,7 @@
 
 struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static cpumask_t nest_imc_cpumask;
 
 /* Needed for sanity check */
 extern u64 nest_max_offset;
@@ -31,6 +32,160 @@ static struct attribute_group imc_format_group = {
.attrs = imc_format_attrs,
 };
 
+/* Get the cpumask printed to a buffer "buf" */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   cpumask_t *active_mask;
+
+   active_mask = _imc_cpumask;
+   return cpumap_print_to_pagebuf(true, buf, active_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *imc_pmu_cpumask_attrs[] = {
+   _attr_cpumask.attr,
+   NULL,
+};
+
+static struct attribute_group imc_pmu_cpumask_attr_group = {
+   .attrs = imc_pmu_cpumask_attrs,
+};
+
+/*
+ * nest_init : Initializes the nest imc engine for the current chip.
+ */
+static void nest_init(int *loc)
+{
+   int rc;
+
+   rc = opal_nest_imc_counters_control(NEST_IMC_PRODUCTION_MODE,
+   NEST_IMC_ENGINE_START, 0, 0);
+   if (rc)
+   loc[smp_processor_id()] = 1;
+}
+
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+   int i;
+
+   for (i = 0;
+(per_nest_pmu_arr[i] != NULL) && (i < IMC_MAX_PMUS); i++)
+   perf_pmu_migrate_context(_nest_pmu_arr[i]->pmu,
+   old_cpu, new_cpu);
+}
+
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+   int nid, fcpu, ncpu;
+   struct cpumask *l_cpumask, tmp_mask;
+
+   /* Find the cpumask of this node */
+   nid = cpu_to_node(cpu);
+   l_cpumask = cpumask_of_node(nid);
+
+   /*
+* If any of the cpu from this node is already present in the mask,
+* just return, if not, then set this cpu in the mask.
+*/
+   if (!cpumask_and(_mask, l_cpumask, _imc_cpumask)) {
+   

[PATCH v4 04/10] powerpc/perf: Add event attribute and group to IMC pmus

2017-02-19 Thread Hemant Kumar
Device tree IMC driver code parses the IMC units and their events. It
passes the information to IMC pmu code which is placed in powerpc/perf
as "imc-pmu.c".

This patch creates only event attributes and attribute groups for the
IMC pmus.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/Makefile|  6 +-
 arch/powerpc/perf/imc-pmu.c   | 96 +++
 arch/powerpc/platforms/powernv/opal-imc.c | 12 +++-
 3 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/perf/imc-pmu.c

diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4d606b9..d0d1f04 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -2,10 +2,14 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 obj-$(CONFIG_PERF_EVENTS)  += callchain.o perf_regs.o
 
+imc-$(CONFIG_PPC_POWERNV)   += imc-pmu.o
+
 obj-$(CONFIG_PPC_PERF_CTRS)+= core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)  += power4-pmu.o ppc970-pmu.o power5-pmu.o \
   power5+-pmu.o power6-pmu.o power7-pmu.o \
-  isa207-common.o power8-pmu.o power9-pmu.o
+  isa207-common.o power8-pmu.o power9-pmu.o \
+  $(imc-y)
+
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index 000..7b6ce50
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,96 @@
+/*
+ * Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2016 Madhavan Srinivasan, IBM Corporation.
+ *  (C) 2016 Hemant K Shaw, IBM Corporation.
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
+struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+
+/* dev_str_attr : Populate event "name" and string "str" in attribute */
+static struct attribute *dev_str_attr(const char *name, const char *str)
+{
+   struct perf_pmu_events_attr *attr;
+
+   attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+
+   sysfs_attr_init(>attr.attr);
+
+   attr->event_str = str;
+   attr->attr.attr.name = name;
+   attr->attr.attr.mode = 0444;
+   attr->attr.show = perf_event_sysfs_show;
+
+   return >attr.attr;
+}
+
+/*
+ * update_events_in_group: Update the "events" information in an attr_group
+ * and assign the attr_group to the pmu "pmu".
+ */
+static int update_events_in_group(struct imc_events *events,
+ int idx, struct imc_pmu *pmu)
+{
+   struct attribute_group *attr_group;
+   struct attribute **attrs;
+   int i;
+
+   /* Allocate memory for attribute group */
+   attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+   if (!attr_group)
+   return -ENOMEM;
+
+   /* Allocate memory for attributes */
+   attrs = kzalloc((sizeof(struct attribute *) * (idx + 1)), GFP_KERNEL);
+   if (!attrs) {
+   kfree(attr_group);
+   return -ENOMEM;
+   }
+
+   attr_group->name = "events";
+   attr_group->attrs = attrs;
+   for (i = 0; i < idx; i++, events++) {
+   attrs[i] = dev_str_attr((char *)events->ev_name,
+   (char *)events->ev_value);
+   }
+
+   pmu->attr_groups[0] = attr_group;
+   return 0;
+}
+
+/*
+ * init_imc_pmu : Setup the IMC pmu device in "pmu_ptr" and its events
+ *"events".
+ * Setup the cpu mask information for these pmus and setup the state machine
+ * hotplug notifiers as well.
+ */
+int init_imc_pmu(struct imc_events *events, int idx,
+struct imc_pmu *pmu_ptr)
+{
+   int ret = -ENODEV;
+
+   ret = update_events_in_group(events, idx, pmu_ptr);
+   if (ret)
+   goto err_free;
+
+   return 0;
+
+err_free:
+   /* Only f

[PATCH v4 03/10] powerpc/powernv: Detect supported IMC units and its events

2017-02-19 Thread Hemant Kumar
Parse device tree to detect IMC units. Traverse through each IMC unit
node to find supported events and corresponding unit/scale files (if any).

The device tree for IMC counters starts at the node :
"imc-counters". This node contains all the IMC PMU nodes and event nodes
for these IMC PMUs. The PMU nodes have an "events" property which has a
phandle value for the actual events node. The events are separated from
the PMU nodes to abstract out the common events. For example, PMU node
"mcs0", "mcs1" etc. will contain a pointer to "nest-mcs-events" since
the events are common between these PMUs. These events have a different
prefix based on their relation to different PMUs, and hence, the PMU
nodes themselves contain an "events-prefix" property. The value for this
property concatenated to the event name, forms the actual event
name. Also, each PMU has a "reg" field as the base offset for the events
which belong to this PMU. This "reg" field is added to an event in the
"events" node, which gives us the location of the counter data. Kernel
code uses this offset as event configuration value.

Device tree parser code also looks for scale/unit property in the event
node and passes on the value as an event attr for perf interface to use
in the post processing by the perf tool. Some PMUs may have common scale
and unit properties which implies that all events supported by this PMU
inherit the scale and unit properties of the PMU itself. For those
events, we need to set the common unit and scale values.

For failure to initialize any unit or any event, disable that unit and
continue setting up the rest of them.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/opal-imc.c | 385 ++
 1 file changed, 385 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index ee2ae45..c58b893 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -32,6 +32,390 @@
 #include 
 
 struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
+struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+
+static int imc_event_info(char *name, struct imc_events *events)
+{
+   char *buf;
+
+   /* memory for content */
+   buf = kzalloc(IMC_MAX_PMU_NAME_LEN, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   events->ev_name = name;
+   events->ev_value = buf;
+   return 0;
+}
+
+static int imc_event_info_str(struct property *pp, char *name,
+  struct imc_events *events)
+{
+   int ret;
+
+   ret = imc_event_info(name, events);
+   if (ret)
+   return ret;
+
+   if (!pp->value || (strnlen(pp->value, pp->length) == pp->length) ||
+  (pp->length > IMC_MAX_PMU_NAME_LEN))
+   return -EINVAL;
+   strncpy(events->ev_value, (const char *)pp->value, pp->length);
+
+   return 0;
+}
+
+static int imc_event_info_val(char *name, u32 val,
+ struct imc_events *events)
+{
+   int ret;
+
+   ret = imc_event_info(name, events);
+   if (ret)
+   return ret;
+   sprintf(events->ev_value, "event=0x%x", val);
+
+   return 0;
+}
+
+static int set_event_property(struct property *pp, char *event_prop,
+ struct imc_events *events, char *ev_name)
+{
+   char *buf;
+   int ret;
+
+   buf = kzalloc(IMC_MAX_PMU_NAME_LEN, GFP_KERNEL);
+   if (!buf)
+   return -ENOMEM;
+
+   sprintf(buf, "%s.%s", ev_name, event_prop);
+   ret = imc_event_info_str(pp, buf, events);
+   if (ret) {
+   kfree(events->ev_name);
+   kfree(events->ev_value);
+   }
+
+   return ret;
+}
+
+/*
+ * imc_events_node_parser: Parse the event node "dev" and assign the parsed
+ * information to event "events".
+ *
+ * Parses the "reg" property of this event. "reg" gives us the event offset.
+ * Also, parse the "scale" and "unit" properties, if any.
+ */
+static int imc_events_node_parser(struct device_node *dev,
+  

[PATCH v4 02/10] powerpc/powernv: Autoload IMC device driver module

2017-02-19 Thread Hemant Kumar
This patch does three things :
 - Enables "opal.c" to create a platform device for the IMC interface
   according to the appropriate compatibility string.
 - Finds the reserved-memory region details from the system device tree
   and gets the base address of the HOMER region for each chip.
 - Gets the Nest PMU counter data offsets (in the HOMER region)
   and their sizes. The offsets for the counters' data are fixed and
   won't change from chip to chip.

The device tree parsing logic is separated from the PMU creation
functions (which is done in subsequent patches). Right now, only Nest
units are taken care of.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/Makefile   |   2 +-
 arch/powerpc/platforms/powernv/opal-imc.c | 117 ++
 arch/powerpc/platforms/powernv/opal.c |  13 
 3 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-imc.c

diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb..44909fe 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,7 +2,7 @@ obj-y   += setup.o opal-wrappers.o opal.o 
opal-async.o idle.o
 obj-y  += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y  += rng.o opal-elog.o opal-dump.o opal-sysparam.o 
opal-sensor.o
 obj-y  += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
-obj-y  += opal-kmsg.o
+obj-y  += opal-kmsg.o opal-imc.o
 
 obj-$(CONFIG_SMP)  += smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)  += pci.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
new file mode 100644
index 000..ee2ae45
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -0,0 +1,117 @@
+/*
+ * OPAL IMC interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright  (C) 2016 Madhavan Srinivasan, IBM Corporation.
+ *(C) 2016 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
+
+static int opal_imc_counters_probe(struct platform_device *pdev)
+{
+   struct device_node *child, *imc_dev, *rm_node = NULL;
+   struct perchip_nest_info *pcni;
+   u32 reg[4], pages, nest_offset, nest_size, idx;
+   int i = 0;
+   const char *node_name;
+
+   if (!pdev || !pdev->dev.of_node)
+   return -ENODEV;
+
+   imc_dev = pdev->dev.of_node;
+
+   /*
+* nest_offset : where the nest-counters' data start.
+* size : size of the entire nest-counters region
+*/
+   if (of_property_read_u32(imc_dev, "imc-nest-offset", &nest_offset))
+   goto err;
+   if (of_property_read_u32(imc_dev, "imc-nest-size", &nest_size))
+   goto err;
+
+   /* Find the "homer region" for each chip */
+   rm_node = of_find_node_by_path("/reserved-memory");
+   if (!rm_node)
+   goto err;
+
+   for_each_child_of_node(rm_node, child) {
+   if (of_property_read_string_index(child, "name", 0,
+ _name))
+   continue;
+   if (strncmp("ibm,homer-image", node_name,
+   strlen("ibm,homer-image")))
+   continue;
+
+   /* Get the chip id to which the above homer region belongs to */
+   if (of_property_read_u32(child, "ibm,chip-id", ))
+   g

[PATCH v4 10/10] powerpc/perf: Thread IMC PMU functions

2017-02-19 Thread Hemant Kumar
This patch adds the PMU functions required for event initialization,
read, update, add, del etc. for thread IMC PMU. Thread IMC PMUs are used
for per-task monitoring. These PMUs don't need any hotplugging support.

For each CPU, a page of memory is allocated and is kept static i.e.,
these pages will exist till the machine shuts down. The base address of
this page is assigned to the ldbar of that cpu. As soon as we do that,
the thread IMC counters start running for that cpu and the data of these
counters are assigned to the page allocated. But we use this for
per-task monitoring. Whenever we start monitoring a task, the event is
added onto the task. At that point, we read the initial value of the
event. Whenever we stop monitoring the task, the final value is taken
and the difference is the event data.

Now, a task can move to a different cpu. Suppose a task X is moving from
cpu A to cpu B. When the task is scheduled out of A, we get an
event_del for A, and hence the event data is updated, and we stop
updating X's event data. As soon as X moves on to B, event_add is
called for B, and we again update the event_data. And this is how it
keeps on updating the event data even when the task is scheduled on to
different cpus.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h |   4 +
 arch/powerpc/perf/imc-pmu.c| 161 -
 2 files changed, 164 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index f2b4f12..8b7141b 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -22,6 +22,7 @@
 #define IMC_MAX_PMUS   32
 #define IMC_MAX_PMU_NAME_LEN   256
 #define IMC_MAX_CORES  256
+#define IMC_MAX_CPUS2048
 
 #define NEST_IMC_ENGINE_START  1
 #define NEST_IMC_ENGINE_STOP   0
@@ -34,6 +35,9 @@
 #define IMC_DTB_CORE_COMPAT"ibm,imc-counters-core"
 #define IMC_DTB_THREAD_COMPAT   "ibm,imc-counters-thread"
 
+#define THREAD_IMC_LDBAR_MASK   0x0003e000
+#define THREAD_IMC_ENABLE   0x8000
+
 /*
  * Structure to hold per chip specific memory address
  * information for nest pmus. Nest Counter data are exported
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index a48c5be..4033b2d 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -23,6 +23,9 @@ static u64 per_core_pdbar_add[IMC_MAX_CHIPS][IMC_MAX_CORES];
 static cpumask_t core_imc_cpumask;
 struct imc_pmu *core_imc_pmu;
 
+/* Maintains base address for all the cpus */
+static u64 per_cpu_add[IMC_MAX_CPUS];
+
 /* Needed for sanity check */
 extern u64 nest_max_offset;
 extern u64 core_max_offset;
@@ -443,6 +446,56 @@ static int core_imc_event_init(struct perf_event *event)
return 0;
 }
 
+static int thread_imc_event_init(struct perf_event *event)
+{
+   struct task_struct *target;
+
+   if (event->attr.type != event->pmu->type)
+   return -ENOENT;
+
+   /* Sampling not supported */
+   if (event->hw.sample_period)
+   return -EINVAL;
+
+   event->hw.idx = -1;
+
+   /* Sanity check for config (event offset) */
+   if (event->attr.config > thread_max_offset)
+   return -EINVAL;
+
+   target = event->hw.target;
+
+   if (!target)
+   return -EINVAL;
+
+   event->pmu->task_ctx_nr = perf_sw_context;
+   return 0;
+}
+
+static void thread_imc_read_counter(struct perf_event *event)
+{
+   u64 *addr, data;
+   int cpu_id = smp_processor_id();
+
+   addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
+   data = __be64_to_cpu(*addr);
+   local64_set(&event->hw.prev_count, data);
+}
+
+static void thread_imc_perf_event_update(struct perf_event *event)
+{
+   u64 counter_prev, counter_new, final_count, *addr;
+   int cpu_id = smp_processor_id();
+
+   addr = (u64 *)(per_cpu_add[cpu_id] + event->attr.config);
+   counter_prev = local64_read(>hw.prev_count);
+   counter_new = __be64_to_cpu(*addr);
+   final_count = counter_new - counter_prev;
+
+   local64_set(>hw.prev_count, counter_new);
+   local64_add(final_count, 

[PATCH v4 09/10] powerpc/powernv: Thread IMC events detection

2017-02-19 Thread Hemant Kumar
Patch adds support for detection of thread IMC events. It adds a new
domain IMC_DOMAIN_THREAD and it is determined with the help of the
compatibility string "ibm,imc-counters-thread" based on the IMC device
tree.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h|  2 ++
 arch/powerpc/perf/imc-pmu.c   |  1 +
 arch/powerpc/platforms/powernv/opal-imc.c | 11 +--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 5e76cd0..f2b4f12 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -32,6 +32,7 @@
 #define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
 #define IMC_DTB_NEST_COMPAT"ibm,imc-counters-nest"
 #define IMC_DTB_CORE_COMPAT"ibm,imc-counters-core"
+#define IMC_DTB_THREAD_COMPAT   "ibm,imc-counters-thread"
 
 /*
  * Structure to hold per chip specific memory address
@@ -70,6 +71,7 @@ struct imc_pmu {
  */
 #define IMC_DOMAIN_NEST1
 #define IMC_DOMAIN_CORE2
+#define IMC_DOMAIN_THREAD   3
 
 #define UNKNOWN_DOMAIN -1
 
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 61d99c7..a48c5be 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -26,6 +26,7 @@ struct imc_pmu *core_imc_pmu;
 /* Needed for sanity check */
 extern u64 nest_max_offset;
 extern u64 core_max_offset;
+extern u64 thread_max_offset;
 
 PMU_FORMAT_ATTR(event, "config:0-20");
 static struct attribute *imc_format_attrs[] = {
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index 6db3c5f..a5565e7 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -39,6 +39,7 @@ extern int init_imc_pmu(struct imc_events *events,
int idx, struct imc_pmu *pmu_ptr);
 u64 nest_max_offset;
 u64 core_max_offset;
+u64 thread_max_offset;
 
 static int imc_event_info(char *name, struct imc_events *events)
 {
@@ -86,6 +87,10 @@ static void update_max_value(u32 value, int pmu_domain)
if (core_max_offset < value)
core_max_offset = value;
break;
+   case IMC_DOMAIN_THREAD:
+   if (thread_max_offset < value)
+   thread_max_offset = value;
+   break;
default:
/* Unknown domain, return */
return;
@@ -239,6 +244,8 @@ int imc_get_domain(struct device_node *pmu_dev)
return IMC_DOMAIN_NEST;
if (of_device_is_compatible(pmu_dev, IMC_DTB_CORE_COMPAT))
return IMC_DOMAIN_CORE;
+   if (of_device_is_compatible(pmu_dev, IMC_DTB_THREAD_COMPAT))
+   return IMC_DOMAIN_THREAD;
else
return UNKNOWN_DOMAIN;
 }
@@ -277,7 +284,7 @@ static void imc_free_events(struct imc_events *events, int 
nr_entries)
 /*
  * imc_pmu_create : Takes the parent device which is the pmu unit and a
  *  pmu_index as the inputs.
- * Allocates memory for the pmu, sets up its domain (NEST or CORE), and
+ * Allocates memory for the pmu, sets up its domain (NEST/CORE/THREAD), and
  * allocates memory for the events supported by this pmu. Assigns a name for
  * the pmu. Calls imc_events_node_parser() to setup the individual events.
  * If everything goes fine, it calls, init_imc_pmu() to setup the pmu device
@@ -305,7 +312,7 @@ static int imc_pmu_create(struct device_node *parent, int 
pmu_index)
if (pmu_ptr->domain == UNKNOWN_DOMAIN)
goto free_pmu;
 
-   /* Needed for hotplug/migration */
+   /* Needed for hotplug/migration for nest and core IMC PMUs */
if (pmu_ptr->domain == IMC_DOMAIN_CORE)
core_imc_pmu = pmu_ptr;
else if (pmu_ptr->domain == IMC_DOMAIN_NEST)
-- 
2.7.4



[PATCH v4 01/10] powerpc/powernv: Data structure and macros definitions

2017-02-19 Thread Hemant Kumar
Create new header file "imc-pmu.h" to add the data structures
and macros needed for IMC pmu support.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h | 73 ++
 1 file changed, 73 insertions(+)
 create mode 100644 arch/powerpc/include/asm/imc-pmu.h

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
new file mode 100644
index 000..3232322
--- /dev/null
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -0,0 +1,73 @@
+#ifndef PPC_POWERNV_IMC_PMU_DEF_H
+#define PPC_POWERNV_IMC_PMU_DEF_H
+
+/*
+ * IMC Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2016 Madhavan Srinivasan, IBM Corporation.
+ *   (C) 2016 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define IMC_MAX_CHIPS  32
+#define IMC_MAX_PMUS   32
+#define IMC_MAX_PMU_NAME_LEN   256
+
+#define NEST_IMC_ENGINE_START  1
+#define NEST_IMC_ENGINE_STOP   0
+#define NEST_MAX_PAGES 16
+
+#define NEST_IMC_PRODUCTION_MODE   1
+
+#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
+#define IMC_DTB_NEST_COMPAT"ibm,imc-counters-nest"
+
+/*
+ * Structure to hold per chip specific memory address
+ * information for nest pmus. Nest Counter data are exported
+ * in per-chip reserved memory region by the PORE Engine.
+ */
+struct perchip_nest_info {
+   u32 chip_id;
+   u64 pbase;
+   u64 vbase[NEST_MAX_PAGES];
+   u64 size;
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ */
+struct imc_events {
+   char *ev_name;
+   char *ev_value;
+};
+
+/*
+ * Device tree parser code detects IMC pmu support and
+ * registers new IMC pmus. This structure will
+ * hold the pmu functions and attrs for each imc pmu and
+ * will be referenced at the time of pmu registration.
+ */
+struct imc_pmu {
+   struct pmu pmu;
+   int domain;
+   const struct attribute_group *attr_groups[4];
+};
+
+/*
+ * Domains for IMC PMUs
+ */
+#define IMC_DOMAIN_NEST1
+
+#define UNKNOWN_DOMAIN -1
+
+#endif /* PPC_POWERNV_IMC_PMU_DEF_H */
-- 
2.7.4



[PATCH v4 08/10] powerpc/perf: PMU functions for Core IMC and hotplugging

2017-02-19 Thread Hemant Kumar
This patch adds the PMU function to initialize a core IMC event. It also
adds cpumask initialization function for core IMC PMU. For
initialization, a page of memory is allocated per core where the data
for core IMC counters will be accumulated. The base address for this
page is sent to OPAL via an OPAL call which initializes various SCOMs
related to Core IMC initialization. Upon any errors, the pages are
freed and core IMC counters are disabled using the same OPAL call.

For CPU hotplugging, a cpumask is initialized which contains an online
CPU from each core. If a cpu goes offline, we check whether that cpu
belongs to the core imc cpumask; if it does, we migrate the PMU
context to any other online cpu (if available) in that core. If a cpu
comes back online, then this cpu will be added to the core imc cpumask
only if there was no other cpu from that core in the previous cpumask.

To register the hotplug functions for core_imc, a new state
CPUHP_AP_PERF_POWERPC_COREIMC_ONLINE is added to the list of existing
states.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h |   1 +
 arch/powerpc/include/asm/opal-api.h|  10 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/perf/imc-pmu.c| 248 -
 arch/powerpc/platforms/powernv/opal-imc.c  |   4 +-
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 include/linux/cpuhotplug.h |   1 +
 7 files changed, 257 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 59de083..5e76cd0 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -21,6 +21,7 @@
 #define IMC_MAX_CHIPS  32
 #define IMC_MAX_PMUS   32
 #define IMC_MAX_PMU_NAME_LEN   256
+#define IMC_MAX_CORES  256
 
 #define NEST_IMC_ENGINE_START  1
 #define NEST_IMC_ENGINE_STOP   0
diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index e15fb20..4ee52e8 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -169,7 +169,8 @@
 #define OPAL_PCI_TCE_KILL  126
 #define OPAL_NMMU_SET_PTCR 127
 #define OPAL_NEST_IMC_COUNTERS_CONTROL 128
-#define OPAL_LAST  128
+#define OPAL_CORE_IMC_COUNTERS_CONTROL 129
+#define OPAL_LAST  129
 
 /* Device tree flags */
 
@@ -929,6 +930,13 @@ enum {
OPAL_PCI_TCE_KILL_ALL,
 };
 
+/* Operation argument to Core IMC */
+enum {
+   OPAL_CORE_IMC_DISABLE,
+   OPAL_CORE_IMC_ENABLE,
+   OPAL_CORE_IMC_INIT,
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index d93d082..c4baa6d 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -229,6 +229,8 @@ int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
 
 int64_t opal_nest_imc_counters_control(uint64_t mode, uint64_t value1,
uint64_t value2, uint64_t value3);
+int64_t opal_core_imc_counters_control(uint64_t operation, uint64_t addr,
+   uint64_t value2, uint64_t value3);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 9a0e3bc..61d99c7 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1,5 +1,5 @@
 /*
- * Nest Performance Monitor counter support.
+ * IMC Performance Monitor counter support.
  *
  * Copyright (C) 2016 Madhavan Srinivasan, IBM Corporation.
  *  (C) 2016 Hemant K Shaw, IBM Corporation.
@@ -18,6 +18,9 @@ struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 static cpumask_t nest_imc_cpumask;
 
+/* Maintains base addresses for all the cores */
+static u64 per_core_pdbar_add[IMC_MAX_CHIPS][IMC_MAX_CORES];
+static cpumask_t core_imc_cpumask;
 struct imc_pmu *core_imc_pmu;
 
 /* Needed for sanity check */
@@ -37,11 +40,18 @@ static struct attribute_group imc_format_group = {
 
 /* Get the cpumas

[PATCH v4 07/10] powerpc/powernv: Core IMC events detection

2017-02-19 Thread Hemant Kumar
This patch adds support for detection of core IMC events along with the
Nest IMC events. It adds a new domain IMC_DOMAIN_CORE, which is determined
with the help of the compatibility string "ibm,imc-counters-core" based
on the IMC device tree.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Cc: Balbir Singh <bsinghar...@gmail.com>
Cc: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h|  2 ++
 arch/powerpc/perf/imc-pmu.c   |  3 +++
 arch/powerpc/platforms/powernv/opal-imc.c | 18 --
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 7b58721..59de083 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -30,6 +30,7 @@
 
 #define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
 #define IMC_DTB_NEST_COMPAT"ibm,imc-counters-nest"
+#define IMC_DTB_CORE_COMPAT"ibm,imc-counters-core"
 
 /*
  * Structure to hold per chip specific memory address
@@ -67,6 +68,7 @@ struct imc_pmu {
  * Domains for IMC PMUs
  */
 #define IMC_DOMAIN_NEST1
+#define IMC_DOMAIN_CORE2
 
 #define UNKNOWN_DOMAIN -1
 
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index e46ff6d..9a0e3bc 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -18,8 +18,11 @@ struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 static cpumask_t nest_imc_cpumask;
 
+struct imc_pmu *core_imc_pmu;
+
 /* Needed for sanity check */
 extern u64 nest_max_offset;
+extern u64 core_max_offset;
 
 PMU_FORMAT_ATTR(event, "config:0-20");
 static struct attribute *imc_format_attrs[] = {
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index a65aa2d..67ce873 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -33,10 +33,12 @@
 
 extern struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
 extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+extern struct imc_pmu *core_imc_pmu;
 
 extern int init_imc_pmu(struct imc_events *events,
int idx, struct imc_pmu *pmu_ptr);
 u64 nest_max_offset;
+u64 core_max_offset;
 
 static int imc_event_info(char *name, struct imc_events *events)
 {
@@ -80,6 +82,10 @@ static void update_max_value(u32 value, int pmu_domain)
if (nest_max_offset < value)
nest_max_offset = value;
break;
+   case IMC_DOMAIN_CORE:
+   if (core_max_offset < value)
+   core_max_offset = value;
+   break;
default:
/* Unknown domain, return */
return;
@@ -231,6 +237,8 @@ int imc_get_domain(struct device_node *pmu_dev)
 {
if (of_device_is_compatible(pmu_dev, IMC_DTB_NEST_COMPAT))
return IMC_DOMAIN_NEST;
+   if (of_device_is_compatible(pmu_dev, IMC_DTB_CORE_COMPAT))
+   return IMC_DOMAIN_CORE;
else
return UNKNOWN_DOMAIN;
 }
@@ -298,7 +306,10 @@ static int imc_pmu_create(struct device_node *parent, int 
pmu_index)
goto free_pmu;
 
/* Needed for hotplug/migration */
-   per_nest_pmu_arr[pmu_index] = pmu_ptr;
+   if (pmu_ptr->domain == IMC_DOMAIN_CORE)
+   core_imc_pmu = pmu_ptr;
+   else if (pmu_ptr->domain == IMC_DOMAIN_NEST)
+   per_nest_pmu_arr[pmu_index] = pmu_ptr;
 
/*
 * "events" property inside a PMU node contains the phandle value
@@ -354,7 +365,10 @@ static int imc_pmu_create(struct device_node *parent, int 
pmu_index)
}
 
/* Save the name to register it later */
-   sprintf(buf, "nest_%s", (char *)pp->value);
+   if (pmu_ptr->domain == IMC_DOMAIN_NEST)
+   sprintf(buf, "nest_%s", (char *)pp->value);
+   else
+   sprintf(buf, "%s_imc", (char *)pp->value);
pmu_ptr->pmu.name = (char *)buf;
 
/*
-- 
2.7.4



[PATCH v2 0/6] IMA Instrumentation Support

2016-11-20 Thread Hemant Kumar
Power 9 has In-Memory-Accumulation (IMA) infrastructure which contains
various Performance Monitoring Units (PMUs) at Nest level (these are
on-chip but off-core). These Nest PMU counters are handled by a Nest
IMA microcode. This microcode runs in the OCC (On-Chip Controller)
complex and its purpose is to program the nest counters, collect the
counter data and move the counter data to memory. 

The IMA infrastructure encapsulates nest (per-chip), core and thread
level counters. While the nest IMA PMUs are handled by the nest IMA
microcode, the core and thread level PMUs are handled by the Core-HPMC
engine. This patchset enables the nest IMA PMUs and is based on the
initial work done by Madhavan Srinivasan.
"Nest Instrumentation Support" : 
https://lists.ozlabs.org/pipermail/linuxppc-dev/2015-August/132078.html

v1 for this patchset can be found here :
https://lwn.net/Articles/705475/

Nest events:
Per-chip nest instrumentation provides various per-chip metrics
such as memory, powerbus, Xlink and Alink bandwidth.

PMU Events' Information:
OPAL obtains the Nest PMU and event information from the IMA Catalog
and passes it on to the kernel via the device tree. The events' information
contains :
 - Event name
 - Event Offset
 - Event description
and, maybe :
 - Event scale
 - Event unit

Some PMUs may have common scale and unit values for all their
supported events. For those cases, the scale and unit properties for
those events must be inherited from the PMU.

The event offset in the memory is where the counter data gets
accumulated.

The OPAL-side patches are posted upstream :
https://lists.ozlabs.org/pipermail/skiboot/2016-November/005552.html

The kernel discovers the IMA counters information in the device tree
at the "ima-counters" device node which has a compatible field
"ibm,opal-in-memory-counters".

Parsing of the Events' information:
To parse the IMA PMUs and events information, the kernel has to
discover the "ima-counters" node and walk through the pmu and event
nodes.

Here is an excerpt of the dt showing the ima-counters and mcs node:
/dts-v1/;

[...]
ima-counters {   
ima-nest-offset = <0x32>;
compatible = "ibm,opal-in-memory-counters";
ima-nest-size = <0x3>;
#address-cells = <0x1>;
#size-cells = <0x1>;
phandle = <0x1238>;
version-id = [00];

mcs0 {
compatible = "ibm,ima-counters-chip";
ranges;
#address-cells = <0x1>;
#size-cells = <0x1>;
phandle = <0x1279>;
scale = "1.2207e-4";
unit = "MiB";

event@528 {
event-name = "PM_MCS_UP_128B_DATA_XFER_MC0" ;
desc = "Total Read Bandwidth seen on both MCS 
of MC0";
phandle = <0x128c>;
reg = <0x118 0x8>;
};
[...]

From the device tree, the kernel parses the PMUs and their events'
information.

After parsing the nest IMA PMUs and their events, the PMUs and their
attributes are registered in the kernel.

Example Usage :
 # perf list

  [...]
  nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/   [Kernel PMU event]
  nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0_LAST_SAMPLE/ [Kernel PMU event]
  [...]

 # perf stat -e "nest_mcs0/PM_MCS_DOWN_128B_DATA_XFER_MC0/" -a --per-socket

TODOs:
 - Add support for Core IMA.
 - Add support for thread IMA.

Comments/feedback/suggestions are welcome.

Changelog:
 v1 -> v2 :
 - Account for the cases where a PMU can have a common scale and unit
   values for all its supported events (Patch 3/6).
 - Fixed a Build error (for maple_defconfig) by enabling ima_pmu.o
   only for CONFIG_PPC_POWERNV=y (Patch 4/6)
 - Read from the "event-name" property instead of "name" for an event
   node (Patch 3/6).

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>

Hemant Kumar (6):
  powerpc/powernv: Data structure and macros definitions
  powerpc/powernv: Autoload 

[PATCH v2 1/6] powerpc/powernv: Data structure and macros definitions

2016-11-20 Thread Hemant Kumar
Create new header file "ima-pmu.h" to add the data structures
and macros needed for IMA pmu support.


Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ima-pmu.h | 73 ++
 1 file changed, 73 insertions(+)
 create mode 100644 arch/powerpc/include/asm/ima-pmu.h

diff --git a/arch/powerpc/include/asm/ima-pmu.h 
b/arch/powerpc/include/asm/ima-pmu.h
new file mode 100644
index 000..0ed8886
--- /dev/null
+++ b/arch/powerpc/include/asm/ima-pmu.h
@@ -0,0 +1,73 @@
+#ifndef PPC_POWERNV_IMA_PMU_DEF_H
+#define PPC_POWERNV_IMA_PMU_DEF_H
+
+/*
+ * Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2016 Madhavan Srinivasan, IBM Corporation.
+ *   (C) 2016 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define IMA_MAX_CHIPS  32
+#define IMA_MAX_PMUS   32
+#define IMA_MAX_PMU_NAME_LEN   256
+
+#define NEST_IMA_ENGINE_START  1
+#define NEST_IMA_ENGINE_STOP   0
+#define NEST_MAX_PAGES 16
+
+#define NEST_IMA_PRODUCTION_MODE   1
+
+#define IMA_DTB_COMPAT "ibm,opal-in-memory-counters"
+#define IMA_DTB_NEST_COMPAT"ibm,ima-counters-chip"
+
+/*
+ * Structure to hold per chip specific memory address
+ * information for nest pmus. Nest Counter data are exported
+ * in per-chip reserved memory region by the PORE Engine.
+ */
+struct perchip_nest_info {
+   u32 chip_id;
+   u64 pbase;
+   u64 vbase[NEST_MAX_PAGES];
+   u64 size;
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ */
+struct ima_events {
+   char *ev_name;
+   char *ev_value;
+};
+
+/*
+ * Device tree parser code detects IMA pmu support and
+ * registers new IMA pmus. This structure will
+ * hold the pmu functions and attrs for each ima pmu and
+ * will be referenced at the time of pmu registration.
+ */
+struct ima_pmu {
+   struct pmu pmu;
+   int domain;
+   const struct attribute_group *attr_groups[4];
+};
+
+/*
+ * Domains for IMA PMUs
+ */
+#define IMA_DOMAIN_NEST1
+
+#define UNKNOWN_DOMAIN -1
+
+#endif /* PPC_POWERNV_IMA_PMU_DEF_H */
-- 
2.7.4



[PATCH v2 2/6] powerpc/powernv: Autoload IMA device driver module

2016-11-20 Thread Hemant Kumar
This patch does three things :
 - Enables "opal.c" to create a platform device for the IMA interface
   according to the appropriate compatibility string.
 - Finds the reserved-memory region details from the system device tree
   and gets the base address of the HOMER region for each chip.
 - We also get the Nest PMU counter data offsets (in the HOMER region)
   and their sizes. The offsets for the counters' data are fixed and
   won't change from chip to chip.

The device tree parsing logic is separated from the PMU creation
functions (which is done in subsequent patches). Right now, only Nest
units are taken care of.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/Makefile   |   2 +-
 arch/powerpc/platforms/powernv/opal-ima.c | 117 ++
 arch/powerpc/platforms/powernv/opal.c |  13 
 3 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-ima.c

diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb..ee28528 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,7 +2,7 @@ obj-y   += setup.o opal-wrappers.o opal.o 
opal-async.o idle.o
 obj-y  += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y  += rng.o opal-elog.o opal-dump.o opal-sysparam.o 
opal-sensor.o
 obj-y  += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
-obj-y  += opal-kmsg.o
+obj-y  += opal-kmsg.o opal-ima.o
 
 obj-$(CONFIG_SMP)  += smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)  += pci.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-ima.c 
b/arch/powerpc/platforms/powernv/opal-ima.c
new file mode 100644
index 000..446e7bc
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-ima.c
@@ -0,0 +1,117 @@
+/*
+ * OPAL IMA interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright  (C) 2016 Madhavan Srinivasan, IBM Corporation.
+ *(C) 2016 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Per-chip nest counter information; the array has IMA_MAX_CHIPS slots. */
+struct perchip_nest_info nest_perchip_info[IMA_MAX_CHIPS];
+
+static int opal_ima_counters_probe(struct platform_device *pdev)
+{
+   struct device_node *child, *ima_dev, *rm_node = NULL;
+   struct perchip_nest_info *pcni;
+   u32 reg[4], pages, nest_offset, nest_size, idx;
+   int i = 0;
+   const char *node_name;
+
+   if (!pdev || !pdev->dev.of_node)
+   return -ENODEV;
+
+   ima_dev = pdev->dev.of_node;
+
+   /*
+* nest_offset : where the nest-counters' data start.
+* size : size of the entire nest-counters region
+*/
+   if (of_property_read_u32(ima_dev, "ima-nest-offset", _offset))
+   goto err;
+   if (of_property_read_u32(ima_dev, "ima-nest-size", _size))
+   goto err;
+
+   /* Find the "homer region" for each chip */
+   rm_node = of_find_node_by_path("/reserved-memory");
+   if (!rm_node)
+   goto err;
+
+   for_each_child_of_node(rm_node, child) {
+   if (of_property_read_string_index(child, "name", 0,
+ _name))
+   continue;
+   if (strncmp("ibm,homer-image", node_name,
+   strlen("ibm,homer-image")))
+   continue;
+
+   /* Get the chip id to which the above homer region belongs to */
+   if (of_property_read_u32(child, "ibm,chip-id", ))
+   goto err;
+
+   /* reg property will have four u32 cells. */
+   if (of_property_read_u32_array(child, "reg", re

[PATCH v2 4/6] powerpc/perf: Add event attribute and group to IMA pmus

2016-11-20 Thread Hemant Kumar
Device tree IMA driver code parses the IMA units and their events. It
passes the information to IMA pmu code which is placed in powerpc/perf
as "ima-pmu.c".

This patch creates only event attributes and attribute groups for the
IMA pmus.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog:
v1 -> v2:
 - Changes to Makefile to only enable this feature for
   CONFIG_PPC_POWERNV=y

 arch/powerpc/perf/Makefile|  6 +-
 arch/powerpc/perf/ima-pmu.c   | 96 +++
 arch/powerpc/platforms/powernv/opal-ima.c | 12 +++-
 3 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/perf/ima-pmu.c

diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f102d53..099c61a 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -2,10 +2,14 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 obj-$(CONFIG_PERF_EVENTS)  += callchain.o perf_regs.o
 
+ima-$(CONFIG_PPC_POWERNV)   += ima-pmu.o
+
 obj-$(CONFIG_PPC_PERF_CTRS)+= core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)  += power4-pmu.o ppc970-pmu.o power5-pmu.o \
   power5+-pmu.o power6-pmu.o power7-pmu.o \
-  isa207-common.o power8-pmu.o power9-pmu.o
+  isa207-common.o power8-pmu.o power9-pmu.o \
+  $(ima-y)
+
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
diff --git a/arch/powerpc/perf/ima-pmu.c b/arch/powerpc/perf/ima-pmu.c
new file mode 100644
index 000..50d2226
--- /dev/null
+++ b/arch/powerpc/perf/ima-pmu.c
@@ -0,0 +1,96 @@
+/*
+ * Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2016 Madhavan Srinivasan, IBM Corporation.
+ *  (C) 2016 Hemant K Shaw, IBM Corporation.
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Per-chip nest counter information (IMA_MAX_CHIPS slots) and the table of
+ * nest PMUs (IMA_MAX_PMUS slots).
+ *
+ * NOTE(review): opal-ima.c appears to define both arrays as well (earlier
+ * patches of this series) - confirm that only one definition remains and
+ * the other file uses an extern declaration.
+ */
+struct perchip_nest_info nest_perchip_info[IMA_MAX_CHIPS];
+struct ima_pmu *per_nest_pmu_arr[IMA_MAX_PMUS];
+
+/*
+ * dev_str_attr : Allocate a read-only (0444) event attribute.
+ *
+ * The attribute is named "name", carries "str" as its event string and uses
+ * perf_event_sysfs_show as its show callback. Neither string is copied;
+ * only the pointers are stored, so both must outlive the attribute.
+ *
+ * Returns a pointer to the embedded struct attribute, or NULL if the
+ * allocation fails.
+ */
+static struct attribute *dev_str_attr(const char *name, const char *str)
+{
+	struct perf_pmu_events_attr *attr;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return NULL;
+
+	sysfs_attr_init(&attr->attr.attr);
+
+	attr->event_str = str;
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = perf_event_sysfs_show;
+
+	return &attr->attr.attr;
+}
+
+/*
+ * update_events_in_group: Build the "events" attribute group for "pmu" from
+ * the first "idx" entries of "events" and store it in pmu->attr_groups[0].
+ *
+ * For every entry, ev_name becomes the attribute name and ev_value the
+ * event string. The attribute array has idx + 1 slots; the last one stays
+ * NULL and terminates the list.
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails, in which case
+ * everything allocated here is released again and "pmu" is not modified.
+ */
+static int update_events_in_group(struct ima_events *events,
+				  int idx, struct ima_pmu *pmu)
+{
+	struct attribute_group *attr_group;
+	struct attribute **attrs;
+	int i;
+
+	/* Allocate memory for attribute group */
+	attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group)
+		return -ENOMEM;
+
+	/* kcalloc() fails cleanly if count * size would overflow */
+	attrs = kcalloc(idx + 1, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs)
+		goto err_group;
+
+	for (i = 0; i < idx; i++, events++) {
+		attrs[i] = dev_str_attr(events->ev_name, events->ev_value);
+		/* A NULL entry would silently truncate the attribute list */
+		if (!attrs[i])
+			goto err_attrs;
+	}
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+	pmu->attr_groups[0] = attr_group;
+	return 0;
+
+err_attrs:
+	/* Release the attributes created before the failing one */
+	while (i--)
+		kfree(container_of(attrs[i], struct perf_pmu_events_attr,
+				   attr.attr));
+	kfree(attrs);
+err_group:
+	kfree(attr_group);
+	return -ENOMEM;
+}
+
+/*
+ * init_ima_pmu : Setup the IMA pmu device in "pmu_ptr" and its events
+ *"events".
+ * Setup the cpu mask information for these pmus and setup the state machine
+ * hotplug notifiers as well.
+ */
+int init_ima_pmu(struct ima_events *events, int idx,
+struct ima_pmu *pmu_ptr)
+{
+   int ret = -ENODEV;
+
+   ret = update_events_in_group(events, idx, pmu_ptr);
+   if (ret)
+   goto err_free;
+
+   return 0;
+
+err_free:
+   /* Only free the attr_groups which are dynamically allocated  */
+ 

[PATCH v2 3/6] powerpc/powernv: Detect supported IMA units and its events

2016-11-20 Thread Hemant Kumar
Parse device tree to detect IMA units. Traverse through each IMA unit
node to find supported events and corresponding unit/scale files (if any).

Right now, only nest IMA units are supported.
The nest IMA unit event node from device tree will contain the offset in
the reserved memory region to get the counter data for a given
event. The offsets for the nest events are contained in the "reg"
property of the event "node".

Kernel code uses this offset as event configuration value.

Device tree parser code also looks for scale/unit property in the event
node and passes on the value as an event attr for perf interface to use
in the post processing by the perf tool. Some PMUs may have common scale
and unit properties which implies that all events supported by this PMU
inherit the scale and unit properties of the PMU itself. For those
events, we need to set the common unit and scale values.

For failure to initialize any unit or any event, disable that unit and
continue setting up the rest of them.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
Changelog :
v1 -> v2:
 - Read from the "event-name" property instead of "name" property for
   an event node.
 - Assign scale and unit values for events for a PMU which has a common
   unit and scale value.

 arch/powerpc/platforms/powernv/opal-ima.c | 332 ++
 1 file changed, 332 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal-ima.c 
b/arch/powerpc/platforms/powernv/opal-ima.c
index 446e7bc..e8d5771 100644
--- a/arch/powerpc/platforms/powernv/opal-ima.c
+++ b/arch/powerpc/platforms/powernv/opal-ima.c
@@ -32,6 +32,337 @@
 #include 
 
 struct perchip_nest_info nest_perchip_info[IMA_MAX_CHIPS];
+/* Table of nest PMU descriptors; the array has IMA_MAX_PMUS slots. */
+struct ima_pmu *per_nest_pmu_arr[IMA_MAX_PMUS];
+
+/*
+ * ima_event_info: Point "events" at "name" and at a new, zeroed value buffer
+ * of IMA_MAX_PMU_NAME_LEN bytes.
+ *
+ * "name" is stored as-is (not copied). "events" is only written once the
+ * allocation has succeeded.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int ima_event_info(char *name, struct ima_events *events)
+{
+	char *value = kzalloc(IMA_MAX_PMU_NAME_LEN, GFP_KERNEL);
+
+	if (value == NULL)
+		return -ENOMEM;
+
+	events->ev_name = name;
+	events->ev_value = value;
+
+	return 0;
+}
+
+/*
+ * ima_event_info_str: Set up "events" with "name" and a copy of the string
+ * value of property "pp".
+ *
+ * The value buffer is allocated (through ima_event_info()) before the
+ * property is validated, so on -EINVAL "events" already refers to "name"
+ * and to the new buffer, and the caller has to release them.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or
+ * -EINVAL when the property has no value, is not NUL-terminated within its
+ * length, or is longer than IMA_MAX_PMU_NAME_LEN.
+ */
+static int ima_event_info_str(struct property *pp, char *name,
+			      struct ima_events *events)
+{
+	int rc = ima_event_info(name, events);
+
+	if (rc)
+		return rc;
+
+	if (!pp->value)
+		return -EINVAL;
+	if (strnlen(pp->value, pp->length) == pp->length)
+		return -EINVAL;
+	if (pp->length > IMA_MAX_PMU_NAME_LEN)
+		return -EINVAL;
+
+	/* The checks above guarantee the terminating NUL is copied too */
+	strncpy(events->ev_value, (const char *)pp->value, pp->length);
+	return 0;
+}
+
+/*
+ * ima_event_info_val: Set up "events" with "name" and the value string
+ * "event=0x<val>" (val printed in hexadecimal).
+ *
+ * Returns 0 on success or the error from ima_event_info().
+ */
+static int ima_event_info_val(char *name, u32 val,
+			      struct ima_events *events)
+{
+	int rc = ima_event_info(name, events);
+
+	if (!rc)
+		sprintf(events->ev_value, "event=0x%x", val);
+
+	return rc;
+}
+
+/*
+ * set_event_property: Attach the string property "pp" to "events" under the
+ * attribute name "<ev_name>.<event_prop>".
+ *
+ * The name is built in a new IMA_MAX_PMU_NAME_LEN byte buffer, which becomes
+ * events->ev_name on success. If ima_event_info_str() fails, every buffer
+ * allocated for this call is freed and both fields of "events" are NULL.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL if the
+ * composed name does not fit or the property value is rejected.
+ */
+static int set_event_property(struct property *pp, char *event_prop,
+			      struct ima_events *events, char *ev_name)
+{
+	char *buf;
+	int ret;
+
+	buf = kzalloc(IMA_MAX_PMU_NAME_LEN, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Bounded format so an over-long name cannot overrun "buf" */
+	ret = snprintf(buf, IMA_MAX_PMU_NAME_LEN, "%s.%s", ev_name, event_prop);
+	if (ret >= IMA_MAX_PMU_NAME_LEN) {
+		kfree(buf);
+		return -EINVAL;
+	}
+
+	/*
+	 * Start from a known state: ima_event_info_str() can fail before it
+	 * has written to "events", and the error path must not free stale
+	 * pointers in that case.
+	 */
+	events->ev_name = NULL;
+	events->ev_value = NULL;
+
+	ret = ima_event_info_str(pp, buf, events);
+	if (ret) {
+		/* ev_value is still NULL if the callee failed to allocate */
+		kfree(events->ev_value);
+		kfree(buf);
+		events->ev_name = NULL;
+		events->ev_value = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * ima_events_node_parser: Parse the event node "dev" and assign the parsed
+ * information to event "events".
+ *
+ * Parses the "reg" property of this event. "reg" gives us the event offset.
+ * Also, parse the "scale" and "unit" properties, if any.
+ */
+static int ima_events_node_parser(struct device_node *dev,
+ struct ima_events *events,
+ struct property *event_scale,
+ struct property *event_unit)
+{
+   struct property *name, *pp;
+   char *ev_name;
+   u32 val;
+   int idx = 0, ret;
+
+   if (!dev)
+   return -EINVAL;
+
+   /*
+* Loop through each property of an event node
+*/
+   name = of_find_property(dev, "event-name", NULL);
+   if (!name)
+   return -ENODEV;
+
+   if (!name->value ||
+ (strnlen(name-

[PATCH v2 5/6] powerpc/perf: Generic ima pmu event functions

2016-11-20 Thread Hemant Kumar
Since the IMA counters' data is periodically fed to a memory location,
the functions to read/update, start/stop and add/del events can be generic
and can be shared by all IMA PMU units.

This patch adds a set of generic IMA PMU event functions to be used by
each IMA PMU unit. It also adds code to set up the format attribute and to
register IMA PMUs, and adds an event_init function for nest_ima events.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ima-pmu.h|   2 +
 arch/powerpc/perf/ima-pmu.c   | 122 ++
 arch/powerpc/platforms/powernv/opal-ima.c |  37 +++--
 3 files changed, 154 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/ima-pmu.h 
b/arch/powerpc/include/asm/ima-pmu.h
index 0ed8886..f0d95bb 100644
--- a/arch/powerpc/include/asm/ima-pmu.h
+++ b/arch/powerpc/include/asm/ima-pmu.h
@@ -70,4 +70,6 @@ struct ima_pmu {
 
 #define UNKNOWN_DOMAIN -1
 
+int ima_get_domain(struct device_node *pmu_dev);
+
 #endif /* PPC_POWERNV_IMA_PMU_DEF_H */
diff --git a/arch/powerpc/perf/ima-pmu.c b/arch/powerpc/perf/ima-pmu.c
index 50d2226..9948636 100644
--- a/arch/powerpc/perf/ima-pmu.c
+++ b/arch/powerpc/perf/ima-pmu.c
@@ -17,6 +17,117 @@
 struct perchip_nest_info nest_perchip_info[IMA_MAX_CHIPS];
 struct ima_pmu *per_nest_pmu_arr[IMA_MAX_PMUS];
 
+/* Needed for sanity check */
+extern u64 nest_max_offset;
+
+/* "event" format field: bits 0-20 of the perf config word */
+PMU_FORMAT_ATTR(event, "config:0-20");
+static struct attribute *ima_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+/* sysfs "format" directory shared by the IMA pmus */
+static struct attribute_group ima_format_group = {
+	.name = "format",
+	.attrs = ima_format_attrs,
+};
+
+/*
+ * nest_ima_event_init: Validate a nest IMA event and resolve the address of
+ * its counter.
+ *
+ * attr.config is treated as the event offset: it selects a page of the
+ * chip's vbase[] table (config / PAGE_SIZE) and a byte offset inside that
+ * page. The resulting address is stored in hw.event_base.
+ *
+ * Returns 0 on success, -ENOENT if the event belongs to another pmu,
+ * -EINVAL for sampling events, exclude_* filters, a negative cpu or an
+ * out-of-range offset, and -ENODEV if the cpu's chip id has no slot in
+ * nest_perchip_info[].
+ */
+static int nest_ima_event_init(struct perf_event *event)
+{
+	int chip_id;
+	/* Keep all 64 bits: a u32 copy would let oversized configs wrap */
+	u64 config = event->attr.config;
+	struct perchip_nest_info *pcni;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* Sanity check for config (event offset) */
+	if (config > nest_max_offset)
+		return -EINVAL;
+
+	/* The chip id indexes a fixed-size table, so bound it first */
+	chip_id = topology_physical_package_id(event->cpu);
+	if (chip_id < 0 || (size_t)chip_id >= ARRAY_SIZE(nest_perchip_info))
+		return -ENODEV;
+
+	pcni = &nest_perchip_info[chip_id];
+	if (config / PAGE_SIZE >= ARRAY_SIZE(pcni->vbase))
+		return -EINVAL;
+
+	event->hw.event_base = pcni->vbase[config / PAGE_SIZE] +
+			       (config & ~PAGE_MASK);
+
+	return 0;
+}
+
+/*
+ * ima_read_counter: Snapshot the event's counter.
+ *
+ * Reads the 64-bit big-endian value at hw.event_base and stores it, in cpu
+ * byte order, in hw.prev_count.
+ */
+static void ima_read_counter(struct perf_event *event)
+{
+	u64 *addr, data;
+
+	addr = (u64 *)event->hw.event_base;
+	data = __be64_to_cpu(*addr);
+	local64_set(&event->hw.prev_count, data);
+}
+
+/*
+ * ima_perf_event_update: Fold the counter's progress into the event count.
+ *
+ * Reads the current big-endian counter value at hw.event_base, adds the
+ * difference from hw.prev_count to event->count, and records the new value
+ * in hw.prev_count.
+ */
+static void ima_perf_event_update(struct perf_event *event)
+{
+	u64 counter_prev, counter_new, final_count, *addr;
+
+	addr = (u64 *)event->hw.event_base;
+	counter_prev = local64_read(&event->hw.prev_count);
+	counter_new = __be64_to_cpu(*addr);
+	final_count = counter_new - counter_prev;
+
+	local64_set(&event->hw.prev_count, counter_new);
+	local64_add(final_count, &event->count);
+}
+
+/*
+ * ima_event_start: Start counting by taking a snapshot of the counter
+ * through ima_read_counter(). "flags" is not used.
+ */
+static void ima_event_start(struct perf_event *event, int flags)
+{
+   ima_read_counter(event);
+}
+
+/*
+ * ima_event_stop: Stop an event. The count is only brought up to date when
+ * the caller passes PERF_EF_UPDATE in "flags".
+ */
+static void ima_event_stop(struct perf_event *event, int flags)
+{
+	if (!(flags & PERF_EF_UPDATE))
+		return;
+
+	ima_perf_event_update(event);
+}
+
+/*
+ * ima_event_add: Add an event to the pmu; it is started right away when
+ * PERF_EF_START is set in "flags". Always returns 0.
+ */
+static int ima_event_add(struct perf_event *event, int flags)
+{
+	const int start_now = flags & PERF_EF_START;
+
+	if (start_now)
+		ima_event_start(event, flags);
+
+	return 0;
+}
+
+/* update_pmu_ops : Populate the appropriate operations for "pmu" */
+static int update_pmu_ops(struct ima_pmu *pmu)
+{
+   if (!pmu)
+   return -EINVAL;
+
+   pmu->pmu.task_ctx_nr = perf_invalid_context;
+   pmu->pmu.event_init = nest_ima_event_init;
+   pmu->pmu.add = ima_event_add;
+   pmu->pmu.del = ima_event_stop;
+   pmu->pmu.start = ima_event_start;
+   pmu->pmu.stop = ima_event_stop;
+   pmu->pmu.read = ima_perf_event_update;
+   pmu->attr_groups[1] = _format_grou

[PATCH v2 6/6] powerpc/perf: IMA pmu cpumask and cpu hotplug support

2016-11-20 Thread Hemant Kumar
Adds cpumask attribute to be used by each IMA pmu. Only one cpu (any
online CPU) from each chip for nest PMUs is designated to read counters.

On CPU hotplug, the dying CPU is checked to see whether it is one of the
designated CPUs. If it is, the next online CPU from the same chip (for nest
units) is designated as the new CPU to read counters.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal-api.h|   3 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/perf/ima-pmu.c| 167 -
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 4 files changed, 171 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 0e2e57b..116c155 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -167,7 +167,8 @@
 #define OPAL_INT_EOI   124
 #define OPAL_INT_SET_MFRR  125
 #define OPAL_PCI_TCE_KILL  126
-#define OPAL_LAST  126
+#define OPAL_NEST_IMA_COUNTERS_CONTROL  128
+#define OPAL_LAST  128
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index e958b70..bc31251 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -229,6 +229,8 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t 
kill_type,
 int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
 uint32_t pe_num, uint32_t tce_size,
 uint64_t dma_addr, uint32_t npages);
+int64_t opal_nest_ima_counters_control(uint64_t mode, uint64_t value1,
+ uint64_t value2, uint64_t value3);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/perf/ima-pmu.c b/arch/powerpc/perf/ima-pmu.c
index 9948636..2b1bfc1 100644
--- a/arch/powerpc/perf/ima-pmu.c
+++ b/arch/powerpc/perf/ima-pmu.c
@@ -16,6 +16,7 @@
 
 struct perchip_nest_info nest_perchip_info[IMA_MAX_CHIPS];
 struct ima_pmu *per_nest_pmu_arr[IMA_MAX_PMUS];
+/* CPUs designated to read the nest counters; exported via the "cpumask" attribute */
+static cpumask_t nest_ima_cpumask;
 
 /* Needed for sanity check */
 extern u64 nest_max_offset;
@@ -31,6 +32,164 @@ static struct attribute_group ima_format_group = {
.attrs = ima_format_attrs,
 };
 
+/*
+ * ima_pmu_cpumask_get_attr: show() callback of the "cpumask" attribute.
+ *
+ * Prints nest_ima_cpumask into "buf" and returns the result of
+ * cpumap_print_to_pagebuf(). "dev" and "attr" are not used.
+ */
+static ssize_t ima_pmu_cpumask_get_attr(struct device *dev,
+					struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &nest_ima_cpumask);
+}
+
+/* Read-only "cpumask" device attribute backed by ima_pmu_cpumask_get_attr() */
+static DEVICE_ATTR(cpumask, S_IRUGO, ima_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *ima_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+/* Unnamed group holding only the "cpumask" attribute */
+static struct attribute_group ima_pmu_cpumask_attr_group = {
+	.attrs = ima_pmu_cpumask_attrs,
+};
+
+/*
+ * nest_init : Initializes the nest ima engine for the current chip.
+ *
+ * Asks OPAL to start the engine in production mode. On a non-zero return
+ * code the slot of the calling cpu in "loc" is set to 1; "loc" must
+ * therefore have an entry for every possible cpu id.
+ */
+static void nest_init(int *loc)
+{
+	/* Same width as the OPAL call's return type, so no bits are lost */
+	int64_t rc;
+
+	rc = opal_nest_ima_counters_control(NEST_IMA_PRODUCTION_MODE,
+					    NEST_IMA_ENGINE_START, 0, 0);
+	if (rc)
+		loc[smp_processor_id()] = 1;
+}
+
+/*
+ * nest_change_cpu_context: Migrate the perf context of every registered
+ * nest pmu from "old_cpu" to "new_cpu".
+ *
+ * per_nest_pmu_arr[] is walked until the first NULL entry or the end of the
+ * array, whichever comes first.
+ */
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	int i;
+
+	/* Bounds check first, so a full table is never read past its end */
+	for (i = 0; i < IMA_MAX_PMUS && per_nest_pmu_arr[i] != NULL; i++)
+		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
+					 old_cpu, new_cpu);
+}
+
+static int ppc_nest_ima_cpu_online(unsigned int cpu)
+{
+   int nid, fcpu, ncpu;
+   struct cpumask *l_cpumask, tmp_mask;
+
+   /* Fint the cpumask of this node */
+   nid = cpu_to_node(cpu);
+   l_cpumask = cpumask_of_node(nid);
+
+   /*
+* If any of the cpu from this node is already present in the mask,
+* just return, if not, then set this cpu in the mask.
+*/
+   if (!cpumask_and(_mask, l_cpumask, _ima_cpumask)) {
+   cpumask_set_cpu(cpu, _ima_cpumask);
+   return 0;
+   }
+
+   fcpu = cpumask_first(l_cpumask);
+   ncpu = cpumask_next(cpu, l_cpumask);
+   if (cpu == fcpu) {
+   if (cpumask_test_and_clear_

[PATCH v3 6/6] powerpc/perf: IMC pmu cpumask and cpu hotplug support

2016-12-19 Thread Hemant Kumar
Adds cpumask attribute to be used by each IMC pmu. Only one cpu (any
online CPU) from each chip for nest PMUs is designated to read counters.

On CPU hotplug, the dying CPU is checked to see whether it is one of the
designated CPUs. If it is, the next online CPU from the same chip (for nest
units) is designated as the new CPU to read counters.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal-api.h|   3 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/perf/imc-pmu.c| 167 -
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 4 files changed, 171 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 0e2e57b..48e1d3e 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -167,7 +167,8 @@
 #define OPAL_INT_EOI   124
 #define OPAL_INT_SET_MFRR  125
 #define OPAL_PCI_TCE_KILL  126
-#define OPAL_LAST  126
+#define OPAL_NEST_IMC_COUNTERS_CONTROL  128
+#define OPAL_LAST  128
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index e958b70..fe72b57 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -229,6 +229,8 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t 
kill_type,
 int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
 uint32_t pe_num, uint32_t tce_size,
 uint64_t dma_addr, uint32_t npages);
+int64_t opal_nest_imc_counters_control(uint64_t mode, uint64_t value1,
+ uint64_t value2, uint64_t value3);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f12ece8..49f6486 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -16,6 +16,7 @@
 
 struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+/* CPUs designated to read the nest counters; exported via the "cpumask" attribute */
+static cpumask_t nest_imc_cpumask;
 
 /* Needed for sanity check */
 extern u64 nest_max_offset;
@@ -31,6 +32,164 @@ static struct attribute_group imc_format_group = {
.attrs = imc_format_attrs,
 };
 
+/*
+ * imc_pmu_cpumask_get_attr: show() callback of the "cpumask" attribute.
+ *
+ * Prints nest_imc_cpumask into "buf" and returns the result of
+ * cpumap_print_to_pagebuf(). "dev" and "attr" are not used.
+ */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+					struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &nest_imc_cpumask);
+}
+
+/* Read-only "cpumask" device attribute backed by imc_pmu_cpumask_get_attr() */
+static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *imc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+/* Unnamed group holding only the "cpumask" attribute */
+static struct attribute_group imc_pmu_cpumask_attr_group = {
+	.attrs = imc_pmu_cpumask_attrs,
+};
+
+/*
+ * nest_init : Initializes the nest imc engine for the current chip.
+ *
+ * Asks OPAL to start the engine in production mode. On a non-zero return
+ * code the slot of the calling cpu in "loc" is set to 1; "loc" must
+ * therefore have an entry for every possible cpu id.
+ */
+static void nest_init(int *loc)
+{
+	/* Same width as the OPAL call's return type, so no bits are lost */
+	int64_t rc;
+
+	rc = opal_nest_imc_counters_control(NEST_IMC_PRODUCTION_MODE,
+					    NEST_IMC_ENGINE_START, 0, 0);
+	if (rc)
+		loc[smp_processor_id()] = 1;
+}
+
+/*
+ * nest_change_cpu_context: Migrate the perf context of every registered
+ * nest pmu from "old_cpu" to "new_cpu".
+ *
+ * per_nest_pmu_arr[] is walked until the first NULL entry or the end of the
+ * array, whichever comes first.
+ */
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	int i;
+
+	/* Bounds check first, so a full table is never read past its end */
+	for (i = 0; i < IMC_MAX_PMUS && per_nest_pmu_arr[i] != NULL; i++)
+		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
+					 old_cpu, new_cpu);
+}
+
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+   int nid, fcpu, ncpu;
+   struct cpumask *l_cpumask, tmp_mask;
+
+   /* Fint the cpumask of this node */
+   nid = cpu_to_node(cpu);
+   l_cpumask = cpumask_of_node(nid);
+
+   /*
+* If any of the cpu from this node is already present in the mask,
+* just return, if not, then set this cpu in the mask.
+*/
+   if (!cpumask_and(_mask, l_cpumask, _imc_cpumask)) {
+   cpumask_set_cpu(cpu, _imc_cpumask);
+   return 0;
+   }
+
+   fcpu = cpumask_first(l_cpumask);
+   ncpu = cpumask_next(cpu, l_cpumask);
+   if (cpu == fcpu) {
+   if (cpumask_test_and_

[PATCH v3 1/6] powerpc/powernv: Data structure and macros definitions

2016-12-19 Thread Hemant Kumar
Create new header file "imc-pmu.h" to add the data structures
and macros needed for IMC pmu support.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h | 73 ++
 1 file changed, 73 insertions(+)
 create mode 100644 arch/powerpc/include/asm/imc-pmu.h

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
new file mode 100644
index 000..911d837
--- /dev/null
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -0,0 +1,73 @@
+#ifndef PPC_POWERNV_IMC_PMU_DEF_H
+#define PPC_POWERNV_IMC_PMU_DEF_H
+
+/*
+ * IMC Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2016 Madhavan Srinivasan, IBM Corporation.
+ *   (C) 2016 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Sizes of the per-chip table, the pmu table and the name/value buffers */
+#define IMC_MAX_CHIPS			32
+#define IMC_MAX_PMUS			32
+#define IMC_MAX_PMU_NAME_LEN		256
+
+/* Values passed to the OPAL nest counters control call */
+#define NEST_IMC_ENGINE_START		1
+#define NEST_IMC_ENGINE_STOP		0
+/* Number of vbase[] entries in struct perchip_nest_info */
+#define NEST_MAX_PAGES			16
+
+#define NEST_IMC_PRODUCTION_MODE	1
+
+#define IMC_DTB_COMPAT			"ibm,opal-in-memory-counters"
+#define IMC_DTB_NEST_COMPAT		"ibm,imc-counters-chip"
+
+/*
+ * Structure to hold per chip specific memory address
+ * information for nest pmus. Nest Counter data are exported
+ * in per-chip reserved memory region by the PORE Engine.
+ */
+struct perchip_nest_info {
+   /* id of the chip this entry belongs to */
+   u32 chip_id;
+   /* presumably the physical base address of the region - TODO confirm */
+   u64 pbase;
+   /*
+    * One base address per page of the region (NEST_MAX_PAGES entries).
+    * NOTE(review): the pmu event-init code appears to index this by
+    * (event offset / PAGE_SIZE) - confirm against imc-pmu.c.
+    */
+   u64 vbase[NEST_MAX_PAGES];
+   /* presumably the size of the region in bytes - TODO confirm */
+   u64 size;
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ *
+ * ev_name is the event (attribute) name and ev_value the string attached
+ * to it, for example "event=0x<offset>". Both are plain pointers; who
+ * frees them is decided by the code that fills the structure.
+ */
+struct imc_events {
+   char *ev_name;
+   char *ev_value;
+};
+
+/*
+ * Device tree parser code detects IMC pmu support and
+ * registers new IMC pmus. This structure will
+ * hold the pmu functions and attrs for each imc pmu and
+ * will be referenced at the time of pmu registration.
+ */
+struct imc_pmu {
+   /* generic perf pmu; its callbacks are filled in by the pmu code */
+   struct pmu pmu;
+   /* pmu domain: one of the IMC domain values, or UNKNOWN_DOMAIN */
+   int domain;
+   /*
+    * Attribute groups handed to perf.
+    * NOTE(review): presumably NULL-terminated, with the events, format
+    * and cpumask groups in the first slots - confirm against imc-pmu.c.
+    */
+   const struct attribute_group *attr_groups[4];
+};
+
+/*
+ * Domains for IMC PMUs
+ */
+#define IMC_DOMAIN_NEST		1
+
+/* Returned when a pmu's domain cannot be determined */
+#define UNKNOWN_DOMAIN		-1
+
+#endif /* PPC_POWERNV_IMC_PMU_DEF_H */
-- 
2.7.4



[PATCH v3 2/6] powerpc/powernv: Autoload IMC device driver module

2016-12-19 Thread Hemant Kumar
This patch does three things :
 - Enables "opal.c" to create a platform device for the IMC interface
   according to the appropriate compatibility string.
 - Find the reserved-memory region details from the system device tree
   and get the base address of HOMER region address for each chip.
 - We also get the Nest PMU counter data offsets (in the HOMER region)
   and their sizes. The offsets for the counters' data are fixed and
   won't change from chip to chip.

The device tree parsing logic is separated from the PMU creation
functions (which is done in subsequent patches). Right now, only Nest
units are taken care of.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/Makefile   |   2 +-
 arch/powerpc/platforms/powernv/opal-imc.c | 117 ++
 arch/powerpc/platforms/powernv/opal.c |  13 
 3 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-imc.c

diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb..44909fe 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,7 +2,7 @@ obj-y   += setup.o opal-wrappers.o opal.o 
opal-async.o idle.o
 obj-y  += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y  += rng.o opal-elog.o opal-dump.o opal-sysparam.o 
opal-sensor.o
 obj-y  += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
-obj-y  += opal-kmsg.o
+obj-y  += opal-kmsg.o opal-imc.o
 
 obj-$(CONFIG_SMP)  += smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)  += pci.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
new file mode 100644
index 000..ee2ae45
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -0,0 +1,117 @@
+/*
+ * OPAL IMC interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright  (C) 2016 Madhavan Srinivasan, IBM Corporation.
+ *(C) 2016 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Per-chip nest counter information; the array has IMC_MAX_CHIPS slots. */
+struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
+
+static int opal_imc_counters_probe(struct platform_device *pdev)
+{
+   struct device_node *child, *imc_dev, *rm_node = NULL;
+   struct perchip_nest_info *pcni;
+   u32 reg[4], pages, nest_offset, nest_size, idx;
+   int i = 0;
+   const char *node_name;
+
+   if (!pdev || !pdev->dev.of_node)
+   return -ENODEV;
+
+   imc_dev = pdev->dev.of_node;
+
+   /*
+* nest_offset : where the nest-counters' data start.
+* size : size of the entire nest-counters region
+*/
+   if (of_property_read_u32(imc_dev, "imc-nest-offset", _offset))
+   goto err;
+   if (of_property_read_u32(imc_dev, "imc-nest-size", _size))
+   goto err;
+
+   /* Find the "homer region" for each chip */
+   rm_node = of_find_node_by_path("/reserved-memory");
+   if (!rm_node)
+   goto err;
+
+   for_each_child_of_node(rm_node, child) {
+   if (of_property_read_string_index(child, "name", 0,
+ _name))
+   continue;
+   if (strncmp("ibm,homer-image", node_name,
+   strlen("ibm,homer-image")))
+   continue;
+
+   /* Get the chip id to which the above homer region belongs to */
+   if (of_property_read_u32(child, "ibm,chip-id", ))
+   goto err;
+
+   /* reg property will have four u32 cells. */
+   if (of_property_rea

[PATCH v3 3/6] powerpc/powernv: Detect supported IMC units and its events

2016-12-19 Thread Hemant Kumar
Parse device tree to detect IMC units. Traverse through each IMC unit
node to find supported events and corresponding unit/scale files (if any).

Right now, only nest IMC units are supported.
The nest IMC unit event node from device tree will contain the offset in
the reserved memory region to get the counter data for a given
event. The offsets for the nest events are contained in the "reg"
property of the event "node".

Kernel code uses this offset as event configuration value.

Device tree parser code also looks for scale/unit property in the event
node and passes on the value as an event attr for perf interface to use
in the post processing by the perf tool. Some PMUs may have common scale
and unit properties which implies that all events supported by this PMU
inherit the scale and unit properties of the PMU itself. For those
events, we need to set the common unit and scale values.

For failure to initialize any unit or any event, disable that unit and
continue setting up the rest of them.

Cc: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Anton Blanchard <an...@samba.org>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Michael Neuling <mi...@neuling.org>
Cc: Stewart Smith <stew...@linux.vnet.ibm.com>
Cc: Daniel Axtens <d...@axtens.net>
Cc: Stephane Eranian <eran...@google.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/opal-imc.c | 332 ++
 1 file changed, 332 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index ee2ae45..5ee93402 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -32,6 +32,337 @@
 #include 
 
 struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
+/*
+ * Table of pointers to IMC PMU descriptors, with IMC_MAX_PMUS slots.
+ * NOTE(review): judging by the name this holds the nest PMUs only; the
+ * code that fills it is not visible here - confirm against the users.
+ */
+struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+
+/*
+ * imc_event_info: Attach "name" to "events" and give it an empty value buffer.
+ *
+ * A zeroed buffer of IMC_MAX_PMU_NAME_LEN bytes is allocated for the event
+ * value. "name" is stored as-is (the pointer is not copied).
+ *
+ * Returns 0 on success. Returns -ENOMEM, leaving "events" untouched, when
+ * the value buffer cannot be allocated.
+ */
+static int imc_event_info(char *name, struct imc_events *events)
+{
+	char *value = kzalloc(IMC_MAX_PMU_NAME_LEN, GFP_KERNEL);
+
+	if (value == NULL)
+		return -ENOMEM;
+
+	events->ev_value = value;
+	events->ev_name = name;
+
+	return 0;
+}
+
+/*
+ * imc_event_info_str: Set up "events" with "name" and a copy of the string
+ * stored in property "pp".
+ *
+ * The value buffer is allocated (via imc_event_info()) before the property
+ * is validated. When -EINVAL is returned, "events" therefore already refers
+ * to "name" and to the freshly allocated buffer, and the caller is
+ * responsible for releasing them.
+ *
+ * Returns 0 on success, the error from imc_event_info() if the buffer could
+ * not be set up, or -EINVAL if the property has no value, is not
+ * NUL-terminated within its length, or is longer than IMC_MAX_PMU_NAME_LEN.
+ */
+static int imc_event_info_str(struct property *pp, char *name,
+			      struct imc_events *events)
+{
+	int err = imc_event_info(name, events);
+
+	if (err)
+		return err;
+
+	if (!pp->value)
+		return -EINVAL;
+	/* No NUL byte inside the property means it is not a usable string. */
+	if (strnlen(pp->value, pp->length) == pp->length)
+		return -EINVAL;
+	/* The copy below must fit in the IMC_MAX_PMU_NAME_LEN value buffer. */
+	if (pp->length > IMC_MAX_PMU_NAME_LEN)
+		return -EINVAL;
+
+	strncpy(events->ev_value, (const char *)pp->value, pp->length);
+	return 0;
+}
+
+/*
+ * imc_event_info_val: Set up "events" with "name" and a value string of the
+ * form "event=0x<val in hex>".
+ *
+ * Returns 0 on success, or the error from imc_event_info() if the value
+ * buffer could not be set up (nothing is formatted in that case).
+ */
+static int imc_event_info_val(char *name, u32 val,
+			      struct imc_events *events)
+{
+	int err = imc_event_info(name, events);
+
+	if (!err)
+		sprintf(events->ev_value, "event=0x%x", val);
+
+	return err;
+}
+
+/*
+ * set_event_property: Record property "pp" of event "ev_name" in "events".
+ *
+ * The entry is named "<ev_name>.<event_prop>" and its value is the string
+ * held in "pp". On success, "events" refers to two allocated buffers (the
+ * name built here and the value set up by imc_event_info_str()).
+ *
+ * On failure nothing stays allocated, and events->ev_name and
+ * events->ev_value are NULL once the combined name has been built, so no
+ * dangling pointers are left behind.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL if the
+ * combined name does not fit in IMC_MAX_PMU_NAME_LEN bytes or the property
+ * is rejected by imc_event_info_str().
+ */
+static int set_event_property(struct property *pp, char *event_prop,
+			      struct imc_events *events, char *ev_name)
+{
+	char *buf;
+	int len, ret;
+
+	buf = kzalloc(IMC_MAX_PMU_NAME_LEN, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Bounded formatting: refuse names that would overflow the buffer. */
+	len = snprintf(buf, IMC_MAX_PMU_NAME_LEN, "%s.%s", ev_name, event_prop);
+	if (len >= IMC_MAX_PMU_NAME_LEN) {
+		kfree(buf);
+		return -EINVAL;
+	}
+
+	/*
+	 * imc_event_info_str() can fail before it touches "events" (value
+	 * buffer allocation failure). Clear the fields first so the error
+	 * path below never frees a stale pointer.
+	 */
+	events->ev_name = NULL;
+	events->ev_value = NULL;
+
+	ret = imc_event_info_str(pp, buf, events);
+	if (ret) {
+		/* ev_value is NULL if its allocation failed; kfree(NULL) is a no-op. */
+		kfree(events->ev_value);
+		kfree(buf);
+		events->ev_name = NULL;
+		events->ev_value = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * imc_events_node_parser: Parse the event node "dev" and assign the parsed
+ * information to event "events".
+ *
+ * Parses the "reg" property of this event. "reg" gives us the event offset.
+ * Also, parse the "scale" and "unit" properties, if any.
+ */
+static int imc_events_node_parser(struct device_node *dev,
+ struct imc_events *events,
+ struct property *event_scale,
+ struct property *event_unit)
+{
+   struct property *name, *pp;
+   char *ev_name;
+   u32 val;
+   int idx = 0, ret;
+
+   if (!dev)
+   return -EINVAL;
+
+   /*
+* Loop through each property of an event node
+*/
+   name = of_find_property(dev, "event-name", NULL);
+   if (!name)
+   return -ENODEV;
+
+   if (!name->value ||
+ (strnlen(name->value, name->length) == name->length) ||
+ (name->length > IMC_MAX_PMU_NAME_LEN))
+   return -EINVAL;
+
+   ev_name = kzalloc(IMC_MAX_PMU_NAME_LEN, GFP_KERNEL);
+

  1   2   >