[PATCH] iomap: Fix the write_count in iomap_add_to_ioend().

2020-08-19 Thread Anju T Sudhakar
From: Ritesh Harjani 

__bio_try_merge_page() may return same_page = 1 with merged = 0.
This can happen when bio->bi_iter.bi_size + len > UINT_MAX, and the
scenario mostly occurs when too much dirty data has accumulated.
Handle this case in iomap_add_to_ioend() by incrementing write_count.

Without the patch we hit the kernel warning below:
 
 WARNING: CPU: 18 PID: 5130 at fs/iomap/buffered-io.c:74 
iomap_page_release+0x120/0x150
 CPU: 18 PID: 5130 Comm: fio Kdump: loaded Tainted: GW 
5.8.0-rc3 #6
 Call Trace:
  __remove_mapping+0x154/0x320 (unreliable)
  iomap_releasepage+0x80/0x180
  try_to_release_page+0x94/0xe0
  invalidate_inode_page+0xc8/0x110
  invalidate_mapping_pages+0x1dc/0x540
  generic_fadvise+0x3c8/0x450
  xfs_file_fadvise+0x2c/0xe0 [xfs]
  vfs_fadvise+0x3c/0x60
  ksys_fadvise64_64+0x68/0xe0
  sys_fadvise64+0x28/0x40
  system_call_exception+0xf8/0x1c0
  system_call_common+0xf0/0x278
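
In other words, after this change the accounting in iomap_add_to_ioend()
behaves roughly like the condensed sketch below (variable names as in the
diff that follows; not the complete upstream function):

	/*
	 * Sketch: count a page against iop->write_count either when it is
	 * merged into the current bio as a new segment, or when it will be
	 * added to a fresh (possibly chained) bio.
	 */
	merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
				      &same_page);
	if (iop && merged && !same_page)
		atomic_inc(&iop->write_count);

	if (!merged) {
		if (iop)
			atomic_inc(&iop->write_count);
		/* ... chain a new bio if full, then bio_add_page(...) ... */
	}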

Reported-by: Shivaprasad G Bhat 
Signed-off-by: Ritesh Harjani 
Signed-off-by: Anju T Sudhakar 
---
 fs/iomap/buffered-io.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index bcfc288dba3f..4e8062279e66 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1332,10 +1332,12 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, 
struct page *page,
 
merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
&same_page);
-   if (iop && !same_page)
+   if (iop && merged && !same_page)
atomic_inc(&iop->write_count);
 
if (!merged) {
+   if (iop)
+   atomic_inc(&iop->write_count);
if (bio_full(wpc->ioend->io_bio, len)) {
wpc->ioend->io_bio =
iomap_chain_bio(wpc->ioend->io_bio);
-- 
2.25.4



[PATCH 2/2] powerpc/perf: Add support for outputting extended regs in perf intr_regs

2020-04-29 Thread Anju T Sudhakar
The capability flag PERF_PMU_CAP_EXTENDED_REGS is used to indicate a
PMU that supports extended registers. The generic code defines the mask
of extended registers as 0 for unsupported architectures.

Add support for extended registers on the POWER9 architecture. For POWER9,
the extended registers are mmcr0, mmcr1 and mmcr2.

The REG_RESERVED mask is redefined to accommodate the extended registers.
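
For context, a user-space consumer requests these registers at interrupt
time via perf_event_attr.sample_regs_intr. A minimal sketch (assumes the
updated powerpc uapi perf_regs.h from this patch is in the include path;
the event choice and lack of error handling are illustrative only):

	#include <linux/perf_event.h>
	#include <asm/perf_regs.h>	/* updated powerpc uapi header */
	#include <asm/unistd.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>

	/* Sample cycles and capture MMCR0-2 at interrupt time. */
	static int open_cycles_with_ext_regs(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.sample_period = 100000;
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR;
		attr.sample_regs_intr = (1ULL << PERF_REG_POWERPC_MMCR0) |
					(1ULL << PERF_REG_POWERPC_MMCR1) |
					(1ULL << PERF_REG_POWERPC_MMCR2);

		/* pid = 0 (self), cpu = -1 (any cpu), no group fd, no flags. */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}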

With patch:


# perf record -I?
available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14
r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 nip
msr orig_r3 ctr link xer ccr softe trap dar dsisr sier mmcra mmcr0
mmcr1 mmcr2

# perf record -I ls
# perf script -D

PERF_RECORD_SAMPLE(IP, 0x1): 9019/9019: 0 period: 1 addr: 0
... intr regs: mask 0x ABI 64-bit
 r0    0xc011b12c
 r1    0xc03f9a98b930
 r2    0xc1a32100
 r3    0xc03f8fe9a800
 r4    0xc03fd181
 r5    0x3e32557150
 r6    0xc03f9a98b908
 r7    0xffc1cdae06ac
 r8    0x818
 [...]
 r31   0xc03ffd047230
 nip   0xc011b2c0
 msr   0x90009033
 orig_r3 0xc011b21c
 ctr   0xc0119380
 link  0xc011b12c
 xer   0x0
 ccr   0x2800
 softe 0x1
 trap  0xf00
 dar   0x0
 dsisr 0x800
 sier  0x0
 mmcra 0x800
 mmcr0 0x82008090
 mmcr1 0x1e00
 mmcr2 0x0
 ... thread: perf:9019

Signed-off-by: Anju T Sudhakar 
---
 arch/powerpc/include/asm/perf_event_server.h  |  5 +++
 arch/powerpc/include/uapi/asm/perf_regs.h | 13 +++-
 arch/powerpc/perf/core-book3s.c   |  1 +
 arch/powerpc/perf/perf_regs.c | 29 ++--
 arch/powerpc/perf/power9-pmu.c|  1 +
 .../arch/powerpc/include/uapi/asm/perf_regs.h | 13 +++-
 tools/perf/arch/powerpc/include/perf_regs.h   |  6 +++-
 tools/perf/arch/powerpc/util/perf_regs.c  | 33 +++
 8 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h 
b/arch/powerpc/include/asm/perf_event_server.h
index 3e9703f44c7c..1d15953bd99e 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -55,6 +55,11 @@ struct power_pmu {
int *blacklist_ev;
/* BHRB entries in the PMU */
int bhrb_nr;
+   /*
+* set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
+* the pmu supports extended perf regs capability
+*/
+   int capabilities;
 };
 
 /*
diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
index f599064dd8dc..604b831378fe 100644
--- a/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -48,6 +48,17 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
-   PERF_REG_POWERPC_MAX,
+   /* Extended registers */
+   PERF_REG_POWERPC_MMCR0,
+   PERF_REG_POWERPC_MMCR1,
+   PERF_REG_POWERPC_MMCR2,
+   PERF_REG_EXTENDED_MAX,
+   /* Max regs without the extended regs */
+   PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
 };
+
+#define PERF_REG_PMU_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REG_EXTENDED_MASK  (((1ULL << (PERF_REG_EXTENDED_MAX))\
+   - 1) - PERF_REG_PMU_MASK)
+
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 3dcfecf858f3..f56b77800a7b 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2276,6 +2276,7 @@ int register_power_pmu(struct power_pmu *pmu)
 
power_pmu.attr_groups = ppmu->attr_groups;
 
+   power_pmu.capabilities |= (ppmu->capabilities & 
PERF_PMU_CAP_EXTENDED_REGS);
 #ifdef MSR_HV
/*
 * Use FCHV to ignore kernel events if MSR.HV is set.
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index a213a0aa5d25..57aa02568caf 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -15,7 +15,8 @@
 
 #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
 
-#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK) &  \
+   (~((1ULL << PERF_REG_POWERPC_MAX) - 1)))
 
 static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_R0,  gpr[0]),
@@ -69,10 +70,22 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
 };
 
+/* Function to return the extended register values */
+static u64 get_ext_regs_value(int idx)
+{
+   switch (idx) {
+   case PERF_REG_POWERPC_MMCR0:
+

[PATCH 1/2] tools/perf: set no_auxtrace for powerpc

2020-04-29 Thread Anju T Sudhakar
x86/perf_regs.h is included by util/intel-pt.c, which gets compiled
when building perf on powerpc. Since x86/perf_regs.h already defines
`PERF_EXTENDED_REG_MASK`, defining `PERF_EXTENDED_REG_MASK` for
powerpc to add support for perf extended regs results in a perf build
error on powerpc.

The powerpc architecture currently has no auxtrace support, so as a
workaround for this issue, set NO_AUXTRACE for powerpc.

Signed-off-by: Anju T Sudhakar 
---
 tools/perf/arch/powerpc/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index e58d00d62f02..9ebb5f513605 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -3,6 +3,7 @@ ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
 
+NO_AUXTRACE := 1
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 PERF_HAVE_JITDUMP := 1
-- 
2.20.1



[PATCH 0/2] powerpc/perf: Add support for perf extended regs in powerpc

2020-04-29 Thread Anju T Sudhakar
Patch set to add support for the perf extended register capability in
powerpc. The capability flag PERF_PMU_CAP_EXTENDED_REGS is used to
indicate a PMU that supports extended registers. The generic code
defines the mask of extended registers as 0 for unsupported architectures.

Patch 2/2 defines this PERF_PMU_CAP_EXTENDED_REGS mask to output the
values of mmcr0, mmcr1 and mmcr2 for POWER9.

x86/perf_regs.h is included by util/intel-pt.c, which gets compiled
when building perf on powerpc. Since x86/perf_regs.h already defines
`PERF_EXTENDED_REG_MASK`, defining `PERF_EXTENDED_REG_MASK` for
powerpc to add support for perf extended regs results in a perf build
error on powerpc. The powerpc architecture currently has no auxtrace
support, so as a workaround for this issue, patch 1/2 sets NO_AUXTRACE
for powerpc. (Any other solutions are welcome.)

Patch 2/2 also adds the extended regs to sample_reg_mask on the tool side,
for use with the `-I?` option.
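
On the tool side, new registers are typically exposed through the
sample_reg_masks table so that `-I?` can list them; a sketch of what the
powerpc table additions might look like (entry names assumed, following
the existing SMPL_REG/SMPL_REG_END convention in tools/perf, not the
literal patch):

	/* tools/perf/arch/powerpc/util/perf_regs.c (sketch) */
	const struct sample_reg sample_reg_masks[] = {
		SMPL_REG(r0, PERF_REG_POWERPC_R0),
		/* ... existing GPR/SPR entries ... */
		SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA),
		/* new extended registers from this series */
		SMPL_REG(mmcr0, PERF_REG_POWERPC_MMCR0),
		SMPL_REG(mmcr1, PERF_REG_POWERPC_MMCR1),
		SMPL_REG(mmcr2, PERF_REG_POWERPC_MMCR2),
		SMPL_REG_END
	};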

Anju T Sudhakar (2):
  tools/perf: set no_auxtrace for powerpc
  powerpc/perf: Add support for outputting extended regs in perf
intr_regs

 arch/powerpc/include/asm/perf_event_server.h  |  5 +++
 arch/powerpc/include/uapi/asm/perf_regs.h | 13 +++-
 arch/powerpc/perf/core-book3s.c   |  1 +
 arch/powerpc/perf/perf_regs.c | 29 ++--
 arch/powerpc/perf/power9-pmu.c|  1 +
 .../arch/powerpc/include/uapi/asm/perf_regs.h | 13 +++-
 tools/perf/arch/powerpc/Makefile  |  1 +
 tools/perf/arch/powerpc/include/perf_regs.h   |  6 +++-
 tools/perf/arch/powerpc/util/perf_regs.c  | 33 +++
 9 files changed, 96 insertions(+), 6 deletions(-)

-- 
2.20.1



[tip: perf/urgent] perf kvm: Move kvm-stat header file from conditional inclusion to common include section

2019-09-20 Thread tip-bot2 for Anju T Sudhakar
The following commit has been merged into the perf/urgent branch of tip:

Commit-ID: 8067b3da970baa12e6045400fdf009673b8dd3c2
Gitweb:
https://git.kernel.org/tip/8067b3da970baa12e6045400fdf009673b8dd3c2
Author:Anju T Sudhakar 
AuthorDate:Thu, 18 Jul 2019 23:47:47 +05:30
Committer: Arnaldo Carvalho de Melo 
CommitterDate: Fri, 20 Sep 2019 10:28:26 -03:00

perf kvm: Move kvm-stat header file from conditional inclusion to common 
include section

Move the kvm-stat header file to the common include section, and place the
definitions in the header file under the conditional inclusion `#ifdef
HAVE_KVM_STAT_SUPPORT`.

This helps to define other 'perf kvm' related function prototypes in the
kvm-stat header file which do not need kvm-stat support.
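
The resulting header follows the usual guard pattern, sketched below
(layout only, not the literal file):

	#ifndef __PERF_KVM_STAT_H
	#define __PERF_KVM_STAT_H

	#ifdef HAVE_KVM_STAT_SUPPORT
	/* kvm-stat specific includes, types and prototypes live here */
	#endif /* HAVE_KVM_STAT_SUPPORT */

	/* prototypes usable without kvm-stat support go here */

	#endif /* __PERF_KVM_STAT_H */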

Signed-off-by: Anju T Sudhakar 
Reviewed-By: Ravi Bangoria 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Madhavan Srinivasan 
Cc: Michael Ellerman 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: linuxppc-...@lists.ozlabs.org
Link: http://lore.kernel.org/lkml/20190718181749.30612-1-a...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/builtin-kvm.c   | 2 +-
 tools/perf/util/kvm-stat.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index ac6d6e0..2b822be 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -21,6 +21,7 @@
 #include "util/top.h"
 #include "util/data.h"
 #include "util/ordered-events.h"
+#include "util/kvm-stat.h"
 #include "ui/ui.h"
 
 #include 
@@ -59,7 +60,6 @@ static const char *get_filename_for_perf_kvm(void)
 }
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include "util/kvm-stat.h"
 
 void exit_event_get_key(struct evsel *evsel,
struct perf_sample *sample,
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 4691363..8fd6ec2 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -2,6 +2,8 @@
 #ifndef __PERF_KVM_STAT_H
 #define __PERF_KVM_STAT_H
 
+#ifdef HAVE_KVM_STAT_SUPPORT
+
 #include "tool.h"
 #include "stat.h"
 #include "record.h"
@@ -144,5 +146,6 @@ extern const int decode_str_len;
 extern const char *kvm_exit_reason;
 extern const char *kvm_entry_trace;
 extern const char *kvm_exit_trace;
+#endif /* HAVE_KVM_STAT_SUPPORT */
 
 #endif /* __PERF_KVM_STAT_H */


[tip: perf/urgent] perf kvm stat: Set 'trace_cycles' as default event for 'perf kvm record' in powerpc

2019-09-20 Thread tip-bot2 for Anju T Sudhakar
The following commit has been merged into the perf/urgent branch of tip:

Commit-ID: 2bff2b828502b5e5d5ea5a52643d3542053df03f
Gitweb:
https://git.kernel.org/tip/2bff2b828502b5e5d5ea5a52643d3542053df03f
Author:Anju T Sudhakar 
AuthorDate:Thu, 18 Jul 2019 23:47:49 +05:30
Committer: Arnaldo Carvalho de Melo 
CommitterDate: Fri, 20 Sep 2019 10:28:26 -03:00

perf kvm stat: Set 'trace_cycles' as default event for 'perf kvm record' in 
powerpc

Use 'trace_imc/trace_cycles' as the default event for 'perf kvm record'
in powerpc.

Signed-off-by: Anju T Sudhakar 
Reviewed-by: Ravi Bangoria 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Madhavan Srinivasan 
Cc: Michael Ellerman 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: linuxppc-...@lists.ozlabs.org
Link: http://lore.kernel.org/lkml/20190718181749.30612-3-a...@linux.vnet.ibm.com
[ Add missing pmu.h header, needed because this patch uses pmu_have_event() ]
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/powerpc/util/kvm-stat.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index ec5b771..9cc1c4a 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -5,6 +5,7 @@
 #include "util/debug.h"
 #include "util/evsel.h"
 #include "util/evlist.h"
+#include "util/pmu.h"
 
 #include "book3s_hv_exits.h"
 #include "book3s_hcalls.h"
@@ -177,8 +178,9 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid __maybe_unused)
 /*
  * Incase of powerpc architecture, pmu registers are programmable
  * by guest kernel. So monitoring guest via host may not provide
- * valid samples. It is better to fail the "perf kvm record"
- * with default "cycles" event to monitor guest in powerpc.
+ * valid samples with default 'cycles' event. It is better to use
+ * 'trace_imc/trace_cycles' event for guest profiling, since it
+ * can track the guest instruction pointer in the trace-record.
  *
  * Function to parse the arguments and return appropriate values.
  */
@@ -202,8 +204,14 @@ int kvm_add_default_arch_event(int *argc, const char 
**argv)
 
parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
if (!event) {
-   free(tmp);
-   return -EINVAL;
+   if (pmu_have_event("trace_imc", "trace_cycles")) {
+   argv[j++] = strdup("-e");
+   argv[j++] = strdup("trace_imc/trace_cycles/");
+   *argc += 2;
+   } else {
+   free(tmp);
+   return -EINVAL;
+   }
}
 
free(tmp);


[tip: perf/urgent] perf kvm: Add arch neutral function to choose event for perf kvm record

2019-09-20 Thread tip-bot2 for Anju T Sudhakar
The following commit has been merged into the perf/urgent branch of tip:

Commit-ID: 124eb5f82bf9395419b20205c4dcc1b8fcda7f29
Gitweb:
https://git.kernel.org/tip/124eb5f82bf9395419b20205c4dcc1b8fcda7f29
Author:Anju T Sudhakar 
AuthorDate:Thu, 18 Jul 2019 23:47:48 +05:30
Committer: Arnaldo Carvalho de Melo 
CommitterDate: Fri, 20 Sep 2019 10:28:26 -03:00

perf kvm: Add arch neutral function to choose event for perf kvm record

'perf kvm record' uses 'cycles' (if the user did not specify any event)
as the default event to profile the guest.

This does not provide proper samples from the guest in the case of the
powerpc architecture, since on powerpc the PMUs are controlled by the
guest rather than the host.

Add a function to pick an arch-specific event for 'perf kvm record',
instead of selecting 'cycles' as the default event for all
architectures.

For powerpc this function checks for any user-specified event, and if
there isn't any it returns invalid instead of proceeding with the
'cycles' event.
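
The mechanism used for the arch hook is the usual weak-symbol override:
generic code provides a __weak no-op default, and an architecture can
supply a strong definition that wins at link time. A minimal,
self-contained illustration of the pattern (hypothetical names, not the
patch itself):

	/* generic.c: default used when no architecture overrides the hook */
	int __attribute__((weak)) arch_pick_default_event(int *argc,
							  const char **argv)
	{
		return 0;	/* nothing to add; keep generic behaviour */
	}

	/* arch.c: strong definition overrides the weak one for this arch */
	int arch_pick_default_event(int *argc, const char **argv)
	{
		/* inspect argv, optionally append an arch-specific event */
		return 0;
	}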

Signed-off-by: Anju T Sudhakar 
Reviewed-by: Ravi Bangoria 
Cc: Alexander Shishkin 
Cc: Jiri Olsa 
Cc: Madhavan Srinivasan 
Cc: Michael Ellerman 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: linuxppc-...@lists.ozlabs.org
Link: http://lore.kernel.org/lkml/20190718181749.30612-2-a...@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/powerpc/util/kvm-stat.c | 37 -
 tools/perf/builtin-kvm.c| 12 +++-
 tools/perf/util/kvm-stat.h  |  1 +-
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index f0dbf7b..ec5b771 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -8,6 +8,7 @@
 
 #include "book3s_hv_exits.h"
 #include "book3s_hcalls.h"
+#include 
 
 #define NR_TPS 4
 
@@ -172,3 +173,39 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid __maybe_unused)
 
return ret;
 }
+
+/*
+ * Incase of powerpc architecture, pmu registers are programmable
+ * by guest kernel. So monitoring guest via host may not provide
+ * valid samples. It is better to fail the "perf kvm record"
+ * with default "cycles" event to monitor guest in powerpc.
+ *
+ * Function to parse the arguments and return appropriate values.
+ */
+int kvm_add_default_arch_event(int *argc, const char **argv)
+{
+   const char **tmp;
+   bool event = false;
+   int i, j = *argc;
+
+   const struct option event_options[] = {
+   OPT_BOOLEAN('e', "event", &event, NULL),
+   OPT_END()
+   };
+
+   tmp = calloc(j + 1, sizeof(char *));
+   if (!tmp)
+   return -EINVAL;
+
+   for (i = 0; i < j; i++)
+   tmp[i] = argv[i];
+
+   parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
+   if (!event) {
+   free(tmp);
+   return -EINVAL;
+   }
+
+   free(tmp);
+   return 0;
+}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 2b822be..6e3e366 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1514,11 +1514,21 @@ perf_stat:
 }
 #endif /* HAVE_KVM_STAT_SUPPORT */
 
+int __weak kvm_add_default_arch_event(int *argc __maybe_unused,
+   const char **argv __maybe_unused)
+{
+   return 0;
+}
+
 static int __cmd_record(const char *file_name, int argc, const char **argv)
 {
-   int rec_argc, i = 0, j;
+   int rec_argc, i = 0, j, ret;
const char **rec_argv;
 
+   ret = kvm_add_default_arch_event(&argc, argv);
+   if (ret)
+   return -EINVAL;
+
rec_argc = argc + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
rec_argv[i++] = strdup("record");
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 8fd6ec2..6f0fa05 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -148,4 +148,5 @@ extern const char *kvm_entry_trace;
 extern const char *kvm_exit_trace;
 #endif /* HAVE_KVM_STAT_SUPPORT */
 
+extern int kvm_add_default_arch_event(int *argc, const char **argv);
 #endif /* __PERF_KVM_STAT_H */


[PATCH v2 3/3] tools/perf: Set 'trace_cycles' as default event for perf kvm record in powerpc

2019-07-18 Thread Anju T Sudhakar
Use 'trace_imc/trace_cycles' as the default event for 'perf kvm record'
in powerpc.

Signed-off-by: Anju T Sudhakar 
---
 tools/perf/arch/powerpc/util/kvm-stat.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index c55e7405940e..0a06626fb18a 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -177,8 +177,9 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid __maybe_unused)
 /*
  * Incase of powerpc architecture, pmu registers are programmable
  * by guest kernel. So monitoring guest via host may not provide
- * valid samples. It is better to fail the "perf kvm record"
- * with default "cycles" event to monitor guest in powerpc.
+ * valid samples with default 'cycles' event. It is better to use
+ * 'trace_imc/trace_cycles' event for guest profiling, since it
+ * can track the guest instruction pointer in the trace-record.
  *
  * Function to parse the arguments and return appropriate values.
  */
@@ -202,8 +203,14 @@ int kvm_add_default_arch_event(int *argc, const char 
**argv)
 
parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
if (!event) {
-   free(tmp);
-   return -EINVAL;
+   if (pmu_have_event("trace_imc", "trace_cycles")) {
+   argv[j++] = strdup("-e");
+   argv[j++] = strdup("trace_imc/trace_cycles/");
+   *argc += 2;
+   } else {
+   free(tmp);
+   return -EINVAL;
+   }
}
 
free(tmp);
-- 
2.20.1



[PATCH v2 2/3] tools/perf: Add arch neutral function to choose event for perf kvm record

2019-07-18 Thread Anju T Sudhakar
'perf kvm record' uses 'cycles' (if the user did not specify any event) as
the default event to profile the guest.
This does not provide proper samples from the guest in the case of the
powerpc architecture, since on powerpc the PMUs are controlled by
the guest rather than the host.

Patch adds a function to pick an arch-specific event for 'perf kvm record',
instead of selecting 'cycles' as the default event for all architectures.

For powerpc this function checks for any user-specified event, and if there
isn't any it returns invalid instead of proceeding with the 'cycles' event.

Signed-off-by: Anju T Sudhakar 
---

Changes from v1->v2
* Cross-build issue for aarch64, reported by Ravi is fixed.
---

 tools/perf/arch/powerpc/util/kvm-stat.c | 37 +
 tools/perf/builtin-kvm.c| 12 +++-
 tools/perf/util/kvm-stat.h  |  1 +
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index f9db341c47b6..c55e7405940e 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -8,6 +8,7 @@
 
 #include "book3s_hv_exits.h"
 #include "book3s_hcalls.h"
+#include 
 
 #define NR_TPS 4
 
@@ -172,3 +173,39 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid __maybe_unused)
 
return ret;
 }
+
+/*
+ * Incase of powerpc architecture, pmu registers are programmable
+ * by guest kernel. So monitoring guest via host may not provide
+ * valid samples. It is better to fail the "perf kvm record"
+ * with default "cycles" event to monitor guest in powerpc.
+ *
+ * Function to parse the arguments and return appropriate values.
+ */
+int kvm_add_default_arch_event(int *argc, const char **argv)
+{
+   const char **tmp;
+   bool event = false;
+   int i, j = *argc;
+
+   const struct option event_options[] = {
+   OPT_BOOLEAN('e', "event", &event, NULL),
+   OPT_END()
+   };
+
+   tmp = calloc(j + 1, sizeof(char *));
+   if (!tmp)
+   return -EINVAL;
+
+   for (i = 0; i < j; i++)
+   tmp[i] = argv[i];
+
+   parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
+   if (!event) {
+   free(tmp);
+   return -EINVAL;
+   }
+
+   free(tmp);
+   return 0;
+}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 5d2b34d290a3..d03750da051b 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1510,11 +1510,21 @@ static int kvm_cmd_stat(const char *file_name, int 
argc, const char **argv)
 }
 #endif /* HAVE_KVM_STAT_SUPPORT */
 
+int __weak kvm_add_default_arch_event(int *argc __maybe_unused,
+   const char **argv __maybe_unused)
+{
+   return 0;
+}
+
 static int __cmd_record(const char *file_name, int argc, const char **argv)
 {
-   int rec_argc, i = 0, j;
+   int rec_argc, i = 0, j, ret;
const char **rec_argv;
 
+   ret = kvm_add_default_arch_event(&argc, argv);
+   if (ret)
+   return -EINVAL;
+
rec_argc = argc + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
rec_argv[i++] = strdup("record");
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index b3b2670e1a2b..81a5bf4fbc71 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -148,4 +148,5 @@ extern const char *kvm_entry_trace;
 extern const char *kvm_exit_trace;
 #endif /* HAVE_KVM_STAT_SUPPORT */
 
+extern int kvm_add_default_arch_event(int *argc, const char **argv);
 #endif /* __PERF_KVM_STAT_H */
-- 
2.20.1



[PATCH v2 1/3] tools/perf: Move kvm-stat header file from conditional inclusion to common include section

2019-07-18 Thread Anju T Sudhakar
Move the kvm-stat header file to the common include section, and place the
definitions in the header file under the conditional inclusion
`#ifdef HAVE_KVM_STAT_SUPPORT`.

This helps to define other 'perf kvm' related function prototypes in the
kvm-stat header file which do not need kvm-stat support.

Signed-off-by: Anju T Sudhakar 
---
 tools/perf/builtin-kvm.c   | 2 +-
 tools/perf/util/kvm-stat.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index b33c83489120..5d2b34d290a3 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -19,6 +19,7 @@
 #include "util/top.h"
 #include "util/data.h"
 #include "util/ordered-events.h"
+#include "util/kvm-stat.h"
 
 #include 
 #ifdef HAVE_TIMERFD_SUPPORT
@@ -55,7 +56,6 @@ static const char *get_filename_for_perf_kvm(void)
 }
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include "util/kvm-stat.h"
 
 void exit_event_get_key(struct perf_evsel *evsel,
struct perf_sample *sample,
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 1403dec189b4..b3b2670e1a2b 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -2,6 +2,8 @@
 #ifndef __PERF_KVM_STAT_H
 #define __PERF_KVM_STAT_H
 
+#ifdef HAVE_KVM_STAT_SUPPORT
+
 #include "../perf.h"
 #include "tool.h"
 #include "stat.h"
@@ -144,5 +146,6 @@ extern const int decode_str_len;
 extern const char *kvm_exit_reason;
 extern const char *kvm_entry_trace;
 extern const char *kvm_exit_trace;
+#endif /* HAVE_KVM_STAT_SUPPORT */
 
 #endif /* __PERF_KVM_STAT_H */
-- 
2.20.1



Re: power9 NUMA crash while reading debugfs imc_cmd

2019-06-28 Thread Anju T Sudhakar



On 6/28/19 9:04 AM, Qian Cai wrote:



On Jun 27, 2019, at 11:12 PM, Michael Ellerman  wrote:

Qian Cai  writes:

Read of debugfs imc_cmd file for a memory-less node will trigger a crash below
on this power9 machine which has the following NUMA layout.

What type of machine is it?

description: PowerNV
product: 8335-GTH (ibm,witherspoon)
vendor: IBM
width: 64 bits
capabilities: smp powernv opal



Hi Qian Cai,

Could you please try with this patch: 
https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-June/192803.html


and see if the issue is resolved?


Thanks,

Anju




[PATCH RESEND 2/2] tools/perf: Set 'trace_cycles' as default event for perf kvm record in powerpc

2019-06-10 Thread Anju T Sudhakar
Use 'trace_imc/trace_cycles' as the default event for 'perf kvm record'
in powerpc.

Signed-off-by: Anju T Sudhakar 
---
 tools/perf/arch/powerpc/util/kvm-stat.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index 66f8fe500945..b552884263df 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -177,8 +177,9 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid __maybe_unused)
 /*
  * Incase of powerpc architecture, pmu registers are programmable
  * by guest kernel. So monitoring guest via host may not provide
- * valid samples. It is better to fail the "perf kvm record"
- * with default "cycles" event to monitor guest in powerpc.
+ * valid samples with default 'cycles' event. It is better to use
+ * 'trace_imc/trace_cycles' event for guest profiling, since it
+ * can track the guest instruction pointer in the trace-record.
  *
  * Function to parse the arguments and return appropriate values.
  */
@@ -202,8 +203,14 @@ int kvm_add_default_arch_event(int *argc, const char 
**argv)
 
parse_options(j, tmp, event_options, NULL, 0);
if (!event) {
-   free(tmp);
-   return -EINVAL;
+   if (pmu_have_event("trace_imc", "trace_cycles")) {
+   argv[j++] = strdup("-e");
+   argv[j++] = strdup("trace_imc/trace_cycles/");
+   *argc += 2;
+   } else {
+   free(tmp);
+   return -EINVAL;
+   }
}
 
free(tmp);
-- 
2.17.2



[PATCH RESEND 1/2] tools/perf: Add arch neutral function to choose event for perf kvm record

2019-06-10 Thread Anju T Sudhakar
'perf kvm record' uses 'cycles' (if the user did not specify any event) as
the default event to profile the guest.
This does not provide proper samples from the guest in the case of the
powerpc architecture, since on powerpc the PMUs are controlled by
the guest rather than the host.

Patch adds a function to pick an arch-specific event for 'perf kvm record',
instead of selecting 'cycles' as the default event for all architectures.

For powerpc this function checks for any user-specified event, and if there
isn't any it returns invalid instead of proceeding with the 'cycles' event.

Signed-off-by: Anju T Sudhakar 
---
 tools/perf/arch/powerpc/util/kvm-stat.c | 37 +
 tools/perf/builtin-kvm.c| 12 +++-
 tools/perf/util/kvm-stat.h  |  2 +-
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index f9db341c47b6..66f8fe500945 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -8,6 +8,7 @@
 
 #include "book3s_hv_exits.h"
 #include "book3s_hcalls.h"
+#include 
 
 #define NR_TPS 4
 
@@ -172,3 +173,39 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid __maybe_unused)
 
return ret;
 }
+
+/*
+ * Incase of powerpc architecture, pmu registers are programmable
+ * by guest kernel. So monitoring guest via host may not provide
+ * valid samples. It is better to fail the "perf kvm record"
+ * with default "cycles" event to monitor guest in powerpc.
+ *
+ * Function to parse the arguments and return appropriate values.
+ */
+int kvm_add_default_arch_event(int *argc, const char **argv)
+{
+   const char **tmp;
+   bool event = false;
+   int i, j = *argc;
+
+   const struct option event_options[] = {
+   OPT_BOOLEAN('e', "event", &event, NULL),
+   OPT_END()
+   };
+
+   tmp = calloc(j + 1, sizeof(char *));
+   if (!tmp)
+   return -EINVAL;
+
+   for (i = 0; i < j; i++)
+   tmp[i] = argv[i];
+
+   parse_options(j, tmp, event_options, NULL, 0);
+   if (!event) {
+   free(tmp);
+   return -EINVAL;
+   }
+
+   free(tmp);
+   return 0;
+}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index dbb6f737a3e2..fe33b3ec55c9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1510,11 +1510,21 @@ static int kvm_cmd_stat(const char *file_name, int 
argc, const char **argv)
 }
 #endif /* HAVE_KVM_STAT_SUPPORT */
 
+int __weak kvm_add_default_arch_event(int *argc __maybe_unused,
+   const char **argv __maybe_unused)
+{
+   return 0;
+}
+
 static int __cmd_record(const char *file_name, int argc, const char **argv)
 {
-   int rec_argc, i = 0, j;
+   int rec_argc, i = 0, j, ret;
const char **rec_argv;
 
+   ret = kvm_add_default_arch_event(&argc, argv);
+   if (ret)
+   return -EINVAL;
+
rec_argc = argc + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
rec_argv[i++] = strdup("record");
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 1403dec189b4..da38b56c46cb 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -144,5 +144,5 @@ extern const int decode_str_len;
 extern const char *kvm_exit_reason;
 extern const char *kvm_entry_trace;
 extern const char *kvm_exit_trace;
-
+extern int kvm_add_default_arch_event(int *argc, const char **argv);
 #endif /* __PERF_KVM_STAT_H */
-- 
2.17.2



[PATCH 1/2] tools/perf: Add arch neutral function to choose event for perf kvm record

2019-05-24 Thread Anju T Sudhakar
'perf kvm record' uses 'cycles' (if the user did not specify any event) as
the default event to profile the guest.
This does not provide proper samples from the guest in the case of the
powerpc architecture, since on powerpc the PMUs are controlled by
the guest rather than the host.

Patch adds a function to pick an arch-specific event for 'perf kvm record',
instead of selecting 'cycles' as the default event for all architectures.

For powerpc this function checks for any user-specified event, and if there
isn't any it returns invalid instead of proceeding with the 'cycles' event.

Signed-off-by: Anju T Sudhakar 
---
 tools/perf/arch/powerpc/util/kvm-stat.c | 37 +
 tools/perf/builtin-kvm.c| 12 +++-
 tools/perf/util/kvm-stat.h  |  2 +-
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index f9db341c47b6..66f8fe500945 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -8,6 +8,7 @@
 
 #include "book3s_hv_exits.h"
 #include "book3s_hcalls.h"
+#include 
 
 #define NR_TPS 4
 
@@ -172,3 +173,39 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid __maybe_unused)
 
return ret;
 }
+
+/*
+ * Incase of powerpc architecture, pmu registers are programmable
+ * by guest kernel. So monitoring guest via host may not provide
+ * valid samples. It is better to fail the "perf kvm record"
+ * with default "cycles" event to monitor guest in powerpc.
+ *
+ * Function to parse the arguments and return appropriate values.
+ */
+int kvm_add_default_arch_event(int *argc, const char **argv)
+{
+   const char **tmp;
+   bool event = false;
+   int i, j = *argc;
+
+   const struct option event_options[] = {
+   OPT_BOOLEAN('e', "event", &event, NULL),
+   OPT_END()
+   };
+
+   tmp = calloc(j + 1, sizeof(char *));
+   if (!tmp)
+   return -EINVAL;
+
+   for (i = 0; i < j; i++)
+   tmp[i] = argv[i];
+
+   parse_options(j, tmp, event_options, NULL, 0);
+   if (!event) {
+   free(tmp);
+   return -EINVAL;
+   }
+
+   free(tmp);
+   return 0;
+}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index dbb6f737a3e2..fe33b3ec55c9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1510,11 +1510,21 @@ static int kvm_cmd_stat(const char *file_name, int 
argc, const char **argv)
 }
 #endif /* HAVE_KVM_STAT_SUPPORT */
 
+int __weak kvm_add_default_arch_event(int *argc __maybe_unused,
+   const char **argv __maybe_unused)
+{
+   return 0;
+}
+
 static int __cmd_record(const char *file_name, int argc, const char **argv)
 {
-   int rec_argc, i = 0, j;
+   int rec_argc, i = 0, j, ret;
const char **rec_argv;
 
+   ret = kvm_add_default_arch_event(&argc, argv);
+   if (ret)
+   return -EINVAL;
+
rec_argc = argc + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
rec_argv[i++] = strdup("record");
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 1403dec189b4..da38b56c46cb 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -144,5 +144,5 @@ extern const int decode_str_len;
 extern const char *kvm_exit_reason;
 extern const char *kvm_entry_trace;
 extern const char *kvm_exit_trace;
-
+extern int kvm_add_default_arch_event(int *argc, const char **argv);
 #endif /* __PERF_KVM_STAT_H */
-- 
2.17.2



[PATCH 2/2] tools/perf: Set 'trace_cycles' as default event for perf kvm record in powerpc

2019-05-24 Thread Anju T Sudhakar
Use 'trace_imc/trace_cycles' as the default event for 'perf kvm record'
in powerpc.

Signed-off-by: Anju T Sudhakar 
---
 tools/perf/arch/powerpc/util/kvm-stat.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c 
b/tools/perf/arch/powerpc/util/kvm-stat.c
index 66f8fe500945..b552884263df 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -177,8 +177,9 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char 
*cpuid __maybe_unused)
 /*
  * Incase of powerpc architecture, pmu registers are programmable
  * by guest kernel. So monitoring guest via host may not provide
- * valid samples. It is better to fail the "perf kvm record"
- * with default "cycles" event to monitor guest in powerpc.
+ * valid samples with default 'cycles' event. It is better to use
+ * 'trace_imc/trace_cycles' event for guest profiling, since it
+ * can track the guest instruction pointer in the trace-record.
  *
  * Function to parse the arguments and return appropriate values.
  */
@@ -202,8 +203,14 @@ int kvm_add_default_arch_event(int *argc, const char 
**argv)
 
parse_options(j, tmp, event_options, NULL, 0);
if (!event) {
-   free(tmp);
-   return -EINVAL;
+   if (pmu_have_event("trace_imc", "trace_cycles")) {
+   argv[j++] = strdup("-e");
+   argv[j++] = strdup("trace_imc/trace_cycles/");
+   *argc += 2;
+   } else {
+   free(tmp);
+   return -EINVAL;
+   }
}
 
free(tmp);
-- 
2.17.2



Re: [PATCH v4 0/5] powerpc/perf: IMC trace-mode support

2019-04-16 Thread Anju T Sudhakar



On 4/16/19 3:14 PM, Anju T Sudhakar wrote:

Hi,

Kindly ignore this series, since patch 5/5 in this series doesn't 
incorporate the event-format change


that I've done in v4 of this series.


Apologies for the inconvenience. I will post the updated v5 soon.



s/v5/v4



Thanks,

Anju

On 4/15/19 3:41 PM, Anju T Sudhakar wrote:

IMC (In-Memory Collection counters) is a hardware monitoring facility
that collects a large number of hardware performance events.
POWER9 supports two modes for IMC: Accumulation mode and Trace mode.
In Accumulation mode, event counts are accumulated in system memory,
and the hypervisor then reads the posted counts periodically or when
requested. In IMC Trace mode, the 64-bit trace scom value is initialized
with the event information; the CPMC*SEL and CPMC_LOAD fields in the
trace scom specify the event to be monitored and the sampling duration.
On each overflow in the CPMC*SEL, hardware snapshots the program counter
along with the event counts and writes them into the memory pointed to
by LDBAR. LDBAR has bits to indicate whether the hardware is configured
for accumulation or trace mode.
Currently the event monitored in trace-mode is fixed as cycles.

Trace-IMC Implementation:
--
To enable trace-imc, we need to:

* Add a trace node in the DTS file for power9, so that the new trace node
can be discovered by the kernel.

Information included in the DTS file is as follows (a snippet from
the ima-catalog):

TRACE_IMC: trace-events {
  #address-cells = <0x1>;
  #size-cells = <0x1>;
  event@1020 {
 event-name = "cycles" ;
 reg = <0x1020 0x8>;
 desc = "Reference cycles" ;
  };
  };
  trace@0 {
 compatible = "ibm,imc-counters";
 events-prefix = "trace_";
 reg = <0x0 0x8>;
 events = < &TRACE_IMC >;
 type = <0x2>;
 size = <0x4>;
  };

The OP-BUILD changes needed to include the "trace node" are already pulled
into the ima-catalog repo:

https://github.com/open-power/op-build/commit/d3e75dc26d1283d7d5eb444bff1ec9e40d5dfc07



* Enhance the opal_imc_counters_* calls to support this new trace mode
in IMC. Add support to initialize the trace-mode scom.

TRACE_IMC_SCOM bit representation:

0:1 : SAMPSEL
2:33    : CPMC_LOAD
34:40   : CPMC1SEL
41:47   : CPMC2SEL
48:50   : BUFFERSIZE
51:63   : RESERVED

CPMC_LOAD contains the sampling duration. SAMPSEL and CPMC*SEL determine
the event to count. BUFFERSIZE indicates the memory range. On each
overflow, hardware snapshots the program counter along with the event
counts, updates the memory, and reloads the CPMC_LOAD value for the next
sampling duration.
IMC hardware does not support exceptions, so it quietly wraps around if
the memory buffer reaches the end.
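
As a sketch, the trace scom value could be composed from the fields above
roughly like this (assuming IBM bit numbering, i.e. bit 0 is the most
significant bit of the 64-bit value; the helper and field names here are
illustrative, not the OPAL/kernel code):

	/* Place a value into IBM-numbered bit range [start:end] of a u64. */
	static inline u64 ibm_bits(u64 val, unsigned int start, unsigned int end)
	{
		return (val & ((1ULL << (end - start + 1)) - 1)) << (63 - end);
	}

	static u64 compose_trace_scom(u64 sampsel, u64 cpmc_load,
				      u64 cpmc1sel, u64 cpmc2sel, u64 buffersize)
	{
		return ibm_bits(sampsel,     0,  1) |	/* SAMPSEL    */
		       ibm_bits(cpmc_load,   2, 33) |	/* CPMC_LOAD  */
		       ibm_bits(cpmc1sel,   34, 40) |	/* CPMC1SEL   */
		       ibm_bits(cpmc2sel,   41, 47) |	/* CPMC2SEL   */
		       ibm_bits(buffersize, 48, 50);	/* BUFFERSIZE; 51:63 reserved */
	}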

OPAL support for IMC trace mode is already upstream.

* Set LDBAR spr to enable imc-trace mode.
   LDBAR Layout:
   0 : Enable/Disable
   1 : 0 -> Accumulation Mode
   1 -> Trace Mode
   2:3   : Reserved
   4-6   : PB scope
   7 : Reserved
   8:50  : Counter Address
   51:63 : Reserved

--

PMI interrupt handling is avoided, since IMC trace mode snapshots the
program counter and updates the memory directly. This also provides a way
for the operating system to do instruction sampling in real time without
the PMI (Performance Monitoring Interrupts) processing overhead.

Performance data using 'perf top' with and without the trace-imc event:

PMI interrupt count when the `perf top` command is executed without the
trace-imc event:


# cat /proc/interrupts  (a snippet from the output)
9944  1072    804    804   1644    804 1306
804    804    804    804    804 804    804
804    804   1961   1602    804    804 1258
[-]
803    803    803    803    803 803    803
803    803    803    803    804 804    804
804    804    804    804    804 804    803
803    803    803    803    803 1306    803
803   Performance monitoring interrupts


`perf top` with trace-imc (executed right after 'perf top' without 
trace-imc event):


# perf top -e trace_imc/trace_cycles/
12.50%  [kernel]  [k] arch_cpu_idle
11.81%  [kernel]  [k] __next_timer_interrupt
11.22%  [kernel]  [k] rcu_idle_enter
10.25%  [kernel]  [k] find_next_bit
  7.91%  [kernel]  [k] do_idle
  7.69%  [kernel]  [k] rcu_dynticks_eqs_exit
  5.20%  [kernel]  [k] tick_nohz_idle_stop_tick
  [---]

# cat /proc/interrupts (a snippet from the output)

9944  1072    804    804   1644    804 1306
804    804    804    804    804 804    804
804    804   1961   1602    804    804 1258
[-]
8

[PATCH v3 3/5] powerpc/perf: Add privileged access check for thread_imc

2019-02-06 Thread Anju T Sudhakar
From: Madhavan Srinivasan 

Add code to restrict user access to thread_imc pmu since
some event report privilege level information.

Fixes: f74c89bd80fb3 ('powerpc/perf: Add thread IMC PMU support')
Signed-off-by: Madhavan Srinivasan 
Signed-off-by: Anju T Sudhakar 
---
 arch/powerpc/perf/imc-pmu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 3bef46f8417d..5ca80545a849 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -877,6 +877,9 @@ static int thread_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
 
+   if (!capable(CAP_SYS_ADMIN))
+   return -EACCES;
+
/* Sampling not supported */
if (event->hw.sample_period)
return -EINVAL;
-- 
2.17.1



[PATCH v3 0/5] powerpc/perf: IMC trace-mode support

2019-02-06 Thread Anju T Sudhakar
bs.org/pipermail/skiboot/2018-December/012883.html

* Set LDBAR spr to enable imc-trace mode.

LDBAR Layout:

0 : Enable/Disable
1 : 0 -> Accumulation Mode
1 -> Trace Mode
2:3   : Reserved
4-6   : PB scope
7 : Reserved
8:50  : Counter Address
51:63 : Reserved
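
For illustration, an LDBAR value for trace mode could be built from this
layout roughly as follows (a sketch only, using the THREAD_IMC_LDBAR_MASK,
THREAD_IMC_ENABLE and TRACE_IMC_ENABLE definitions from this series; the
exact enable-bit handling in the actual implementation may differ):

	/*
	 * Sketch: bit 0 = enable, bit 1 = mode (0 accumulation, 1 trace),
	 * bits 8:50 = counter (buffer) address.
	 */
	static u64 make_ldbar(u64 buf_addr, bool trace_mode)
	{
		u64 v = buf_addr & THREAD_IMC_LDBAR_MASK;	/* bits 8:50 */

		v |= THREAD_IMC_ENABLE;			/* bit 0: enable */
		if (trace_mode)
			v |= TRACE_IMC_ENABLE;		/* bit 1: trace mode */
		return v;
	}

	/* e.g. mtspr(SPRN_LDBAR, make_ldbar((u64)per_cpu_buf, true)); */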

   

The key benefit of imc trace-mode is that each sample record contains the
address pointer along with other information, so we can profile the IP
without interrupting the application.

Performance data using 'perf top' with and without trace-imc event:

When the application is monitored with the trace-imc event, we don't take
any PMI interrupts.

PMI interrupt count when the `perf top` command is executed without the
trace-imc event:

# perf top  
12.53%  [kernel]   [k] arch_cpu_idle   
11.32%  [kernel]   [k] rcu_idle_enter  
10.76%  [kernel]   [k] __next_timer_interrupt  
 9.49%  [kernel]   [k] find_next_bit   
 8.06%  [kernel]   [k] rcu_dynticks_eqs_exit   
 7.82%  [kernel]   [k] do_idle 
 5.71%  [kernel]   [k] tick_nohz_idle_stop_tic 
 [---]  
# cat /proc/interrupts  (a snippet from the output)
9944  1072804804   1644804   1306  
804804804804804804804  
804804   1961   1602804804   1258  
[-]
803803803803803803803  
803803803803804804804 
804804804804804804803 
803803803803803   1306803 
803   Performance monitoring interrupts   


`perf top` with trace-imc (right after 'perf top' without trace-imc event):

# perf top -e trace_imc/trace_cycles/  
12.50%  [kernel]  [k] arch_cpu_idle
11.81%  [kernel]  [k] __next_timer_interrupt   
11.22%  [kernel]  [k] rcu_idle_enter   
10.25%  [kernel]  [k] find_next_bit
 7.91%  [kernel]  [k] do_idle  
 7.69%  [kernel]  [k] rcu_dynticks_eqs_exit
 5.20%  [kernel]  [k] tick_nohz_idle_stop_tick 
 [---]  

# cat /proc/interrupts (a snippet from the output) 

9944  1072804804   1644804   1306  
804804804804804804804  
804804   1961   1602804804   1258  
[-]
803803803803803803803  
803803803804804804804
804804804804804804803 
803803803803803   1306803 
803   Performance monitoring interrupts   

The PMI interrupts count remains the same.  

Changelog:

From v2 -> v3
--

* Redefined the event format for trace-imc.

Suggestions/comments are welcome.


Anju T Sudhakar (4):
  powerpc/include: Add data structures and macros for IMC trace mode
  powerpc/perf: Rearrange setting of ldbar for thread-imc
  powerpc/perf: Trace imc events detection and cpuhotplug
  powerpc/perf: Trace imc PMU functions

Madhavan Srinivasan (1):
  powerpc/perf: Add privileged access check for thread_imc

 arch/powerpc/include/asm/im

[PATCH v3 5/5] powerpc/perf: Trace imc PMU functions

2019-02-06 Thread Anju T Sudhakar
Add PMU functions to support trace-imc and define the format for
trace-imc events.

Signed-off-by: Anju T Sudhakar 
Reviewed-by: Madhavan Srinivasan 
---
 arch/powerpc/perf/imc-pmu.c | 197 +++-
 1 file changed, 196 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 1f09265c8fb0..0f1a30f11f6a 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -52,7 +52,7 @@ static struct imc_pmu *imc_event_to_pmu(struct perf_event 
*event)
return container_of(event->pmu, struct imc_pmu, pmu);
 }
 
-PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(event, "config:0-61");
 PMU_FORMAT_ATTR(offset, "config:0-31");
 PMU_FORMAT_ATTR(rvalue, "config:32");
 PMU_FORMAT_ATTR(mode, "config:33-40");
@@ -69,6 +69,25 @@ static struct attribute_group imc_format_group = {
.attrs = imc_format_attrs,
 };
 
+/* Format attribute for imc trace-mode */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
+static struct attribute *trace_imc_format_attrs[] = {
+   &format_attr_event.attr,
+   &format_attr_cpmc_reserved.attr,
+   &format_attr_cpmc_event.attr,
+   &format_attr_cpmc_samplesel.attr,
+   &format_attr_cpmc_load.attr,
+   NULL,
+};
+
+static struct attribute_group trace_imc_format_group = {
+   .name = "format",
+   .attrs = trace_imc_format_attrs,
+};
+
 /* Get the cpumask printed to a buffer "buf" */
 static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
struct device_attribute *attr,
@@ -1120,6 +1139,173 @@ static int trace_imc_cpu_init(void)
  ppc_trace_imc_cpu_offline);
 }
 
+static u64 get_trace_imc_event_base_addr(void)
+{
+   return (u64)per_cpu(trace_imc_mem, smp_processor_id());
+}
+
+/*
+ * Function to parse trace-imc data obtained
+ * and to prepare the perf sample.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+   struct perf_sample_data *data,
+   u64 *prev_tb,
+   struct perf_event_header *header,
+   struct perf_event *event)
+{
+   /* Sanity checks for a valid record */
+   if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
+   *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+   else
+   return -EINVAL;
+
+   if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+be64_to_cpu(READ_ONCE(mem->tb2)))
+   return -EINVAL;
+
+   /* Prepare perf sample */
+   data->ip =  be64_to_cpu(READ_ONCE(mem->ip));
+   data->period = event->hw.last_period;
+
+   header->type = PERF_RECORD_SAMPLE;
+   header->size = sizeof(*header) + event->header_size;
+   header->misc = 0;
+
+   if (is_kernel_addr(data->ip))
+   header->misc |= PERF_RECORD_MISC_KERNEL;
+   else
+   header->misc |= PERF_RECORD_MISC_USER;
+
+   perf_event_header__init_id(header, data, event);
+
+   return 0;
+}
+
+static void dump_trace_imc_data(struct perf_event *event)
+{
+   struct trace_imc_data *mem;
+   int i, ret;
+   u64 prev_tb = 0;
+
+   mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+   for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+   i++, mem++) {
+   struct perf_sample_data data;
+   struct perf_event_header header;
+
+   ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header,
event);
+   if (ret) /* Exit, if not a valid record */
+   break;
+   else {
+   /* If this is a valid record, create the sample */
+   struct perf_output_handle handle;
+
+   if (perf_output_begin(&handle, event, header.size))
+   return;
+
+   perf_output_sample(&handle, &header, &data, event);
+   perf_output_end(&handle);
+   }
+   }
+}
+
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+   /* Enable the sched_task to start the engine */
+   perf_sched_cb_inc(event->ctx->pmu);
+   return 0;
+}
+
+static void trace_imc_event_read(struct perf_event *event)
+{
+   dump_trace_imc_data(event);
+}
+
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+   trace_imc_event_read(event);
+}
+
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+   return;
+}
+
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+  

[PATCH v3 4/5] powerpc/perf: Trace imc events detection and cpuhotplug

2019-02-06 Thread Anju T Sudhakar
Patch detects trace-imc events, does memory initializations for each online
cpu, and registers cpuhotplug call-backs.

Signed-off-by: Anju T Sudhakar 
Reviewed-by: Madhavan Srinivasan 
---
 arch/powerpc/perf/imc-pmu.c   | 91 +++
 arch/powerpc/platforms/powernv/opal-imc.c |  3 +
 include/linux/cpuhotplug.h|  1 +
 3 files changed, 95 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 5ca80545a849..1f09265c8fb0 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -43,6 +43,10 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
 static struct imc_pmu *thread_imc_pmu;
 static int thread_imc_mem_size;
 
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static int trace_imc_mem_size;
+
 static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
return container_of(event->pmu, struct imc_pmu, pmu);
@@ -1068,6 +1072,54 @@ static void thread_imc_event_del(struct perf_event 
*event, int flags)
imc_event_update(event);
 }
 
+/*
+ * Allocate a page of memory for each cpu, and load LDBAR with 0.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+   u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+   int phys_id = cpu_to_node(cpu_id), rc = 0;
+
+   if (!local_mem) {
+   local_mem = page_address(alloc_pages_node(phys_id,
+   GFP_KERNEL | __GFP_ZERO | 
__GFP_THISNODE |
+   __GFP_NOWARN, get_order(size)));
+   if (!local_mem)
+   return -ENOMEM;
+   per_cpu(trace_imc_mem, cpu_id) = local_mem;
+
+   /* Initialise the counters for trace mode */
+   rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void 
*)local_mem),
+   get_hard_smp_processor_id(cpu_id));
+   if (rc) {
+   pr_info("IMC:opal init failed for trace imc\n");
+   return rc;
+   }
+   }
+
+   mtspr(SPRN_LDBAR, 0);
+   return 0;
+}
+
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+   return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+}
+
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+   mtspr(SPRN_LDBAR, 0);
+   return 0;
+}
+
+static int trace_imc_cpu_init(void)
+{
+   return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+ "perf/powerpc/imc_trace:online",
+ ppc_trace_imc_cpu_online,
+ ppc_trace_imc_cpu_offline);
+}
+
 /* update_pmu_ops : Populate the appropriate operations for "pmu" */
 static int update_pmu_ops(struct imc_pmu *pmu)
 {
@@ -1189,6 +1241,17 @@ static void cleanup_all_thread_imc_memory(void)
}
 }
 
+static void cleanup_all_trace_imc_memory(void)
+{
+   int i, order = get_order(trace_imc_mem_size);
+
+   for_each_online_cpu(i) {
+   if (per_cpu(trace_imc_mem, i))
+   free_pages((u64)per_cpu(trace_imc_mem, i), order);
+
+   }
+}
+
 /* Function to free the attr_groups which are dynamically allocated */
 static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
 {
@@ -1230,6 +1293,11 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu 
*pmu_ptr)
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
+
+   if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
+   cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+   cleanup_all_trace_imc_memory();
+   }
 }
 
 /*
@@ -1312,6 +1380,21 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct 
device_node *parent,
 
thread_imc_pmu = pmu_ptr;
break;
+   case IMC_DOMAIN_TRACE:
+   /* Update the pmu name */
+   pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+   if (!pmu_ptr->pmu.name)
+   return -ENOMEM;
+
+   trace_imc_mem_size = pmu_ptr->counter_mem_size;
+   for_each_online_cpu(cpu) {
+   res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+   if (res) {
+   cleanup_all_trace_imc_memory();
+   goto err;
+   }
+   }
+   break;
default:
return -EINVAL;
}
@@ -1384,6 +1467,14 @@ int init_imc_pmu(struct device_node *parent, struct 
imc_pmu *pmu_ptr, int pmu_id
goto err_free_mem;
}
 
+   break;
+   case IMC_DOMAIN_TRACE:
+   ret = trace_imc_cpu_init();
+   if (ret) {
+   cleanup_

[PATCH v3 2/5] powerpc/perf: Rearrange setting of ldbar for thread-imc

2019-02-06 Thread Anju T Sudhakar
LDBAR holds the memory address allocated for each cpu. For thread-imc
the mode bit (i.e. bit 1) of LDBAR is set to accumulation.
Currently, ldbar is loaded with the per-cpu memory address and the mode
set to accumulation at boot time.

To enable trace-imc, the mode bit of ldbar should be set to 'trace'. So to
accommodate trace-mode of IMC, reposition setting of ldbar for thread-imc
to thread_imc_event_add(). Also reset ldbar at thread_imc_event_del().

Signed-off-by: Anju T Sudhakar 
Reviewed-by: Madhavan Srinivasan 
---
 arch/powerpc/perf/imc-pmu.c | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f292a3f284f1..3bef46f8417d 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -806,8 +806,11 @@ static int core_imc_event_init(struct perf_event *event)
 }
 
 /*
- * Allocates a page of memory for each of the online cpus, and write the
- * physical base address of that page to the LDBAR for that cpu.
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
  *
  * LDBAR Register Layout:
  *
@@ -825,7 +828,7 @@ static int core_imc_event_init(struct perf_event *event)
  */
 static int thread_imc_mem_alloc(int cpu_id, int size)
 {
-   u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+   u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
int nid = cpu_to_node(cpu_id);
 
if (!local_mem) {
@@ -842,9 +845,7 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
 
-   ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | 
THREAD_IMC_ENABLE;
-
-   mtspr(SPRN_LDBAR, ldbar_value);
+   mtspr(SPRN_LDBAR, 0);
return 0;
 }
 
@@ -995,6 +996,7 @@ static int thread_imc_event_add(struct perf_event *event, 
int flags)
 {
int core_id;
struct imc_pmu_ref *ref;
+   u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, 
smp_processor_id());
 
if (flags & PERF_EF_START)
imc_event_start(event, flags);
@@ -1003,6 +1005,9 @@ static int thread_imc_event_add(struct perf_event *event, 
int flags)
return -EINVAL;
 
core_id = smp_processor_id() / threads_per_core;
+   ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | 
THREAD_IMC_ENABLE;
+   mtspr(SPRN_LDBAR, ldbar_value);
+
/*
 * imc pmus are enabled only when it is used.
 * See if this is triggered for the first time.
@@ -1034,11 +1039,7 @@ static void thread_imc_event_del(struct perf_event 
*event, int flags)
int core_id;
struct imc_pmu_ref *ref;
 
-   /*
-* Take a snapshot and calculate the delta and update
-* the event counter values.
-*/
-   imc_event_update(event);
+   mtspr(SPRN_LDBAR, 0);
 
core_id = smp_processor_id() / threads_per_core;
ref = &core_imc_refc[core_id];
@@ -1057,6 +1058,11 @@ static void thread_imc_event_del(struct perf_event 
*event, int flags)
ref->refc = 0;
}
mutex_unlock(&ref->lock);
+   /*
+* Take a snapshot and calculate the delta and update
+* the event counter values.
+*/
+   imc_event_update(event);
 }
 
 /* update_pmu_ops : Populate the appropriate operations for "pmu" */
-- 
2.17.1



[PATCH v3 1/5] powerpc/include: Add data structures and macros for IMC trace mode

2019-02-06 Thread Anju T Sudhakar
Add the macros needed for IMC (In-Memory Collection Counters) trace-mode
and data structure to hold the trace-imc record data.
Also, add the new type "OPAL_IMC_COUNTERS_TRACE" in 'opal-api.h', since
there is a new switch case added in the opal-calls for IMC.

Signed-off-by: Anju T Sudhakar 
Reviewed-by: Madhavan Srinivasan 
---
 arch/powerpc/include/asm/imc-pmu.h  | 39 +
 arch/powerpc/include/asm/opal-api.h |  1 +
 2 files changed, 40 insertions(+)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 69f516ecb2fd..7c2ef0e42661 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -33,6 +33,7 @@
  */
 #define THREAD_IMC_LDBAR_MASK   0x0003ffffffffe000ULL
 #define THREAD_IMC_ENABLE   0x8000000000000000ULL
+#define TRACE_IMC_ENABLE   0x4000000000000000ULL
 
 /*
  * For debugfs interface for imc-mode and imc-command
@@ -59,6 +60,34 @@ struct imc_events {
char *scale;
 };
 
+/*
+ * Trace IMC hardware updates a 64bytes record on
+ * Core Performance Monitoring Counter (CPMC)
+ * overflow. Here is the layout for the trace imc record
+ *
+ * DW 0 : Timebase
+ * DW 1 : Program Counter
+ * DW 2 : PIDR information
+ * DW 3 : CPMC1
+ * DW 4 : CPMC2
+ * DW 5 : CPMC3
+ * Dw 6 : CPMC4
+ * DW 7 : Timebase
+ * .
+ *
+ * The following is the data structure to hold trace imc data.
+ */
+struct trace_imc_data {
+   u64 tb1;
+   u64 ip;
+   u64 val;
+   u64 cpmc1;
+   u64 cpmc2;
+   u64 cpmc3;
+   u64 cpmc4;
+   u64 tb2;
+};
+
 /* Event attribute array index */
 #define IMC_FORMAT_ATTR0
 #define IMC_EVENT_ATTR 1
@@ -68,6 +97,13 @@ struct imc_events {
 /* PMU Format attribute macros */
 #define IMC_EVENT_OFFSET_MASK  0xffffffffULL
 
+/*
+ * Macro to mask bits 0:21 of first double word(which is the timebase) to
+ * compare with 8th double word (timebase) of trace imc record data.
+ */
+#define IMC_TRACE_RECORD_TB1_MASK  0x3ffffffffffULL
+
+
 /*
  * Device tree parser code detects IMC pmu support and
  * registers new IMC pmus. This structure will hold the
@@ -113,6 +149,7 @@ struct imc_pmu_ref {
 
 enum {
IMC_TYPE_THREAD = 0x1,
+   IMC_TYPE_TRACE  = 0x2,
IMC_TYPE_CORE   = 0x4,
IMC_TYPE_CHIP   = 0x10,
 };
@@ -123,6 +160,8 @@ enum {
 #define IMC_DOMAIN_NEST1
 #define IMC_DOMAIN_CORE2
 #define IMC_DOMAIN_THREAD  3
+/* For trace-imc the domain is still thread but it operates in trace-mode */
+#define IMC_DOMAIN_TRACE   4
 
 extern int init_imc_pmu(struct device_node *parent,
struct imc_pmu *pmu_ptr, int pmu_id);
diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 870fb7b239ea..a4130b21b159 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1118,6 +1118,7 @@ enum {
 enum {
OPAL_IMC_COUNTERS_NEST = 1,
OPAL_IMC_COUNTERS_CORE = 2,
+   OPAL_IMC_COUNTERS_TRACE = 3,
 };
 
 
-- 
2.17.1



[PATCH v2 1/5] powerpc/include: Add data structures and macros for IMC trace mode

2018-12-14 Thread Anju T Sudhakar
Add the macros needed for IMC (In-Memory Collection Counters) trace-mode
and a data structure to hold the trace-imc record data.
Also, add the new type "OPAL_IMC_COUNTERS_TRACE" in 'opal-api.h', since
there is a new switch case added in the opal-calls for IMC.

Signed-off-by: Anju T Sudhakar 
---
 arch/powerpc/include/asm/imc-pmu.h  | 39 +
 arch/powerpc/include/asm/opal-api.h |  1 +
 2 files changed, 40 insertions(+)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 69f516ecb2fd..7c2ef0e42661 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -33,6 +33,7 @@
  */
 #define THREAD_IMC_LDBAR_MASK   0x0003ffffffffe000ULL
 #define THREAD_IMC_ENABLE   0x8000000000000000ULL
+#define TRACE_IMC_ENABLE   0x4000000000000000ULL
 
 /*
  * For debugfs interface for imc-mode and imc-command
@@ -59,6 +60,34 @@ struct imc_events {
char *scale;
 };
 
+/*
+ * Trace IMC hardware updates a 64bytes record on
+ * Core Performance Monitoring Counter (CPMC)
+ * overflow. Here is the layout for the trace imc record
+ *
+ * DW 0 : Timebase
+ * DW 1 : Program Counter
+ * DW 2 : PIDR information
+ * DW 3 : CPMC1
+ * DW 4 : CPMC2
+ * DW 5 : CPMC3
+ * Dw 6 : CPMC4
+ * DW 7 : Timebase
+ * .
+ *
+ * The following is the data structure to hold trace imc data.
+ */
+struct trace_imc_data {
+   u64 tb1;
+   u64 ip;
+   u64 val;
+   u64 cpmc1;
+   u64 cpmc2;
+   u64 cpmc3;
+   u64 cpmc4;
+   u64 tb2;
+};
+
 /* Event attribute array index */
 #define IMC_FORMAT_ATTR0
 #define IMC_EVENT_ATTR 1
@@ -68,6 +97,13 @@ struct imc_events {
 /* PMU Format attribute macros */
 #define IMC_EVENT_OFFSET_MASK  0xffffffffULL
 
+/*
+ * Macro to mask bits 0:21 of first double word(which is the timebase) to
+ * compare with 8th double word (timebase) of trace imc record data.
+ */
+#define IMC_TRACE_RECORD_TB1_MASK  0x3ffffffffffULL
+
+
 /*
  * Device tree parser code detects IMC pmu support and
  * registers new IMC pmus. This structure will hold the
@@ -113,6 +149,7 @@ struct imc_pmu_ref {
 
 enum {
IMC_TYPE_THREAD = 0x1,
+   IMC_TYPE_TRACE  = 0x2,
IMC_TYPE_CORE   = 0x4,
IMC_TYPE_CHIP   = 0x10,
 };
@@ -123,6 +160,8 @@ enum {
 #define IMC_DOMAIN_NEST1
 #define IMC_DOMAIN_CORE2
 #define IMC_DOMAIN_THREAD  3
+/* For trace-imc the domain is still thread but it operates in trace-mode */
+#define IMC_DOMAIN_TRACE   4
 
 extern int init_imc_pmu(struct device_node *parent,
struct imc_pmu *pmu_ptr, int pmu_id);
diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 870fb7b239ea..a4130b21b159 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1118,6 +1118,7 @@ enum {
 enum {
OPAL_IMC_COUNTERS_NEST = 1,
OPAL_IMC_COUNTERS_CORE = 2,
+   OPAL_IMC_COUNTERS_TRACE = 3,
 };
 
 
-- 
2.17.1



[PATCH v2 3/5] powerpc/perf: Add privileged access check for thread_imc

2018-12-14 Thread Anju T Sudhakar
From: Madhavan Srinivasan 

Add code to restrict user access to the thread_imc pmu, since
some events report privilege level information.

Fixes: f74c89bd80fb3 ('powerpc/perf: Add thread IMC PMU support')
Signed-off-by: Madhavan Srinivasan 
Signed-off-by: Anju T Sudhakar 
---
 arch/powerpc/perf/imc-pmu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 3bef46f8417d..5ca80545a849 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -877,6 +877,9 @@ static int thread_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
 
+   if (!capable(CAP_SYS_ADMIN))
+   return -EACCES;
+
/* Sampling not supported */
if (event->hw.sample_period)
return -EINVAL;
-- 
2.17.1



[PATCH v2 2/5] powerpc/perf: Rearrange setting of ldbar for thread-imc

2018-12-14 Thread Anju T Sudhakar
LDBAR holds the memory address allocated for each cpu. For thread-imc,
the mode bit (i.e. bit 1) of LDBAR is set to accumulation.
Currently, ldbar is loaded with the per-cpu memory address, with the mode set to
accumulation, at boot time.

To enable trace-imc, the mode bit of ldbar should be set to 'trace'. So, to
accommodate the trace mode of IMC, reposition the setting of ldbar for thread-imc
to thread_imc_event_add(), and reset ldbar in thread_imc_event_del().
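
As an illustration (not part of the diff below): the value loaded into LDBAR at
event add time is just the per-thread buffer address masked into the counter
address field plus the enable bit, and trace-imc will later reuse the same recipe
with the mode bit set instead. A minimal kernel-context sketch, assuming the masks
from imc-pmu.h and that local_mem is the per-thread buffer:

	/* Sketch: what thread_imc_event_add() below does with LDBAR. */
	static void thread_imc_ldbar_load(u64 *local_mem)
	{
		u64 ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;

		mtspr(SPRN_LDBAR, ldbar_value);	/* accumulation mode, enabled */
	}

	/* Sketch: thread_imc_event_del() clears it so nothing accumulates in between. */
	static void thread_imc_ldbar_clear(void)
	{
		mtspr(SPRN_LDBAR, 0);
	}

	/* Trace-imc (added later in this series) only changes the mode bit:
	 *   ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE
	 */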

Signed-off-by: Anju T Sudhakar 
---
 arch/powerpc/perf/imc-pmu.c | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f292a3f284f1..3bef46f8417d 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -806,8 +806,11 @@ static int core_imc_event_init(struct perf_event *event)
 }
 
 /*
- * Allocates a page of memory for each of the online cpus, and write the
- * physical base address of that page to the LDBAR for that cpu.
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
  *
  * LDBAR Register Layout:
  *
@@ -825,7 +828,7 @@ static int core_imc_event_init(struct perf_event *event)
  */
 static int thread_imc_mem_alloc(int cpu_id, int size)
 {
-   u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+   u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
int nid = cpu_to_node(cpu_id);
 
if (!local_mem) {
@@ -842,9 +845,7 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
 
-   ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | 
THREAD_IMC_ENABLE;
-
-   mtspr(SPRN_LDBAR, ldbar_value);
+   mtspr(SPRN_LDBAR, 0);
return 0;
 }
 
@@ -995,6 +996,7 @@ static int thread_imc_event_add(struct perf_event *event, 
int flags)
 {
int core_id;
struct imc_pmu_ref *ref;
+   u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, 
smp_processor_id());
 
if (flags & PERF_EF_START)
imc_event_start(event, flags);
@@ -1003,6 +1005,9 @@ static int thread_imc_event_add(struct perf_event *event, 
int flags)
return -EINVAL;
 
core_id = smp_processor_id() / threads_per_core;
+   ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | 
THREAD_IMC_ENABLE;
+   mtspr(SPRN_LDBAR, ldbar_value);
+
/*
 * imc pmus are enabled only when it is used.
 * See if this is triggered for the first time.
@@ -1034,11 +1039,7 @@ static void thread_imc_event_del(struct perf_event 
*event, int flags)
int core_id;
struct imc_pmu_ref *ref;
 
-   /*
-* Take a snapshot and calculate the delta and update
-* the event counter values.
-*/
-   imc_event_update(event);
+   mtspr(SPRN_LDBAR, 0);
 
core_id = smp_processor_id() / threads_per_core;
	ref = &core_imc_refc[core_id];
@@ -1057,6 +1058,11 @@ static void thread_imc_event_del(struct perf_event 
*event, int flags)
ref->refc = 0;
}
	mutex_unlock(&ref->lock);
+   /*
+* Take a snapshot and calculate the delta and update
+* the event counter values.
+*/
+   imc_event_update(event);
 }
 
 /* update_pmu_ops : Populate the appropriate operations for "pmu" */
-- 
2.17.1



[PATCH v2 4/5] powerpc/perf: Trace imc events detection and cpuhotplug

2018-12-14 Thread Anju T Sudhakar
This patch detects trace-imc events, does the memory initialization for each
online cpu, and registers the cpuhotplug callbacks.

Signed-off-by: Anju T Sudhakar 
---
 arch/powerpc/perf/imc-pmu.c   | 91 +++
 arch/powerpc/platforms/powernv/opal-imc.c |  3 +
 include/linux/cpuhotplug.h|  1 +
 3 files changed, 95 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 5ca80545a849..1f09265c8fb0 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -43,6 +43,10 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
 static struct imc_pmu *thread_imc_pmu;
 static int thread_imc_mem_size;
 
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static int trace_imc_mem_size;
+
 static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
return container_of(event->pmu, struct imc_pmu, pmu);
@@ -1068,6 +1072,54 @@ static void thread_imc_event_del(struct perf_event 
*event, int flags)
imc_event_update(event);
 }
 
+/*
+ * Allocate a page of memory for each cpu, and load LDBAR with 0.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+   u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+   int phys_id = cpu_to_node(cpu_id), rc = 0;
+
+   if (!local_mem) {
+   local_mem = page_address(alloc_pages_node(phys_id,
+   GFP_KERNEL | __GFP_ZERO | 
__GFP_THISNODE |
+   __GFP_NOWARN, get_order(size)));
+   if (!local_mem)
+   return -ENOMEM;
+   per_cpu(trace_imc_mem, cpu_id) = local_mem;
+
+   /* Initialise the counters for trace mode */
+   rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void 
*)local_mem),
+   get_hard_smp_processor_id(cpu_id));
+   if (rc) {
+   pr_info("IMC:opal init failed for trace imc\n");
+   return rc;
+   }
+   }
+
+   mtspr(SPRN_LDBAR, 0);
+   return 0;
+}
+
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+   return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+}
+
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+   mtspr(SPRN_LDBAR, 0);
+   return 0;
+}
+
+static int trace_imc_cpu_init(void)
+{
+   return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+ "perf/powerpc/imc_trace:online",
+ ppc_trace_imc_cpu_online,
+ ppc_trace_imc_cpu_offline);
+}
+
 /* update_pmu_ops : Populate the appropriate operations for "pmu" */
 static int update_pmu_ops(struct imc_pmu *pmu)
 {
@@ -1189,6 +1241,17 @@ static void cleanup_all_thread_imc_memory(void)
}
 }
 
+static void cleanup_all_trace_imc_memory(void)
+{
+   int i, order = get_order(trace_imc_mem_size);
+
+   for_each_online_cpu(i) {
+   if (per_cpu(trace_imc_mem, i))
+   free_pages((u64)per_cpu(trace_imc_mem, i), order);
+
+   }
+}
+
 /* Function to free the attr_groups which are dynamically allocated */
 static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
 {
@@ -1230,6 +1293,11 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu 
*pmu_ptr)
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
+
+   if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
+   cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+   cleanup_all_trace_imc_memory();
+   }
 }
 
 /*
@@ -1312,6 +1380,21 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct 
device_node *parent,
 
thread_imc_pmu = pmu_ptr;
break;
+   case IMC_DOMAIN_TRACE:
+   /* Update the pmu name */
+   pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+   if (!pmu_ptr->pmu.name)
+   return -ENOMEM;
+
+   trace_imc_mem_size = pmu_ptr->counter_mem_size;
+   for_each_online_cpu(cpu) {
+   res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+   if (res) {
+   cleanup_all_trace_imc_memory();
+   goto err;
+   }
+   }
+   break;
default:
return -EINVAL;
}
@@ -1384,6 +1467,14 @@ int init_imc_pmu(struct device_node *parent, struct 
imc_pmu *pmu_ptr, int pmu_id
goto err_free_mem;
}
 
+   break;
+   case IMC_DOMAIN_TRACE:
+   ret = trace_imc_cpu_init();
+   if (ret) {
+   cleanup_all_trace_imc_memory();
+   goto err_fre

[PATCH v2 5/5] powerpc/perf: Trace imc PMU functions

2018-12-14 Thread Anju T Sudhakar
Add PMU functions to support trace-imc.
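
A condensed sketch of the flow the functions below implement (editorial summary,
not patch code; the opal stop call for the last user on a core is implied by the
truncated hunk at the end):

/*
 *  trace_imc_event_add()              -> perf_sched_cb_inc()  (arm the sched_task hook)
 *  trace_imc_pmu_sched_task(sched_in) -> mtspr(SPRN_LDBAR, buf | TRACE_IMC_ENABLE) and
 *                                        opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, ...)
 *                                        for the first user on the core
 *  trace_imc_event_read()/_stop()     -> dump_trace_imc_data(): walk the per-thread buffer,
 *                                        validate each 64-byte record in
 *                                        trace_imc_prepare_sample(), and emit
 *                                        PERF_RECORD_SAMPLEs via perf_output_begin()/
 *                                        perf_output_sample()/perf_output_end()
 *  sched-out / trace_imc_event_del()  -> mtspr(SPRN_LDBAR, 0), drop the per-core refcount,
 *                                        perf_sched_cb_dec()
 */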

Signed-off-by: Anju T Sudhakar 
---
 arch/powerpc/perf/imc-pmu.c | 175 
 1 file changed, 175 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 1f09265c8fb0..32ff0e449fca 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1120,6 +1120,173 @@ static int trace_imc_cpu_init(void)
  ppc_trace_imc_cpu_offline);
 }
 
+static u64 get_trace_imc_event_base_addr(void)
+{
+   return (u64)per_cpu(trace_imc_mem, smp_processor_id());
+}
+
+/*
+ * Function to parse trace-imc data obtained
+ * and to prepare the perf sample.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+   struct perf_sample_data *data,
+   u64 *prev_tb,
+   struct perf_event_header *header,
+   struct perf_event *event)
+{
+   /* Sanity checks for a valid record */
+   if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
+   *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+   else
+   return -EINVAL;
+
+   if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+be64_to_cpu(READ_ONCE(mem->tb2)))
+   return -EINVAL;
+
+   /* Prepare perf sample */
+   data->ip =  be64_to_cpu(READ_ONCE(mem->ip));
+   data->period = event->hw.last_period;
+
+   header->type = PERF_RECORD_SAMPLE;
+   header->size = sizeof(*header) + event->header_size;
+   header->misc = 0;
+
+   if (is_kernel_addr(data->ip))
+   header->misc |= PERF_RECORD_MISC_KERNEL;
+   else
+   header->misc |= PERF_RECORD_MISC_USER;
+
+   perf_event_header__init_id(header, data, event);
+
+   return 0;
+}
+
+static void dump_trace_imc_data(struct perf_event *event)
+{
+   struct trace_imc_data *mem;
+   int i, ret;
+   u64 prev_tb = 0;
+
+   mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+   for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+   i++, mem++) {
+   struct perf_sample_data data;
+   struct perf_event_header header;
+
+   ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
+   if (ret) /* Exit, if not a valid record */
+   break;
+   else {
+   /* If this is a valid record, create the sample */
+   struct perf_output_handle handle;
+
+   if (perf_output_begin(&handle, event, header.size))
+   return;
+
+   perf_output_sample(&handle, &header, &data, event);
+   perf_output_end(&handle);
+   }
+   }
+}
+
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+   /* Enable the sched_task to start the engine */
+   perf_sched_cb_inc(event->ctx->pmu);
+   return 0;
+}
+
+static void trace_imc_event_read(struct perf_event *event)
+{
+   dump_trace_imc_data(event);
+}
+
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+   trace_imc_event_read(event);
+}
+
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+   return;
+}
+
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+   perf_sched_cb_dec(event->ctx->pmu);
+}
+
+void trace_imc_pmu_sched_task(struct perf_event_context *ctx,
+   bool sched_in)
+{
+   int core_id = smp_processor_id() / threads_per_core;
+   struct imc_pmu_ref *ref;
+   u64 local_mem, ldbar_value;
+
+   /* Set trace-imc bit in ldbar and load ldbar with per-thread memory 
address */
+   local_mem = get_trace_imc_event_base_addr();
+   ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | 
TRACE_IMC_ENABLE;
+
+   ref = &core_imc_refc[core_id];
+   if (!ref)
+   return;
+
+   if (sched_in) {
+   mtspr(SPRN_LDBAR, ldbar_value);
+   mutex_lock(&ref->lock);
+   if (ref->refc == 0) {
+   if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+   
get_hard_smp_processor_id(smp_processor_id()))) {
+   mutex_unlock(&ref->lock);
+   pr_err("trace-imc: Unable to start the counters 
for core %d\n", core_id);
+   mtspr(SPRN_LDBAR, 0);
+   return;
+   }
+   }
+   ++ref->refc;
+   mutex_unlock(&ref->lock);
+   } else {
+   mtspr(SPRN_LDBAR, 0);
+   mutex_lock(&ref->lock);
+   ref->refc--;
+ 

[PATCH v2 0/5] powerpc/perf: IMC trace-mode support

2018-12-14 Thread Anju T Sudhakar
bs.org/pipermail/skiboot/2018-December/012883.html

* Set LDBAR spr to enable imc-trace mode.

LDBAR Layout:

0 : Enable/Disable
1 : 0 -> Accumulation Mode
1 -> Trace Mode
2:3   : Reserved
4-6   : PB scope
7 : Reserved
8:50  : Counter Address
51:63 : Reserved
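
To make the layout concrete, a small kernel-context sketch (illustration only;
IBM bit numbering, so bit 0 is the most significant bit). LDBAR_ENABLE and
LDBAR_TRACE_MODE are hypothetical helper names, but they correspond to the
THREAD_IMC_ENABLE and TRACE_IMC_ENABLE masks used by the driver:

	#define LDBAR_ENABLE		(1ULL << 63)	/* bit 0: enable/disable */
	#define LDBAR_TRACE_MODE	(1ULL << 62)	/* bit 1: 0 = accumulation, 1 = trace */

	static inline bool ldbar_is_tracing(u64 ldbar)
	{
		return (ldbar & LDBAR_ENABLE) && (ldbar & LDBAR_TRACE_MODE);
	}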

   

The key benefit of imc trace-mode is that each sample record contains the address
pointer along with other information, so we can profile the IP
without interrupting the application.

Performance data using 'perf top' with and without the trace-imc event:

When the application is monitored with the trace-imc event, we don't take any
PMI interrupts.

PMI interrupt count when the `perf top` command is executed without the trace-imc event:

# perf top  
12.53%  [kernel]   [k] arch_cpu_idle   
11.32%  [kernel]   [k] rcu_idle_enter  
10.76%  [kernel]   [k] __next_timer_interrupt  
 9.49%  [kernel]   [k] find_next_bit   
 8.06%  [kernel]   [k] rcu_dynticks_eqs_exit   
 7.82%  [kernel]   [k] do_idle 
 5.71%  [kernel]   [k] tick_nohz_idle_stop_tic 
 [---]  
# cat /proc/interrupts  (a snippet from the output)
9944  1072804804   1644804   1306  
804804804804804804804  
804804   1961   1602804804   1258  
[-]
803803803803803803803  
803803803803804804804 
804804804804804804803 
803803803803803   1306803 
803   Performance monitoring interrupts   


`perf top` with trace-imc (right after 'perf top' without trace-imc event):

# perf top -e trace_imc/trace_cycles/  
12.50%  [kernel]  [k] arch_cpu_idle
11.81%  [kernel]  [k] __next_timer_interrupt   
11.22%  [kernel]  [k] rcu_idle_enter   
10.25%  [kernel]  [k] find_next_bit
 7.91%  [kernel]  [k] do_idle  
 7.69%  [kernel]  [k] rcu_dynticks_eqs_exit
 5.20%  [kernel]  [k] tick_nohz_idle_stop_tick 
 [---]  

# cat /proc/interrupts (a snippet from the output) 

9944  1072804804   1644804   1306  
804804804804804804804  
804804   1961   1602804804   1258  
[-]
803803803803803803803  
803803803804804804804
804804804804804804803 
803803803803803   1306803 
803   Performance monitoring interrupts   

The PMI interrupts count remains the same.  

Changelog:

From v1 -> v2
--

* Added privileged access check for thread-imc and trace-imc

Suggestions/comments are welcome.

Anju T Sudhakar (4):
  powerpc/include: Add data structures and macros for IMC trace mode
  powerpc/perf: Rearrange setting of ldbar for thread-imc
  powerpc/perf: Trace imc events detection and cpuhotplug
  powerpc/perf: Trace imc PMU functions

Madhavan Srinivasan (1):
  powerpc/perf: Add privileged access check for thread_imc

 arch/powerpc

Re: [PATCH v3] powerpc/kernel/sysfs: Export ldbar spr to sysfs

2018-03-12 Thread Anju T Sudhakar

Hi,


On Tuesday 06 March 2018 04:35 PM, Michael Ellerman wrote:

Anju T Sudhakar <a...@linux.vnet.ibm.com> writes:


diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70..caefb64 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -757,6 +759,9 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, &pmc_attrs[i]);
  
  #ifdef CONFIG_PPC64

+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_create_file(s, &dev_attr_ldbar);

Is this register readable in supervisor state?


This is a nice catch, thanks. :)
The guest kernel cannot access the register; it is only readable in the
hypervisor state.
I will resend the patch with a condition check so that this spr will not
get registered for the guest kernel.



Regards,
Anju


cheers





[PATCH] powerpc/perf: Fix nest-imc cpuhotplug callback failure

2017-12-04 Thread Anju T Sudhakar
Call trace observed during boot:

Faulting instruction address: 0xc0248340
cpu 0x0: Vector: 380 (Data Access Out of Range) at [c00ff66fb850]   
pc: c0248340: event_function_call+0x50/0x1f0
lr: c024878c: perf_remove_from_context+0x3c/0x100   
sp: c00ff66fbad0
   msr: 90009033
   dar: 7d20e2a6f92d03c0
  current = 0xc00ff6679200  
  paca= 0xcfd4   softe: 0  irq_happened: 0x01   
pid   = 14, comm = cpuhp/0  
Linux version 4.14.0-rc2-42789-ge8eae4b (rgrimm@) (gcc version 5.4.0
20160609 (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.4)) #1 SMP Thu Nov 16 14:35:14 CST
2017
enter ? for help
[c00ff66fbb80] c024878c perf_remove_from_context+0x3c/0x100 
[c00ff66fbbc0] c024e84c perf_pmu_migrate_context+0x10c/0x380
[c00ff66fbc60] c00ca050 ppc_nest_imc_cpu_offline+0x1b0/0x210
[c00ff66fbcb0] c00d5d54 cpuhp_invoke_callback+0x194/0x620   
[c00ff66fbd20] c00d702c cpuhp_thread_fun+0x7c/0x1b0 
[c00ff66fbd60] c010ad90 smpboot_thread_fn+0x290/0x2a0   
[c00ff66fbdc0] c0104818 kthread+0x168/0x1b0 
[c00ff66fbe30] c000b5a0 ret_from_kernel_thread+0x5c/0xbc

While registering the cpuhotplug callbacks for nest-imc, if we fail in the
cpuhotplug online path for an arbitrary node in a multi-node system (because
the opal call to stop the nest-imc counters fails for that node),
ppc_nest_imc_cpu_offline() will get invoked for the other nodes that successfully
returned from the cpuhotplug online path.

This call trace is generated because, in the ppc_nest_imc_cpu_offline()
path, we are trying to migrate the event context when the nest-imc counters are
not even initialized.

Add a check to ensure that nest-imc is registered before migrating
the event context.

Note:
Madhavan Srinivasan has recently sent a skiboot patch to add a check in the
skiboot code to make sure that the microcode is initialized in all the chips
before enabling the nest units.
https://patchwork.ozlabs.org/patch/844047/ (v2)

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>  
Reviewed-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd..9daa1c3 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -309,6 +309,20 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
return 0;
 
+   /*
+   * Check whether nest_imc is registered. We could end up here
+   * if the cpuhotplug callback registration fails. i.e, callback
+   * invokes the offline path for all successfully registered nodes.
+   * At this stage, nest_imc pmu will not be registered and we
+   * should return here.
+   *
+   * We return with a zero since this is not an offline failure.
+   * And cpuhp_setup_state() returns the actual failure reason
+   * to the caller, which inturn will call the cleanup routine.
+   */
+   if (!nest_pmus)
+   return 0;
+
/*
 * Now that this cpu is one of the designated,
 * find a next cpu a) which is online and b) in same chip.
-- 
2.7.4



[PATCH] powerpc/perf: Add debugfs interface for imc-mode and imc-command

2017-11-12 Thread Anju T Sudhakar
The In-Memory Collection (IMC) counter pmu driver controls the ucode's execution
state. At system boot, the IMC perf driver pauses the ucode. The ucode state is
changed to "running" only when any of the nest units are monitored or profiled
using the perf tool.

Nest units support only a limited set of hardware counters, and the ucode is always
programmed in the "production" ("accumulation") mode. This mode is
configured to provide key performance metric data for most of the nest units.

But the ucode also supports other modes which can be used for "debug" to drill
down into specific nest units. That is, the ucode, when switched to "powerbus" debug
mode (for example), will dynamically reconfigure the nest counters to target
only "powerbus" related events in the hardware counters. This allows the IMC
nest unit to focus on powerbus related transactions in the system in more
detail. At this point, production mode events may or may not be counted.

IMC nest counters have both in-band (ucode) and out-of-band access.
Since not all nest counter configurations are supported by the ucode, out-of-band
tools are used to characterize the other nest counter configurations.

This patch provides an interface via "debugfs" to enable the switching of ucode
modes in the system. To switch the ucode mode, one has to first pause the microcode
(imc_cmd), and then write the target mode value to the "imc_mode" file.
  

Proposed Approach
=================

In the proposed approach, the function (export_imc_mode_and_cmd) which creates
the debugfs interface for the imc mode and command is implemented in opal-imc.c.
Thus we can use imc_get_mem_addr() to get the homer base address for each chip.

The interface to expose the imc mode and command is required only if we have nest
pmu units registered. Employing the existing data structures to track whether
we have any nest units registered would require extending data from the perf side
to opal-imc.c. Instead, an integer is introduced to hold that information by
counting successful nest unit registrations. The debugfs interface is removed
based on that count.

Example for the interface:  

root@:/sys/kernel/debug/imc# ls 
imc_cmd_0  imc_cmd_8  imc_mode_0  imc_mode_8
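
A hypothetical usage sketch (illustration only; the actual pause-command and
target-mode encodings are ucode-specific and are not spelled out in this patch,
so the values below are placeholders):

#include <stdio.h>

/* Placeholder values -- take the real encodings from the IMC ucode documentation. */
#define IMC_CMD_PAUSE_PLACEHOLDER	"1"
#define IMC_MODE_DEBUG_PLACEHOLDER	"2"

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	/* Chip/node 0 shown; the files are per chip (imc_cmd_0, imc_cmd_8, ...). */
	if (write_str("/sys/kernel/debug/imc/imc_cmd_0", IMC_CMD_PAUSE_PLACEHOLDER))
		return 1;
	return write_str("/sys/kernel/debug/imc/imc_mode_0", IMC_MODE_DEBUG_PLACEHOLDER) ? 1 : 0;
}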
    
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h|  7 +++
 arch/powerpc/platforms/powernv/opal-imc.c | 74 ++-
 2 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 7f74c28..317002d 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -40,6 +40,13 @@
 #define THREAD_IMC_ENABLE   0x8000000000000000ULL
 
 /*
+ * For debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET8
+#define IMC_CNTL_BLK_MODE_OFFSET   32
+
+/*
  * Structure to hold memory address information for imc units.
  */
 struct imc_mem_info {
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531..a88ddab 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -21,6 +21,70 @@
 #include 
 #include 
 #include 
+#include 
+
+static struct dentry *parent;
+
+/* Helpers to export imc command and status via debugfs */
+static int debugfs_imc_mem_get(void *data, u64 *val)
+{
+   *val = cpu_to_be64(*(u64 *)data);
+   return 0;
+}
+
+static int debugfs_imc_mem_set(void *data, u64 val)
+{
+   *(u64 *)data = cpu_to_be64(val);
+   return 0

Re: [PATCH] powerpc/perf: Add debugfs interface for imc run-mode and run-status

2017-11-12 Thread Anju T Sudhakar


Hi,

Kindly ignore this version

Thanks,
Anju

On Monday 13 November 2017 11:06 AM, Anju T Sudhakar wrote:

In memory Collection (IMC) counter pmu driver controls the ucode's execution
state. At the system boot, IMC perf driver pause the ucode. Ucode state is
changed to "running" only when any of the nest units are monitored or profiled
using perf tool.
 
Nest units support only limited set of hardware counters and ucode is always

programmed in the "production mode" ("accumulation") mode. This mode is
configured to provide key performance metric data for most of the nest units.
 
But ucode also supports other modes which would be used for "debug" to drill

down specific nest units. That is, ucode when switched to "powerbus" debug
mode (for example), will dynamically reconfigure the nest counters to target
only "powerbus" related events in the hardware counters. This allows the IMC
nest unit to focus on powerbus related transactions in the system in more
detail. At this point, production mode events may or may not be counted.
 
IMC nest counters has both in-band (ucode access) and out of band access to it.

Since not all nest counter configurations are supported by ucode, out of band
tools are used to characterize other nest counter configurations.
 
Patch provides an interface via "debugfs" to enable the switching of ucode

modes in the system. To switch ucode mode, one has to first pause the microcode
(imc_cmd), and then write the target mode value to the "imc_mode" file.
 
Proposed Approach

===
 
In the proposed approach, the function (export_imc_mode_and_cmd) which creates

the debugfs interface for imc mode and command is implemented in opal-imc.c.
Thus we can use imc_get_mem_addr() to get the homer base address for each chip.
 
The interface to expose imc mode and command is required only if we have nest

pmu units registered. Employing the existing data structures to track whether
we have any nest units registered will require to extend data from perf side
to opal-imc.c. Instead an integer is introduced to hold that information by
counting successful nest unit registration. Debugfs interface is removed
based on the integer count.

Example for the interface:
 
root@:/sys/kernel/debug/imc# ls

imc_cmd_0  imc_cmd_8  imc_mode_0  imc_mode_8
     
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>

---
  arch/powerpc/include/asm/imc-pmu.h|  7 +++
  arch/powerpc/platforms/powernv/opal-imc.c | 74 ++-
  2 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 7f74c28..317002d 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -40,6 +40,13 @@
  #define THREAD_IMC_ENABLE   0x8000ULL

  /*
+ * For debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET8
+#define IMC_CNTL_BLK_MODE_OFFSET   32
+
+/*
   * Structure to hold memory address information for imc units.
   */
  struct imc_mem_info {
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531..a88ddab 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -21,6 +21,70 @@
  #include 
  #include 
  #include 
+#include 
+
+static struct dentry *parent;
+
+/* Helpers to export imc command and status via debugfs */
+static int debugfs_imc_mem_get(void *data, u64 *val)
+{
+   *val = cpu_to_be64(*(u64 *)data);
+   return 0;
+}
+
+static int debugfs_imc_mem_set(void *data, u64 val)
+{
+   *(u64 *)data = cpu_to_be64(val);
+   return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, debugfs_imc_mem_get, 
debugfs_imc_mem_set,
+   "0x%016llx\n");
+
+static struct dentry *debugfs_create_imc_x64(const char *name, umode_t mode,
+   struct dentry *parent, u64  *value)
+{
+   return debugfs_create_file_unsafe(name, m

[PATCH] powerpc/perf: Add debugfs interface for imc run-mode and run-status

2017-11-12 Thread Anju T Sudhakar
The In-Memory Collection (IMC) counter pmu driver controls the ucode's execution
state. At system boot, the IMC perf driver pauses the ucode. The ucode state is
changed to "running" only when any of the nest units are monitored or profiled
using the perf tool.

Nest units support only a limited set of hardware counters, and the ucode is always
programmed in the "production" ("accumulation") mode. This mode is
configured to provide key performance metric data for most of the nest units.

But the ucode also supports other modes which can be used for "debug" to drill
down into specific nest units. That is, the ucode, when switched to "powerbus" debug
mode (for example), will dynamically reconfigure the nest counters to target
only "powerbus" related events in the hardware counters. This allows the IMC
nest unit to focus on powerbus related transactions in the system in more
detail. At this point, production mode events may or may not be counted.

IMC nest counters have both in-band (ucode) and out-of-band access.
Since not all nest counter configurations are supported by the ucode, out-of-band
tools are used to characterize the other nest counter configurations.

This patch provides an interface via "debugfs" to enable the switching of ucode
modes in the system. To switch the ucode mode, one has to first pause the microcode
(imc_cmd), and then write the target mode value to the "imc_mode" file.
  

Proposed Approach
=================

In the proposed approach, the function (export_imc_mode_and_cmd) which creates
the debugfs interface for the imc mode and command is implemented in opal-imc.c.
Thus we can use imc_get_mem_addr() to get the homer base address for each chip.

The interface to expose the imc mode and command is required only if we have nest
pmu units registered. Employing the existing data structures to track whether
we have any nest units registered would require extending data from the perf side
to opal-imc.c. Instead, an integer is introduced to hold that information by
counting successful nest unit registrations. The debugfs interface is removed
based on that count.

Example for the interface:  

root@:/sys/kernel/debug/imc# ls 
imc_cmd_0  imc_cmd_8  imc_mode_0  imc_mode_8
    
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h|  7 +++
 arch/powerpc/platforms/powernv/opal-imc.c | 74 ++-
 2 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 7f74c28..317002d 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -40,6 +40,13 @@
 #define THREAD_IMC_ENABLE   0x8000000000000000ULL
 
 /*
+ * For debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET8
+#define IMC_CNTL_BLK_MODE_OFFSET   32
+
+/*
  * Structure to hold memory address information for imc units.
  */
 struct imc_mem_info {
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531..a88ddab 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -21,6 +21,70 @@
 #include 
 #include 
 #include 
+#include 
+
+static struct dentry *parent;
+
+/* Helpers to export imc command and status via debugfs */
+static int debugfs_imc_mem_get(void *data, u64 *val)
+{
+   *val = cpu_to_be64(*(u64 *)data);
+   return 0;
+}
+
+static int debugfs_imc_mem_set(void *data, u64 val)
+{
+   *(u64 *)data = cpu_to_be64(val);
+   return 0

[PATCH v3] powerpc/kernel/sysfs: Export ldbar spr to sysfs

2017-11-09 Thread Anju T Sudhakar
Add the ldbar spr to sysfs. The spr holds the thread-level In-Memory Collection (IMC)
counter configuration data. Exposing this helps to understand the current
configuration of thread-level counters in the system.
Primarily, bit 0 of ldbar indicates whether the counters are enabled or not,
and bit 1 indicates the mode (0 - Accumulation Mode, 1 - Trace Mode).

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/sysfs.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70..caefb64 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
 SYSFS_SPRSETUP(purr, SPRN_PURR);
 SYSFS_SPRSETUP(spurr, SPRN_SPURR);
 SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(ldbar, SPRN_LDBAR);
 
 /*
   Lets only enable read for phyp resources and
@@ -492,6 +493,7 @@ SYSFS_SPRSETUP(pir, SPRN_PIR);
   Lets be conservative and default to pseries.
 */
 static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(ldbar, 0400, show_ldbar, NULL);
 static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
@@ -757,6 +759,9 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, &pmc_attrs[i]);
 
 #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_create_file(s, &dev_attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, &dev_attr_mmcra);
 
@@ -842,6 +847,9 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, &pmc_attrs[i]);
 
 #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_remove_file(s, &dev_attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, &dev_attr_mmcra);
 
-- 
2.7.4



Re: [PATCH v3] powerpc/kernel/sysfs: Export ldbar spr to sysfs

2017-11-09 Thread Anju T Sudhakar

Hi,

Kindly ignore this patch, I send a wrong version. Will send out the 
right one.



Thanks,

Anju


On Thursday 09 November 2017 05:43 PM, Anju T Sudhakar wrote:

Add ldbar spr to sysfs. The spr holds thread level In-Memory Collection (IMC)
counter configuration data. Exposing this will help to understand the current
configuration of thread-level counters in the system.
Primarily, Bit 0 of ldbar says whether the counters are enabled or not.
And bit 1 indicates the mode (if 0-Accumulation Mode/if 1-Trace Mode).

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
  arch/powerpc/kernel/sysfs.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70..f8caee0 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
  SYSFS_SPRSETUP(purr, SPRN_PURR);
  SYSFS_SPRSETUP(spurr, SPRN_SPURR);
  SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(ldbar, SPRN_LDBAR);

  /*
Lets only enable read for phyp resources and
@@ -492,6 +493,7 @@ SYSFS_SPRSETUP(pir, SPRN_PIR);
Lets be conservative and default to pseries.
  */
  static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(ldbar, 0400, show_ldbar, store_ldbar);
  static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
  static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
  static DEVICE_ATTR(pir, 0400, show_pir, NULL);
@@ -757,6 +759,9 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, _attrs[i]);

  #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_create_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, _attr_mmcra);

@@ -842,6 +847,9 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, _attrs[i]);

  #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_remove_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, _attr_mmcra);





[PATCH v3] powerpc/kernel/sysfs: Export ldbar spr to sysfs

2017-11-09 Thread Anju T Sudhakar
Add ldbar spr to sysfs. The spr holds thread-level In-Memory Collection (IMC)
counter configuration data. Exposing this will help in understanding the
current configuration of thread-level counters in the system.
Primarily, bit 0 of ldbar says whether the counters are enabled or not,
and bit 1 indicates the mode (0 = Accumulation Mode, 1 = Trace Mode).
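
For illustration only (not part of the patch): a minimal user-space sketch of
how the two bits described above could be decoded from a value read out of the
new sysfs file. It assumes the MSB-0 bit numbering used in the description
(i.e. "bit 0" is the most significant bit of the 64-bit SPR); the program and
its invocation are hypothetical.

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	unsigned long long ldbar;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <ldbar value>\n", argv[0]);
		return 1;
	}

	ldbar = strtoull(argv[1], NULL, 0);

	/* MSB-0 numbering: bit 0 is 1ULL << 63, bit 1 is 1ULL << 62. */
	printf("counters: %s\n", (ldbar >> 63) & 1 ? "enabled" : "disabled");
	printf("mode:     %s\n", (ldbar >> 62) & 1 ? "Trace" : "Accumulation");
	return 0;
}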

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/sysfs.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70..f8caee0 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
 SYSFS_SPRSETUP(purr, SPRN_PURR);
 SYSFS_SPRSETUP(spurr, SPRN_SPURR);
 SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(ldbar, SPRN_LDBAR);
 
 /*
   Lets only enable read for phyp resources and
@@ -492,6 +493,7 @@ SYSFS_SPRSETUP(pir, SPRN_PIR);
   Lets be conservative and default to pseries.
 */
 static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(ldbar, 0400, show_ldbar, store_ldbar);
 static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
@@ -757,6 +759,9 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, _attrs[i]);
 
 #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_create_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, _attr_mmcra);
 
@@ -842,6 +847,9 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, _attrs[i]);
 
 #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_remove_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, _attr_mmcra);
 
-- 
2.7.4



[PATCH] powerpc/powernv: Fix IMC_MAX_PMU macro

2017-11-09 Thread Anju T Sudhakar
IMC_MAX_PMUS is used to size static storage (per_nest_pmu_arr) which holds
nest pmu information. The current value for the macro is 32, based on
the initial number of nest pmu units supported by the nest microcode.
Currently 29 nest units are enabled based on the system configuration,
but going forward the microcode could support more nest units (a maximum
of 64). Hence bump the value of the macro to 64.

Fixes: 8f95faaac56c1 ('powerpc/powernv: Detect and create IMC device')
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 7f74c28..f9f0f2e 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -23,7 +23,7 @@
 /*
  * For static allocation of some of the structures.
  */
-#define IMC_MAX_PMUS   32
+#define IMC_MAX_PMUS   64
 
 /*
  * Compatibility macros for IMC devices
-- 
2.7.4



Re: [PATCH v2] powerpc/kernel/sysfs: Export ldbar spr to sysfs

2017-11-07 Thread Anju T Sudhakar

Hi mpe,


On Wednesday 01 November 2017 06:20 AM, Michael Ellerman wrote:

Anju T Sudhakar <a...@linux.vnet.ibm.com> writes:


Add ldbar spr to sysfs. The spr will hold thread level In-Memory Collection 
(IMC)
counter configuration data.

This is missing any justification for why we would want to expose this,
and in particular why we would make it *writable*.

cheers



Thank you for reviewing the patch.

LDBAR holds the thread-level counter configuration. Exposing this will help
us to understand the current status of thread-level counters in the system.
Primarily, bit 0 of ldbar tells whether the counters are enabled or not,
and bit 1 tells the mode (0 = Accumulation Mode, 1 = Trace Mode).

But regarding the permission, you are right. On reassessment I think the
permission should be read-only, because otherwise it would be possible to
write an incorrect value to ldbar, which is wrong.

So I will change the permission here.



Thanks,
Anju



diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70..f8caee0 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
  SYSFS_SPRSETUP(purr, SPRN_PURR);
  SYSFS_SPRSETUP(spurr, SPRN_SPURR);
  SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(ldbar, SPRN_LDBAR);
  
  /*

Lets only enable read for phyp resources and
@@ -492,6 +493,7 @@ SYSFS_SPRSETUP(pir, SPRN_PIR);
Lets be conservative and default to pseries.
  */
  static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(ldbar, 0600, show_ldbar, store_ldbar);
  static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
  static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
  static DEVICE_ATTR(pir, 0400, show_pir, NULL);
@@ -757,6 +759,9 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, _attrs[i]);
  
  #ifdef CONFIG_PPC64

+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_create_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, _attr_mmcra);
  
@@ -842,6 +847,9 @@ static int unregister_cpu_online(unsigned int cpu)

device_remove_file(s, _attrs[i]);
  
  #ifdef CONFIG_PPC64

+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_remove_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, _attr_mmcra);
  
--

2.7.4




[PATCH v2] powerpc/perf: Fix core-imc hotplug callback failure during imc initialization

2017-11-02 Thread Anju T Sudhakar
From: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>

Call trace observed during boot:

[0.750749] PCI: CLS 0 bytes, default 128
[0.750855] Unpacking initramfs...
[1.570445] Freeing initrd memory: 23168K
[1.571090] rtas_flash: no firmware flash support
[1.573873] nest_capp0_imc performance monitor hardware support registered
[1.574006] nest_capp1_imc performance monitor hardware support registered
[1.579616] core_imc memory allocation for cpu 56 failed
[1.579730] Unable to handle kernel paging request for data at address 
0xffa400010
[1.579797] Faulting instruction address: 0xc0bf3294
0:mon> e
cpu 0x0: Vector: 300 (Data Access) at [c00ff38ff8d0]
pc: c0bf3294: mutex_lock+0x34/0x90
lr: c0bf3288: mutex_lock+0x28/0x90
sp: c00ff38ffb50
   msr: 92009033
   dar: ffa400010
 dsisr: 8
  current = 0xc00ff383de00
  paca= 0xc7ae   softe: 0irq_happened: 0x01
pid   = 13, comm = cpuhp/0
Linux version 4.11.0-39.el7a.ppc64le 
(mockbu...@ppc-058.build.eng.bos.redhat.com) (gcc version 4.8.5 20150623 (Red 
Hat 4.8.5-16) (GCC) ) #1 SMP Tue Oct 3 07:42:44 EDT 2017
0:mon> t
[c00ff38ffb80] c02ddfac perf_pmu_migrate_context+0xac/0x470
[c00ff38ffc40] c011385c ppc_core_imc_cpu_offline+0x1ac/0x1e0
[c00ff38ffc90] c0125758 cpuhp_invoke_callback+0x198/0x5d0
[c00ff38ffd00] c012782c cpuhp_thread_fun+0x8c/0x3d0
[c00ff38ffd60] c01678d0 smpboot_thread_fn+0x290/0x2a0
[c00ff38ffdc0] c015ee78 kthread+0x168/0x1b0
[c00ff38ffe30] c000b368 ret_from_kernel_thread+0x5c/0x74


While registering the cpuhotplug callbacks for core-imc, if we fail
in the cpuhotplug online path for any random core (either because the OPAL call
to initialize the core-imc counters fails or because memory allocation fails
for that core), ppc_core_imc_cpu_offline() will get invoked for the other cpus
that successfully returned from the cpuhotplug online path.

But in the ppc_core_imc_cpu_offline() path we try to migrate the event
context when the core-imc counters are not even initialized, producing the
above stack dump.

Add a check in the cpuhotplug offline path to see whether the core-imc
counters are enabled before migrating the context, to handle this failure scenario.
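
For context, a minimal sketch (illustrative names, not the in-tree code) of the
cpuhp registration pattern involved: when the online callback fails on one CPU,
cpuhp_setup_state() rolls back by invoking the offline callback on the CPUs it
already onlined, so the offline callback has to tolerate a PMU that never
finished initialising.

#include <linux/cpuhotplug.h>

static bool example_pmu_ready;	/* hypothetical "fully registered" flag */

static int example_cpu_online(unsigned int cpu)
{
	/* May fail, e.g. when per-node memory allocation fails. */
	return 0;
}

static int example_cpu_offline(unsigned int cpu)
{
	/* Also called during rollback; bail out if setup never completed. */
	if (!example_pmu_ready)
		return 0;

	/* ... migrate the perf event context away from this cpu ... */
	return 0;
}

static int example_register(void)
{
	return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				 "perf/powerpc/example:online",
				 example_cpu_online,
				 example_cpu_offline);
}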

Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>   
---
 arch/powerpc/perf/imc-pmu.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 8812624..a7a1db4 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -607,6 +607,20 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
if (!cpumask_test_and_clear_cpu(cpu, _imc_cpumask))
return 0;
 
+   /*
+* Check whether core_imc is registered. We could end up here
+* if the cpuhotplug callback registration fails. i.e, callback
+* invokes the offline path for all successfully registered cpus.
+* At this stage, core_imc pmu will not be registered and we
+* should return here.
+*
+* We return with a zero since this is not an offline failure.
+* And cpuhp_setup_state() returns the actual failure reason
+* to the caller, which in turn will call the cleanup routine.
+*/
+   if (!core_imc_pmu->pmu.event_init)
+   return 0;
+
/* Find any online cpu in that core except the current "cpu" */
ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
 
-- 
2.7.4



Re: [PATCH] powerpc/perf: Fix core-imc hotplug callback failure during imc initialization

2017-11-02 Thread Anju T Sudhakar

Hi,


On Wednesday 01 November 2017 06:22 AM, Michael Ellerman wrote:

Anju T Sudhakar <a...@linux.vnet.ibm.com> writes:


Call trace observed during boot:

What's the actual oops?


The actual oops is:

[0.750749] PCI: CLS 0 bytes, default 128
[0.750855] Unpacking initramfs...
[1.570445] Freeing initrd memory: 23168K
[1.571090] rtas_flash: no firmware flash support
[1.573873] nest_capp0_imc performance monitor hardware support registered
[1.574006] nest_capp1_imc performance monitor hardware support registered
[1.579616] core_imc memory allocation for cpu 56 failed
[1.579730] Unable to handle kernel paging request for data at address 
0xffa400010
[1.579797] Faulting instruction address: 0xc0bf3294
0:mon> e
cpu 0x0: Vector: 300 (Data Access) at [c00ff38ff8d0]
pc: c0bf3294: mutex_lock+0x34/0x90
lr: c0bf3288: mutex_lock+0x28/0x90
sp: c00ff38ffb50
   msr: 92009033
   dar: ffa400010
 dsisr: 8
  current = 0xc00ff383de00
  paca= 0xc7ae   softe: 0irq_happened: 0x01
pid   = 13, comm = cpuhp/0
Linux version 4.11.0-39.el7a.ppc64le 
(mockbu...@ppc-058.build.eng.bos.redhat.com) (gcc version 4.8.5 20150623 (Red 
Hat 4.8.5-16) (GCC) ) #1 SMP Tue Oct 3 07:42:44 EDT 2017
0:mon> t
[c00ff38ffb80] c02ddfac perf_pmu_migrate_context+0xac/0x470
[c00ff38ffc40] c011385c ppc_core_imc_cpu_offline+0x1ac/0x1e0
[c00ff38ffc90] c0125758 cpuhp_invoke_callback+0x198/0x5d0
[c00ff38ffd00] c012782c cpuhp_thread_fun+0x8c/0x3d0
[c00ff38ffd60] c01678d0 smpboot_thread_fn+0x290/0x2a0
[c00ff38ffdc0] c015ee78 kthread+0x168/0x1b0
[c00ff38ffe30] c000b368 ret_from_kernel_thread+0x5c/0x74



[c00ff38ffb80] c02ddfac perf_pmu_migrate_context+0xac/0x470
[c00ff38ffc40] c011385c ppc_core_imc_cpu_offline+0x1ac/0x1e0
[c00ff38ffc90] c0125758 cpuhp_invoke_callback+0x198/0x5d0
[c00ff38ffd00] c012782c cpuhp_thread_fun+0x8c/0x3d0
[c00ff38ffd60] c01678d0 smpboot_thread_fn+0x290/0x2a0
[c00ff38ffdc0] c015ee78 kthread+0x168/0x1b0
[c00ff38ffe30] c000b368 ret_from_kernel_thread+0x5c/0x74

While registering the cpuhotplug callbacks for core-imc, if we fail
in the cpuhotplug online path for any random core (either because the OPAL call
to initialize the core-imc counters fails or because memory allocation fails
for that core), ppc_core_imc_cpu_offline() will get invoked for the other cpus
that successfully returned from the cpuhotplug online path.

But in the ppc_core_imc_cpu_offline() path we try to migrate the event
context when the core-imc counters are not even initialized, producing the
above stack dump.

Add a check in the cpuhotplug offline path to see whether the core-imc
counters are enabled before migrating the context, to handle this failure scenario.

Why do we need a bool to track this? Can't we just check the data
structure we're deinitialising has been initialised?

Doesn't this also mean we won't cleanup the initialisation for any CPUs
that have been initialised?

We do the cleanup in the failing case.




Thanks for the review.

Thanks,
Anju




cheers


diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 8812624..08139f9 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -30,6 +30,7 @@ static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
  static cpumask_t nest_imc_cpumask;
  struct imc_pmu_ref *nest_imc_refc;
  static int nest_pmus;
+static bool core_imc_enabled;
  
  /* Core IMC data structures and variables */
  
@@ -607,6 +608,19 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)

if (!cpumask_test_and_clear_cpu(cpu, _imc_cpumask))
return 0;
  
+	/*

+* See if core imc counters are enabled or not.
+*
+* Suppose we reach here from core_imc_cpumask_init(),
+* since we failed at the cpuhotplug online path for any random
+* core (either because opal call to initialize the core-imc counters
+* failed  or because memory allocation failed).
+* We need to check whether core imc counters are enabled or not before
+* migrating the event context from cpus in the other cores.
+*/
+   if (!core_imc_enabled)
+   return 0;
+
/* Find any online cpu in that core except the current "cpu" */
ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
  
@@ -1299,6 +1313,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id

return ret;
}
  
+		core_imc_enabled = true;

break;
case IMC_DOMAIN_THREAD:
ret = thread_imc_cpu_init();
--
2.7.4




[PATCH v2] powerpc/kernel/sysfs: Export ldbar spr to sysfs

2017-10-31 Thread Anju T Sudhakar
Add ldbar spr to sysfs. The spr will hold thread level In-Memory Collection 
(IMC)
counter configuration data.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/sysfs.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70..f8caee0 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
 SYSFS_SPRSETUP(purr, SPRN_PURR);
 SYSFS_SPRSETUP(spurr, SPRN_SPURR);
 SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(ldbar, SPRN_LDBAR);
 
 /*
   Lets only enable read for phyp resources and
@@ -492,6 +493,7 @@ SYSFS_SPRSETUP(pir, SPRN_PIR);
   Lets be conservative and default to pseries.
 */
 static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(ldbar, 0600, show_ldbar, store_ldbar);
 static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
@@ -757,6 +759,9 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, _attrs[i]);
 
 #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_create_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, _attr_mmcra);
 
@@ -842,6 +847,9 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, _attrs[i]);
 
 #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_remove_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, _attr_mmcra);
 
-- 
2.7.4



[PATCH] powerpc/perf: Clean up IMC code with some code refactoring

2017-10-31 Thread Anju T Sudhakar
Factor out the freeing of the dynamically allocated attribute groups into a
new helper, imc_common_mem_free(), and also fix the memory leak of pmu->events
in update_events_in_group().

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 24 
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 88126245881b..45623428f164 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -224,8 +224,10 @@ static int update_events_in_group(struct device_node 
*node, struct imc_pmu *pmu)
 
/* Allocate memory for attribute group */
attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
-   if (!attr_group)
+   if (!attr_group) {
+   kfree(pmu->events);
return -ENOMEM;
+   }
 
/*
 * Allocate memory for attributes.
@@ -1143,6 +1145,15 @@ static void cleanup_all_thread_imc_memory(void)
}
 }
 
+/* Function to free the attr_groups which are dynamically allocated */
+static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
+{
+   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+   kfree(pmu_ptr);
+   return;
+}
+
 /*
  * Common function to unregister cpu hotplug callback and
  * free the memory.
@@ -1175,11 +1186,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu 
*pmu_ptr)
cleanup_all_thread_imc_memory();
}
 
-   /* Only free the attr_groups which are dynamically allocated  */
-   if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
-   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
-   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
-   kfree(pmu_ptr);
return;
 }
 
@@ -1318,17 +1324,19 @@ int init_imc_pmu(struct device_node *parent, struct 
imc_pmu *pmu_ptr, int pmu_id
 
ret = update_pmu_ops(pmu_ptr);
if (ret)
-   goto err_free;
+   goto err_free_mem;
 
ret = perf_pmu_register(_ptr->pmu, pmu_ptr->pmu.name, -1);
if (ret)
-   goto err_free;
+   goto err_free_mem;
 
pr_info("%s performance monitor hardware support registered\n",
pmu_ptr->pmu.name);
 
return 0;
 
+err_free_mem:
+   imc_common_mem_free(pmu_ptr);
 err_free:
imc_common_cpuhp_mem_free(pmu_ptr);
return ret;
-- 
2.14.1



[PATCH] powerpc/perf: Change the data type for the variable 'ncpu' in IMC code

2017-10-31 Thread Anju T Sudhakar
Change the data type for the variable 'ncpu' in ppc_core_imc_cpu_offline(),
since cpumask_any_but() returns an 'int' value.
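
A small illustrative sketch (hypothetical helper, not the in-tree function) of
why the type matters: with an unsigned variable, a ">= 0" sanity check on the
result is always true and only draws compiler warnings, whereas an int matches
what cpumask_any_but() returns.

#include <linux/cpumask.h>
#include <asm/smp.h>	/* cpu_sibling_mask() on powerpc */

static int example_pick_sibling(unsigned int cpu)
{
	/*
	 * Declared as int to match cpumask_any_but(); an unsigned int would
	 * make the ">= 0" test below always true (-Wtype-limits warns).
	 */
	int ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);

	if (ncpu >= 0 && ncpu < nr_cpu_ids)
		return ncpu;	/* another CPU in this core is still online */

	return -1;
}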

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Reported-by: David Binderman <dcb...@hotmail.com>
---
 arch/powerpc/perf/imc-pmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 8812624..64dae3e 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -597,7 +597,8 @@ static int ppc_core_imc_cpu_online(unsigned int cpu)
 
 static int ppc_core_imc_cpu_offline(unsigned int cpu)
 {
-   unsigned int ncpu, core_id;
+   unsigned int core_id;
+   int ncpu;
struct imc_pmu_ref *ref;
 
/*
-- 
2.7.4



[PATCH] powerpc/perf: Fix core-imc hotplug callback failure during imc initialization

2017-10-31 Thread Anju T Sudhakar
Call trace observed during boot:

[c00ff38ffb80] c02ddfac perf_pmu_migrate_context+0xac/0x470
[c00ff38ffc40] c011385c ppc_core_imc_cpu_offline+0x1ac/0x1e0
[c00ff38ffc90] c0125758 cpuhp_invoke_callback+0x198/0x5d0
[c00ff38ffd00] c012782c cpuhp_thread_fun+0x8c/0x3d0
[c00ff38ffd60] c01678d0 smpboot_thread_fn+0x290/0x2a0
[c00ff38ffdc0] c015ee78 kthread+0x168/0x1b0
[c00ff38ffe30] c000b368 ret_from_kernel_thread+0x5c/0x74

While registering the cpuhotplug callbacks for core-imc, if we fail
in the cpuhotplug online path for any random core (either because the OPAL call
to initialize the core-imc counters fails or because memory allocation fails
for that core), ppc_core_imc_cpu_offline() will get invoked for the other cpus
that successfully returned from the cpuhotplug online path.

But in the ppc_core_imc_cpu_offline() path we try to migrate the event
context when the core-imc counters are not even initialized, producing the
above stack dump.

Add a check in the cpuhotplug offline path to see whether the core-imc
counters are enabled before migrating the context, to handle this failure scenario.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 8812624..08139f9 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -30,6 +30,7 @@ static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 static cpumask_t nest_imc_cpumask;
 struct imc_pmu_ref *nest_imc_refc;
 static int nest_pmus;
+static bool core_imc_enabled;
 
 /* Core IMC data structures and variables */
 
@@ -607,6 +608,19 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
if (!cpumask_test_and_clear_cpu(cpu, _imc_cpumask))
return 0;
 
+   /*
+* See if core imc counters are enabled or not.
+*
+* Suppose we reach here from core_imc_cpumask_init(),
+* since we failed at the cpuhotplug online path for any random
+* core (either because opal call to initialize the core-imc counters
+* failed  or because memory allocation failed).
+* We need to check whether core imc counters are enabled or not before
+* migrating the event context from cpus in the other cores.
+*/
+   if (!core_imc_enabled)
+   return 0;
+
/* Find any online cpu in that core except the current "cpu" */
ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
 
@@ -1299,6 +1313,7 @@ int init_imc_pmu(struct device_node *parent, struct 
imc_pmu *pmu_ptr, int pmu_id
return ret;
}
 
+   core_imc_enabled = true;
break;
case IMC_DOMAIN_THREAD:
ret = thread_imc_cpu_init();
-- 
2.7.4



[PATCH] powerpc/kernel/sysfs: Export ldbar spr to sysfs

2017-10-27 Thread Anju T Sudhakar
Add ldbar spr to sysfs. The spr will hold thread level In-Memory Collection 
(IMC)
counter configuration data.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Acked-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/sysfs.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 4437c70c7c2b..8efcaece4796 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -466,6 +466,7 @@ static ssize_t __used \
 #ifdef HAS_PPC_PMC_CLASSIC
 SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
 SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
+SYSFS_PMCSETUP(ldbar, SPRN_LDBAR);
 SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
 SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
 SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
@@ -492,6 +493,7 @@ SYSFS_SPRSETUP(pir, SPRN_PIR);
   Lets be conservative and default to pseries.
 */
 static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(ldbar, 0600, show_ldbar, store_ldbar);
 static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
@@ -757,6 +759,9 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, _attrs[i]);
 
 #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_create_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, _attr_mmcra);
 
@@ -842,6 +847,9 @@ static int unregister_cpu_online(unsigned int cpu)
device_remove_file(s, _attrs[i]);
 
 #ifdef CONFIG_PPC64
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   device_remove_file(s, _attr_ldbar);
+
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, _attr_mmcra);
 
-- 
2.14.1



[PATCH V2] powerpc/perf: Fix IMC initialization crash

2017-10-13 Thread Anju T Sudhakar
Call trace observed with latest firmware, and upstream kernel.

[   14.499938] NIP [c00f318c] init_imc_pmu+0x8c/0xcf0
[   14.499973] LR [c00f33f8] init_imc_pmu+0x2f8/0xcf0
[   14.57] Call Trace:
[   14.500027] [c03fed18f710] [c00f33c8] init_imc_pmu+0x2c8/0xcf0 
(unreliable)
[   14.500080] [c03fed18f800] [c00b5ec0] 
opal_imc_counters_probe+0x300/0x400
[   14.500132] [c03fed18f900] [c0807ef4] 
platform_drv_probe+0x64/0x110
[   14.500185] [c03fed18f980] [c0804b58] 
driver_probe_device+0x3d8/0x580
[   14.500236] [c03fed18fa10] [c0804e4c] __driver_attach+0x14c/0x1a0
[   14.500302] [c03fed18fa90] [c080156c] bus_for_each_dev+0x8c/0xf0
[   14.500353] [c03fed18fae0] [c0803fa4] driver_attach+0x34/0x50
[   14.500397] [c03fed18fb00] [c0803688] bus_add_driver+0x298/0x350
[   14.500449] [c03fed18fb90] [c080605c] driver_register+0x9c/0x180
[   14.500500] [c03fed18fc00] [c0807dec] 
__platform_driver_register+0x5c/0x70
[   14.500552] [c03fed18fc20] [c101cee0] 
opal_imc_driver_init+0x2c/0x40
[   14.500603] [c03fed18fc40] [c000d084] do_one_initcall+0x64/0x1d0
[   14.500654] [c03fed18fd00] [c100434c] 
kernel_init_freeable+0x280/0x374
[   14.500705] [c03fed18fdc0] [c000d314] kernel_init+0x24/0x160
[   14.500750] [c03fed18fe30] [c000b4e8] 
ret_from_kernel_thread+0x5c/0x74
[   14.500799] Instruction dump:
[   14.500827] 4082024c 2f890002 419e054c 2e890003 41960094 2e890001 3ba0ffea 
419602d8 
[   14.500884] 419e0290 2f890003 419e02a8 e93e0118  2fa3 419e0010 
4827ba41 
[   14.500945] ---[ end trace 27b734ad26f1add4 ]---
[   15.908719] 
[   16.908869] Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0007
[   16.908869] 
[   18.125813] ---[ end Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0007]

While registering nest imc at init, the cpu-hotplug callback
`nest_pmu_cpumask_init()` makes an OPAL call to stop the engine. If that OPAL
call fails, imc_common_cpuhp_mem_free() is invoked to clean up the memory and
the cpuhotplug setup.

But when cleaning up the attribute group, we were dereferencing the attribute
element array without checking whether the backing element is NULL, which
causes a kernel panic.

Add a check for the backing element prior to dereferencing the attribute
element, to handle the failing case gracefully.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Reported-by: Pridhiviraj Paidipeddi <ppaid...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 9ccac86f3463..001504b0e800 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1148,7 +1148,8 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu 
*pmu_ptr)
}
 
/* Only free the attr_groups which are dynamically allocated  */
-   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+   if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
+   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
kfree(pmu_ptr);
return;
-- 
2.14.1



[PATCH] powerpc/perf: Add ___GFP_NOWARN flag to alloc_pages_node()

2017-10-11 Thread Anju T Sudhakar
Stack trace output during a stress test:
 [4.310049] Freeing initrd memory: 22592K
[4.310646] rtas_flash: no firmware flash support
[4.313341] cpuhp/64: page allocation failure: order:0, 
mode:0x14480c0(GFP_KERNEL|__GFP_ZERO|__GFP_THISNODE), nodemask=(null)
[4.313465] cpuhp/64 cpuset=/ mems_allowed=0
[4.313521] CPU: 64 PID: 392 Comm: cpuhp/64 Not tainted 
4.11.0-39.el7a.ppc64le #1
[4.313588] Call Trace:
[4.313622] [c00f1fb1b8e0] [c0c09388] dump_stack+0xb0/0xf0 
(unreliable)
[4.313694] [c00f1fb1b920] [c030ef6c] warn_alloc+0x12c/0x1c0
[4.313753] [c00f1fb1b9c0] [c030ff68] 
__alloc_pages_nodemask+0xea8/0x1000
[4.313823] [c00f1fb1bbb0] [c0113a8c] 
core_imc_mem_init+0xbc/0x1c0
[4.313892] [c00f1fb1bc00] [c0113cdc] 
ppc_core_imc_cpu_online+0x14c/0x170
[4.313962] [c00f1fb1bc90] [c0125758] 
cpuhp_invoke_callback+0x198/0x5d0
[4.314031] [c00f1fb1bd00] [c012782c] cpuhp_thread_fun+0x8c/0x3d0
[4.314101] [c00f1fb1bd60] [c01678d0] 
smpboot_thread_fn+0x290/0x2a0
[4.314169] [c00f1fb1bdc0] [c015ee78] kthread+0x168/0x1b0
[4.314229] [c00f1fb1be30] [c000b368] 
ret_from_kernel_thread+0x5c/0x74
[4.314313] Mem-Info:
[4.314356] active_anon:0 inactive_anon:0 isolated_anon:0

core_imc_mem_init() uses alloc_pages_node() at system boot to get memory,
and alloc_pages_node() emits this stack dump when it tries to allocate
memory from a node which has no memory behind it. Add the __GFP_NOWARN
flag to the allocation request as a fix.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Reported-by: Michael Ellerman <m...@ellerman.id.au>
Reported-by: Venkat R.B <venka...@in.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 9ccac86f3463..d99ea01a2028 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -523,8 +523,8 @@ static int core_imc_mem_init(int cpu, int size)
 
/* We need only vbase for core counters */
mem_info->vbase = page_address(alloc_pages_node(phys_id,
- GFP_KERNEL | __GFP_ZERO | 
__GFP_THISNODE,
- get_order(size)));
+ GFP_KERNEL | __GFP_ZERO | 
__GFP_THISNODE |
+ __GFP_NOWARN, get_order(size)));
if (!mem_info->vbase)
return -ENOMEM;
 
@@ -763,8 +763,8 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
 * free the memory in cpu offline path.
 */
local_mem = page_address(alloc_pages_node(phys_id,
- GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
- get_order(size)));
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size)));
if (!local_mem)
return -ENOMEM;
 
-- 
2.14.1



Re: [PATCH] powerpc/powernv: Add kernel cmdline parameter to disable imc

2017-10-11 Thread Anju T Sudhakar

Hi mpe, stewart,


On Wednesday 11 October 2017 01:55 AM, Stewart Smith wrote:

Michael Ellerman <m...@ellerman.id.au> writes:

Anju T Sudhakar <a...@linux.vnet.ibm.com> writes:


Add a kernel command line parameter option to disable In-Memory Collection
(IMC) counters and add documentation. This helps in debug.

I'd really rather we didn't. Do we *really* need this?

We don't have command line parameters to disable any of the other ~20
PMUs, why is this one special?


This one is really helpful in debugging, in case we want to proceed
without nest counters or core counters. But if we have the facility to do
the same from petitboot, that's fine.



You could also do the same thing by editing the device tree before
booting your kernel, we do have the facility to do that in petitboot.

A recent firmware patch: https://patchwork.ozlabs.org/patch/823249/
would fix the firmware implementation where the counters were already
running before the INIT/START calls, which are likely the cause of the
problems that this patch is trying to work around.

I propose we have the firmware do the right thing and nothing special in
kernel. i.e. not to merge this.



Agreed.



Thanks,
Anju



[PATCH] powerpc/perf: Fix IMC initialization crash

2017-10-10 Thread Anju T Sudhakar
Call trace observed with latest firmware, and upstream kernel.

[   14.499938] NIP [c00f318c] init_imc_pmu+0x8c/0xcf0
[   14.499973] LR [c00f33f8] init_imc_pmu+0x2f8/0xcf0
[   14.57] Call Trace:
[   14.500027] [c03fed18f710] [c00f33c8] init_imc_pmu+0x2c8/0xcf0 (unreliable)
[   14.500080] [c03fed18f800] [c00b5ec0] opal_imc_counters_probe+0x300/0x400
[   14.500132] [c03fed18f900] [c0807ef4] platform_drv_probe+0x64/0x110
[   14.500185] [c03fed18f980] [c0804b58] driver_probe_device+0x3d8/0x580
[   14.500236] [c03fed18fa10] [c0804e4c] __driver_attach+0x14c/0x1a0
[   14.500302] [c03fed18fa90] [c080156c] bus_for_each_dev+0x8c/0xf0
[   14.500353] [c03fed18fae0] [c0803fa4] driver_attach+0x34/0x50
[   14.500397] [c03fed18fb00] [c0803688] bus_add_driver+0x298/0x350
[   14.500449] [c03fed18fb90] [c080605c] driver_register+0x9c/0x180
[   14.500500] [c03fed18fc00] [c0807dec] __platform_driver_register+0x5c/0x70
[   14.500552] [c03fed18fc20] [c101cee0] opal_imc_driver_init+0x2c/0x40
[   14.500603] [c03fed18fc40] [c000d084] do_one_initcall+0x64/0x1d0
[   14.500654] [c03fed18fd00] [c100434c] kernel_init_freeable+0x280/0x374
[   14.500705] [c03fed18fdc0] [c000d314] kernel_init+0x24/0x160
[   14.500750] [c03fed18fe30] [c000b4e8] ret_from_kernel_thread+0x5c/0x74
[   14.500799] Instruction dump:
[   14.500827] 4082024c 2f890002 419e054c 2e890003 41960094 2e890001 3ba0ffea 419602d8
[   14.500884] 419e0290 2f890003 419e02a8 e93e0118  2fa3 419e0010 4827ba41
[   14.500945] ---[ end trace 27b734ad26f1add4 ]---
[   15.908719] 
[   16.908869] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0007
[   16.908869] 
[   18.125813] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0007]

While registering nest imc at init, the cpu-hotplug callback
`nest_pmu_cpumask_init()` makes an OPAL call to stop the engine. If the OPAL
call fails, imc_common_cpuhp_mem_free() is invoked to clean up the memory and
the cpuhotplug setup.

But when cleaning up the attribute group, we were dereferencing the attribute
element array without checking whether the backing element is NULL. This
causes the kernel panic.

Factor out the memory-freeing part from imc_common_cpuhp_mem_free() so that
the failing case is handled gracefully.
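
The crash pattern, reduced to its essence (an illustrative fragment, not the
driver code as-is):

	/*
	 * If the earlier failure happened before the event attribute group was
	 * allocated, attr_groups[IMC_EVENT_ATTR] is still NULL here, so the
	 * unconditional dereference below blows up:
	 */
	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);	/* NULL dereference */
	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
	kfree(pmu_ptr);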

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Reported-by: Pridhiviraj Paidipeddi <ppaid...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 23 ---
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 9ccac86..213d976 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -224,8 +224,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
 
/* Allocate memory for attribute group */
attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
-   if (!attr_group)
+   if (!attr_group) {
+   kfree(pmu->events);
return -ENOMEM;
+   }
 
/*
 * Allocate memory for attributes.
@@ -1115,6 +1117,15 @@ static void cleanup_all_thread_imc_memory(void)
}
 }
 
+/* Function to free the attr_groups which are dynamically allocated */
+static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
+{
+   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+   kfree(pmu_ptr);
+   return;
+}
+
 /*
  * Common function to unregister cpu hotplug callback and
  * free the memory.
@@ -1147,10 +1158,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
cleanup_all_thread_imc_memory();
}
 
-   /* Only free the attr_groups which are dynamically allocated  */
-   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
-   kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
-   kfree(pmu_ptr);
return;
 }
 
@@ -1289,17 +1296,19 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 
ret = update_pmu_ops(pmu_ptr);
if (ret)
-   goto err_free;
+   goto err_free_mem;
 
ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
if (ret)
-   goto err_free;
+   goto err_free_mem;
 
pr_info("%s performance monitor hardware support registered\n",
pmu_ptr->pmu.name);
 
return 0;
 
+err_free_mem:
+   imc_common_mem_free(pmu_ptr);
 err_free:
imc_common_cpuhp_mem_free(pmu_ptr);
return ret;
-- 
1.8.3.1



Re: linux-4.14-rc3/arch/powerpc/perf/imc-pmu.c:599: pointless test ?

2017-10-09 Thread Anju T Sudhakar

Hi David,


On Monday 09 October 2017 10:31 AM, Michael Ellerman wrote:

David Binderman <dcb...@hotmail.com> writes:

Hello there,

linux-4.14-rc3/arch/powerpc/perf/imc-pmu.c:599]: (style) Unsigned variable 'ncpu' can't be negative so it is unnecessary to test it.

Source code is

 if (ncpu >= 0 && ncpu < nr_cpu_ids) {

but

 unsigned int ncpu, core_id;

Suggest remove test.
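
For reference, a minimal illustration of the flagged pattern (not taken from
the kernel tree; get_target_cpu() and use_cpu() are made-up helpers):

	unsigned int ncpu = get_target_cpu();

	/*
	 * For an unsigned type "ncpu >= 0" is always true, which is what
	 * cppcheck is pointing out; either drop that half of the test or
	 * declare ncpu as a signed int if -1 ("no cpu") is a possible value.
	 */
	if (ncpu >= 0 && ncpu < nr_cpu_ids)
		use_cpu(ncpu);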


Nice catch. Thanks for mentioning this. I will fix this.

Thanks,
Anju

Thanks.

Notifying the author would be more helpful:

$ git blame -L 599,599 arch/powerpc/perf/imc-pmu.c
39a846db1d574 (Anju T Sudhakar 2017-07-19 03:06:35 +0530 599)   if (ncpu >= 0 && ncpu < nr_cpu_ids) {
$ git log -1 --format=%ae 39a846db1d574
a...@linux.vnet.ibm.com

cheers





[PATCH] powerpc/powernv: Add kernel cmdline parameter to disable imc

2017-10-09 Thread Anju T Sudhakar
Add a kernel command line parameter option to disable In-Memory Collection
(IMC) counters and add documentation. This helps in debugging.
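
Usage example (assuming the patch is applied; the '#' annotations are just
notes, not part of the command line):

	disable_imc=nest    # skip registration of the nest-imc PMUs only
	disable_imc=core    # skip the core-imc and thread-imc PMUs
	disable_imc=all     # skip nest, core and thread imc PMUs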
 
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Reviewed-By: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  7 +
 arch/powerpc/platforms/powernv/opal-imc.c   | 35 +
 2 files changed, 42 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 0549662..06a8da1 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -820,6 +820,13 @@
disable_ipv6=   [IPV6]
See Documentation/networking/ipv6.txt.
 
+   disable_imc=[PPC]
+   Format {nest | core | all}
+   Disable imc counters during boot.
+   nest- Disable nest-imc counters.
+   core- Disable core and thread imc counters.
+   all-- Disable nest, core and thread imc counters.
+
disable_mtrr_cleanup [X86]
The kernel tries to adjust MTRR layout from continuous
to discrete, to make X server driver able to add WB
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531..e929f33 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -22,6 +22,28 @@
 #include 
 #include 
 
+static bool disable_nest_imc;
+static bool disable_core_imc;
+
+/*
+ * disable_imc=nest: skip the registration of nest pmus.
+ * disable_imc=core: skip the registration of core and thread pmus.
+ * disable_imc=all : disables nest, core and thread.
+ */
+static int __init disable_imc_counters(char *p)
+{
+   if (strncmp(p, "nest", 4) == 0)
+   disable_nest_imc = true;
+   else if (strncmp(p, "core", 4) == 0)
+   disable_core_imc = true;
+   else if (strncmp(p, "all", 3) == 0) {
+   disable_nest_imc = true;
+   disable_core_imc = true;
+   }
+   return 0;
+}
+early_param("disable_imc", disable_imc_counters);
+
 /*
  * imc_get_mem_addr_nest: Function to get nest counter memory region
  * for each chip
@@ -169,6 +191,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
return -ENODEV;
}
 
+   /* If kernel is booted with disable_imc parameters, then return */
+   if (disable_nest_imc && disable_core_imc)
+   return -ENODEV;
+
for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
if (of_property_read_u32(imc_dev, "type", &type)) {
pr_warn("IMC Device without type property\n");
@@ -177,12 +203,21 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 
switch (type) {
case IMC_TYPE_CHIP:
+   if (disable_nest_imc)
+   continue;
+
domain = IMC_DOMAIN_NEST;
break;
case IMC_TYPE_CORE:
+   if (disable_core_imc)
+   continue;
+
domain = IMC_DOMAIN_CORE;
break;
case IMC_TYPE_THREAD:
+   if (disable_core_imc)
+   continue;
+
domain = IMC_DOMAIN_THREAD;
break;
default:
-- 
2.7.4



Re: [PATCH] powerpc/perf: Fix for core/nest imc call trace on cpuhotplug

2017-10-05 Thread Anju T Sudhakar

Hi Santosh,


On Thursday 05 October 2017 03:20 PM, Santosh Sivaraj wrote:

* Anju T Sudhakar <a...@linux.vnet.ibm.com> wrote (on 2017-10-04 06:50:52 
+):


Nest/core pmu units are enabled only when they are used. A reference count is
maintained for the events which use the nest/core pmu units. Currently in the
*_imc_counters_release functions a WARN() is used for notification of any
underflow of the ref count.

The case where the event ref count hits a negative value is when a perf session
is started, followed by offlining of all cpus in a given core.
i.e. in the cpuhotplug offline path, ppc_core_imc_cpu_offline() sets the
ref->count to zero if the current cpu which is about to go offline is the last
cpu in the core, and makes an OPAL call to disable the engine in that core.
And on perf session termination, perf->destroy (core_imc_counters_release) will
first decrement the ref->count for this core and, based on the ref->count value,
an opal call is made to disable the core-imc engine.
Now, since the cpuhotplug path has already cleared the ref->count for the core
and disabled the engine, perf->destroy() decrementing again at event termination
makes it negative, which in turn fires the WARN_ON. The same happens for nest
units.

Add a check to see if the reference count is already zero before decrementing
the count, so that the ref count will not hit a negative value.
     
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>

Reviewed-by: Santosh Sivaraj <sant...@fossix.org>


Thanks for reviewing.

-Anju

---
  arch/powerpc/perf/imc-pmu.c | 28 
  1 file changed, 28 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 9ccac86f3463..e3a1f65933b5 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -399,6 +399,20 @@ static void nest_imc_counters_release(struct perf_event *event)
 
	/* Take the mutex lock for this node and then decrement the reference count */
	mutex_lock(&ref->lock);
+   if (ref->refc == 0) {
+   /*
+* The scenario where this is true is, when perf session is
+* started, followed by offlining of all cpus in a given node.
+*
+* In the cpuhotplug offline path, ppc_nest_imc_cpu_offline()
+* function set the ref->count to zero, if the cpu which is
+* about to offline is the last cpu in a given node and make
+* an OPAL call to disable the engine in that node.
+*
+*/
+   mutex_unlock(&ref->lock);
+   return;
+   }
ref->refc--;
if (ref->refc == 0) {
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
@@ -646,6 +660,20 @@ static void core_imc_counters_release(struct perf_event *event)
	return;
 
	mutex_lock(&ref->lock);

+   if (ref->refc == 0) {
+   /*
+* The scenario where this is true is, when perf session is
+* started, followed by offlining of all cpus in a given core.
+*
+* In the cpuhotplug offline path, ppc_core_imc_cpu_offline()
+* function set the ref->count to zero, if the cpu which is
+* about to offline is the last cpu in a given core and make
+* an OPAL call to disable the engine in that core.
+*
+*/
+   mutex_unlock(&ref->lock);
+   return;
+   }
ref->refc--;
if (ref->refc == 0) {
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,




[PATCH] powerpc/perf: Fix for core/nest imc call trace on cpuhotplug

2017-10-04 Thread Anju T Sudhakar
Nest/core pmu units are enabled only when they are used. A reference count is
maintained for the events which use the nest/core pmu units. Currently in the
*_imc_counters_release functions a WARN() is used for notification of any
underflow of the ref count.

The case where the event ref count hits a negative value is when a perf session
is started, followed by offlining of all cpus in a given core.
i.e. in the cpuhotplug offline path, ppc_core_imc_cpu_offline() sets the
ref->count to zero if the current cpu which is about to go offline is the last
cpu in the core, and makes an OPAL call to disable the engine in that core.
And on perf session termination, perf->destroy (core_imc_counters_release) will
first decrement the ref->count for this core and, based on the ref->count value,
an opal call is made to disable the core-imc engine.
Now, since the cpuhotplug path has already cleared the ref->count for the core
and disabled the engine, perf->destroy() decrementing again at event termination
makes it negative, which in turn fires the WARN_ON. The same happens for nest
units.

Add a check to see if the reference count is already zero before decrementing
the count, so that the ref count will not hit a negative value.
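
Condensed, the release path then takes the following shape (illustrative only;
imc_refc_put() is a made-up helper name, the patch open-codes this logic in
nest_imc_counters_release() and core_imc_counters_release()):

	static void imc_refc_put(struct imc_pmu_ref *ref, int cpu)
	{
		mutex_lock(&ref->lock);
		if (ref->refc == 0) {
			/*
			 * The hotplug-offline path already zeroed the count and
			 * stopped the engine for this core/node; nothing left
			 * for perf->destroy() to undo.
			 */
			mutex_unlock(&ref->lock);
			return;
		}
		ref->refc--;
		if (ref->refc == 0)
			opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
					       get_hard_smp_processor_id(cpu));
		mutex_unlock(&ref->lock);
	}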
    
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 9ccac86f3463..e3a1f65933b5 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -399,6 +399,20 @@ static void nest_imc_counters_release(struct perf_event *event)
 
	/* Take the mutex lock for this node and then decrement the reference count */
	mutex_lock(&ref->lock);
+   if (ref->refc == 0) {
+   /*
+* The scenario where this is true is, when perf session is
+* started, followed by offlining of all cpus in a given node.
+*
+* In the cpuhotplug offline path, ppc_nest_imc_cpu_offline()
+* function set the ref->count to zero, if the cpu which is
+* about to offline is the last cpu in a given node and make
+* an OPAL call to disable the engine in that node.
+*
+*/
+   mutex_unlock(&ref->lock);
+   return;
+   }
ref->refc--;
if (ref->refc == 0) {
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
@@ -646,6 +660,20 @@ static void core_imc_counters_release(struct perf_event *event)
	return;
 
	mutex_lock(&ref->lock);
+   if (ref->refc == 0) {
+   /*
+* The scenario where this is true is, when perf session is
+* started, followed by offlining of all cpus in a given core.
+*
+* In the cpuhotplug offline path, ppc_core_imc_cpu_offline()
+* function set the ref->count to zero, if the cpu which is
+* about to offline is the last cpu in a given core and make
+* an OPAL call to disable the engine in that core.
+*
+*/
+   mutex_unlock(&ref->lock);
+   return;
+   }
ref->refc--;
if (ref->refc == 0) {
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
-- 
2.14.1



Re: [PATCH] powerpc/perf: Fix for core/nest imc call trace on cpuhotplug

2017-09-25 Thread Anju T Sudhakar

Hi mpe,


On Thursday 21 September 2017 10:04 AM, Michael Ellerman wrote:

Anju T Sudhakar <a...@linux.vnet.ibm.com> writes:


Nest/core pmu units are enabled only when it is used. A reference count is
maintained for the events which uses the nest/core pmu units. Currently in
*_imc_counters_release function a WARN() is used for notification of any
underflow of ref count. Replace WARN() with a pr_info since it is an overkill.

As discussed elsewhere this is not the right solution.

If it's OK for the reference count to be negative, then we shouldn't
print anything when it is.

But I don't understand how it can be OK for the refcount to be negative.
That means someone has a negative number of references to something?

cheers



The scenario where this happens is a stress test where a perf session is
started, followed by offlining of all cpus in a given core, and finally the
perf session is terminated.


So, in the cpuhotplug offline path (ppc_core_imc_cpu_offline), the function
sets the ref->count to zero if the current cpu which is about to go offline
is the last cpu in a given core, and makes an OPAL call to disable the engine
in that core.
And on perf session termination, perf->destroy
(core_imc_counters_release) will first decrement the ref->count for this
core and, based on the ref->count value, an opal call is made to disable
the core-imc engine.


Now, since the cpuhotplug path has already cleared the ref->count for the
core and disabled the engine, perf->destroy() decrementing again at event
termination makes it negative, which in turn fires the WARN_ON.


So we do prefer to remove the message, as this won't happen in normal
operation and the core counters are working as expected.

I will send out a patch by removing the message asap.


Thanks,
Anju




[PATCH] powerpc/perf: Fix for core/nest imc call trace on cpuhotplug

2017-09-11 Thread Anju T Sudhakar
Nest/core pmu units are enabled only when they are used. A reference count is
maintained for the events which use the nest/core pmu units. Currently in the
*_imc_counters_release functions a WARN() is used for notification of any
underflow of the ref count. Replace the WARN() with a pr_info(), since a full
warning splat is overkill here.
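
For context, the practical difference (generic illustration, not the driver
code): WARN() prints the message plus a full register/stack splat and sets
the 'W' taint flag, while pr_info() emits a single log line.

	if (ref->refc < 0) {
		pr_info("core-imc: Invalid event reference count\n");	/* one log line */
		/* WARN(1, "core-imc: ...") would also dump a backtrace here */
		ref->refc = 0;
	}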

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Acked-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 9ccac86..4d523d0 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -409,7 +409,7 @@ static void nest_imc_counters_release(struct perf_event *event)
return;
}
} else if (ref->refc < 0) {
-   WARN(1, "nest-imc: Invalid event reference count\n");
+   pr_info("nest-imc: Invalid event reference count\n");
ref->refc = 0;
}
mutex_unlock(&ref->lock);
@@ -656,7 +656,7 @@ static void core_imc_counters_release(struct perf_event *event)
return;
}
} else if (ref->refc < 0) {
-   WARN(1, "core-imc: Invalid event reference count\n");
+   pr_info("core-imc: Invalid event reference count\n");
ref->refc = 0;
}
mutex_unlock(&ref->lock);
-- 
2.7.4



[PATCH v12 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging

2017-07-03 Thread Anju T Sudhakar
From: Madhavan Srinivasan 

Add a PMU function to initialize a core IMC event. It also adds a cpumask
initialization function for the core IMC PMU. For initialization, memory is
allocated per core, where the data for the core IMC counters will be
accumulated. The base address of this page is sent to OPAL via an OPAL call,
which initializes various SCOMs related to core IMC initialization. Upon any
error, the pages are freed and the core IMC counters are disabled using the
same OPAL call.

For CPU hotplugging, a cpumask is initialized which contains an online CPU
from each core. If a cpu goes offline, we check whether that cpu belongs to
the core imc cpumask; if yes, we migrate the PMU context to any other online
cpu (if available) in that core. If a cpu comes back online, it is added to
the core imc cpumask only if there was no other cpu from that core in the
previous cpumask.

To register the hotplug functions for core_imc, a new state
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE is added to the list of existing states.

The patch also adds an OPAL device shutdown callback, needed to disable the
IMC core engine to handle kexec.
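
A rough sketch of the offline-side handling described above (simplified; it
borrows names from the patch such as core_imc_cpumask, core_imc_pmu and
opal_imc_counters_stop(), but it is not the exact implementation and omits
the reference-count handling):

	static int ppc_core_imc_cpu_offline(unsigned int cpu)
	{
		unsigned int target;

		/* Nothing to do unless this cpu is the designated reader for its core. */
		if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
			return 0;

		/* Pick any other online sibling in the same core, if one exists,
		 * and hand the PMU context over to it.
		 */
		target = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &core_imc_cpumask);
			perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, target);
		} else {
			/* Last cpu of the core: stop the engine via OPAL. */
			opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
					       get_hard_smp_processor_id(cpu));
		}
		return 0;
	}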

Signed-off-by: Hemant Kumar  
Signed-off-by: Anju T Sudhakar 
Signed-off-by: Madhavan Srinivasan 
---
 arch/powerpc/include/asm/opal-api.h   |   1 +
 arch/powerpc/perf/imc-pmu.c   | 371 +++---
 arch/powerpc/platforms/powernv/opal-imc.c |  25 ++
 include/linux/cpuhotplug.h|   1 +
 4 files changed, 371 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index fdacb030cd77..0d83427b7467 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1009,6 +1009,7 @@ enum {
 /* Argument to OPAL_IMC_COUNTERS_*  */
 enum {
OPAL_IMC_COUNTERS_NEST = 1,
+   OPAL_IMC_COUNTERS_CORE = 2,
 };
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 041d3097d42a..c1a275ed2510 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1,5 +1,5 @@
 /*
- * Nest Performance Monitor counter support.
+ * IMC Performance Monitor counter support.
  *
  * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
  *   (C) 2017 Anju T Sudhakar, IBM Corporation.
@@ -21,6 +21,7 @@
 /* Needed for sanity check */
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 static cpumask_t nest_imc_cpumask;
+static cpumask_t core_imc_cpumask;
 static int nest_imc_cpumask_initialized;
 static int nest_pmus;
 /*
@@ -30,7 +31,7 @@ static int nest_pmus;
 static DEFINE_MUTEX(imc_nest_inited_reserve);
 
 struct imc_pmu_ref *nest_imc_refc;
-
+struct imc_pmu_ref *core_imc_refc;
 struct imc_pmu *core_imc_pmu;
 
 struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
@@ -55,14 +56,32 @@ static struct attribute_group imc_format_group = {
.attrs = nest_imc_format_attrs,
 };
 
+static struct attribute *core_imc_format_attrs[] = {
+   _attr_event.attr,
+   _attr_offset.attr,
+   _attr_rvalue.attr,
+   NULL,
+};
+
+static struct attribute_group core_imc_format_group = {
+   .name = "format",
+   .attrs = core_imc_format_attrs,
+};
+
 /* Get the cpumask printed to a buffer "buf" */
 static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
struct device_attribute *attr,
char *buf)
 {
+   struct pmu *pmu = dev_get_drvdata(dev);
cpumask_t *active_mask;
 
-   active_mask = &nest_imc_cpumask;
+   if (!strncmp(pmu->name, "nest_", strlen("nest_")))
+   active_mask = &nest_imc_cpumask;
+   else if (!strncmp(pmu->name, "core_", strlen("core_")))
+   active_mask = &core_imc_cpumask;
+   else
+   return 0;
return cpumap_print_to_pagebuf(true, buf, active_mask);
 }
 
@@ -313,6 +332,242 @@ static int nest_imc_event_init(struct perf_event *event)
return 0;
 }
 
+/*
+ * core_imc_mem_init : Initializes memory for the current core.
+ *
+ * Uses alloc_pages_

[PATCH v12 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support

2017-07-03 Thread Anju T Sudhakar
Add a cpumask attribute to be used by each IMC pmu. Only one cpu (any
online CPU) from each chip is designated to read counters for the nest PMUs.

On CPU hotplug, the dying CPU is checked to see whether it is one of the
designated cpus; if yes, the next online cpu from the same chip (for nest
units) is designated as the new cpu to read counters. For this purpose, we
introduce a new state: CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE.
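
The registration itself boils down to wiring two callbacks into the new
hotplug state (a sketch under the patch's naming; the state-name string and
the callback bodies are assumptions here, not quoted from the diff):

	static int nest_pmu_cpumask_init(void)
	{
		return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
					 "perf/powerpc/imc:online",
					 ppc_nest_imc_cpu_online,	/* designate/keep a reader cpu */
					 ppc_nest_imc_cpu_offline);	/* hand over, or stop the engine */
	}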

Signed-off-by: Anju T Sudhakar 
Signed-off-by: Hemant Kumar  
Signed-off-by: Madhavan Srinivasan  
---
 arch/powerpc/include/asm/imc-pmu.h |  11 +
 arch/powerpc/include/asm/opal-api.h|  10 +-
 arch/powerpc/include/asm/opal.h|   4 +
 arch/powerpc/perf/imc-pmu.c| 280 -
 arch/powerpc/platforms/powernv/opal-imc.c  |  21 +-
 arch/powerpc/platforms/powernv/opal-wrappers.S |   3 +
 include/linux/cpuhotplug.h |   1 +
 7 files changed, 324 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 25d0c57d14fe..aeed903b2a79 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -24,6 +24,7 @@
  * For static allocation of some of the structures.
  */
 #define IMC_MAX_PMUS   32
+#define IMC_MAX_CHIPS  32
 
 /*
  * This macro is used for memory buffer allocation of
@@ -94,6 +95,16 @@ struct imc_pmu {
const struct attribute_group *attr_groups[4];
 };
 
+/*
+ * Structure to hold id, lock and reference count for the imc events which
+ * are inited.
+ */
+struct imc_pmu_ref {
+   unsigned int id;
+   struct mutex lock;
+   int refc;
+};
+
 /* In-Memory Collection Counters Type */
 enum {
IMC_COUNTER_PER_CHIP= 0x10,
diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index cb3e6242a78c..fdacb030cd77 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -190,7 +190,10 @@
 #define OPAL_NPU_INIT_CONTEXT  146
 #define OPAL_NPU_DESTROY_CONTEXT   147
 #define OPAL_NPU_MAP_LPAR  148
-#define OPAL_LAST  148
+#define OPAL_IMC_COUNTERS_INIT 149
+#define OPAL_IMC_COUNTERS_START150
+#define OPAL_IMC_COUNTERS_STOP 151
+#define OPAL_LAST  151
 
 /* Device tree flags */
 
@@ -1003,6 +1006,11 @@ enum {
XIVE_DUMP_EMU_STATE = 5,
 };
 
+/* Argument to OPAL_IMC_COUNTERS_*  */
+enum {
+   OPAL_IMC_COUNTERS_NEST = 1,
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 588fb1c23af9..48842d2d465c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -268,6 +268,10 @@ int64_t opal_xive_free_irq(uint32_t girq);
 int64_t opal_xive_sync(uint32_t type, uint32_t id);
 int64_t opal_xive_dump(uint32_t type, uint32_t id);
 
+int64_t opal_imc_counters_init(uint32_t type, uint64_t address, uint64_t cpu);
+int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir);
+int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir);
+
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
   int depth, void *data);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 4e2f837b8bb7..ca9662bea7d6 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -20,6 +20,16 @@
 
 /* Needed for sanity check */
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static cpumask_t nest_imc_cpumask;
+static int nest_imc_cpumask_initialized;
+static int nest_pmus;
+/*
+ * Used to avoid races in counting the nest-pmu units during hotplug
+ * register and unregister
+ */
+static DEFINE_MUTEX(imc_nest_inited_reserve);
+
+struct imc_pmu_ref *nest_imc_refc;
 
 struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
@@ -43,12 +53,183 @@ static struct attribute_group imc_format_group = {
.attrs = nest_imc_format_attrs,
 };
 
+/* Get the cpumask printed to a buffer "buf" */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+   struct device_attribute *attr,
+   char *buf)
+{
+   cpumask_t *active_mask;
+
+   active_mask = &nest_imc_cpumask;
+   return cpumap_print_to_pagebuf(true, buf, active_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *imc_pmu_cpu

[PATCH v12 10/10] powerpc/perf: Thread IMC PMU functions

2017-07-03 Thread Anju T Sudhakar
Code to add support for thread IMC on cpuhotplug.
When a cpu goes offline, the LDBAR for that cpu is disabled, and when it comes
back online the previous ldbar value is written back to the LDBAR for that cpu.

To register the hotplug functions for thread_imc, a new state
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE is added to the list of existing
states.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 38 +-
 include/linux/cpuhotplug.h  |  1 +
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index bea4dafc2aad..700e3cb0c89f 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -931,6 +931,37 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
return 0;
 }
 
+static int ppc_thread_imc_cpu_online(unsigned int cpu)
+{
+   int rc = 0;
+   u64 ldbar_value;
+
+   if (per_cpu(thread_imc_mem, cpu) == NULL)
+   rc = thread_imc_mem_alloc(cpu, thread_imc_mem_size);
+
+   if (rc)
+   mtspr(SPRN_LDBAR, 0);
+
+   ldbar_value = ((u64)per_cpu(thread_imc_mem, cpu) & 
(u64)THREAD_IMC_LDBAR_MASK) |
+   (u64)THREAD_IMC_ENABLE;
+   mtspr(SPRN_LDBAR, ldbar_value);
+   return 0;
+}
+
+static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+{
+   mtspr(SPRN_LDBAR, 0);
+   return 0;
+}
+
+void thread_imc_cpu_init(void)
+{
+   cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+ "perf/powerpc/imc_thread:online",
+ ppc_thread_imc_cpu_online,
+ ppc_thread_imc_cpu_offline);
+}
+
 /*
  * imc_mem_init : Function to support memory allocation for core imc.
  */
@@ -1044,6 +1075,9 @@ int init_imc_pmu(struct imc_events *events, int idx,
return ret;
}
break;
+   case IMC_DOMAIN_THREAD:
+   thread_imc_cpu_init();
+   break;
default:
return  -1; /* Unknown domain */
}
@@ -1095,7 +1129,9 @@ int init_imc_pmu(struct imc_events *events, int idx,
cleanup_all_core_imc_memory(pmu_ptr);
}
/* For thread_imc, we have allocated memory, we need to free it */
-   if (pmu_ptr->domain == IMC_DOMAIN_THREAD)
+   if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
+   cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
+   }
return ret;
 }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index e145fffec093..937d1ec8c3e9 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -141,6 +141,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+   CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
CPUHP_AP_ONLINE_DYN,
-- 
2.11.0
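
For readers new to the cpuhotplug state machine used by thread_imc_cpu_init() above, here is a minimal, self-contained sketch of the same registration pattern. It is illustrative only: it uses the generic dynamic slot CPUHP_AP_ONLINE_DYN rather than the dedicated CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE state the patch adds, and it only logs instead of programming LDBAR; all demo_* names are invented for the example.

#include <linux/module.h>
#include <linux/cpuhotplug.h>
#include <linux/printk.h>

static enum cpuhp_state demo_hp_state;

/* Runs for every CPU that is (or later comes) online after setup. */
static int demo_cpu_online(unsigned int cpu)
{
	/* The patch does the real work here: allocate the per-cpu page
	 * (if needed) and program SPRN_LDBAR with its base address. */
	pr_info("imc-demo: cpu %u online\n", cpu);
	return 0;
}

/* Runs when a CPU goes offline (and on teardown of the state). */
static int demo_cpu_offline(unsigned int cpu)
{
	/* The patch writes 0 to SPRN_LDBAR here to stop the counters. */
	pr_info("imc-demo: cpu %u offline\n", cpu);
	return 0;
}

static int __init demo_init(void)
{
	int ret;

	/* With CPUHP_AP_ONLINE_DYN the core picks a free slot and returns
	 * it; the online callback is invoked for all currently online CPUs. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				"perf/powerpc/imc_demo:online",
				demo_cpu_online, demo_cpu_offline);
	if (ret < 0)
		return ret;
	demo_hp_state = ret;
	return 0;
}
module_init(demo_init);

static void __exit demo_exit(void)
{
	cpuhp_remove_state(demo_hp_state);
}
module_exit(demo_exit);

MODULE_LICENSE("GPL");

With a fixed enum entry, as in the patch, cpuhp_setup_state() returns 0 on success, so there is no slot number to remember and the return value can reasonably be ignored.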



[PATCH v12 01/10] powerpc/powernv: Data structure and macros definitions for IMC

2017-07-03 Thread Anju T Sudhakar
From: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>

Create a new header file to add the data structures and
macros needed for In-Memory Collection (IMC) counter support.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h | 99 ++
 1 file changed, 99 insertions(+)
 create mode 100644 arch/powerpc/include/asm/imc-pmu.h

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
new file mode 100644
index ..ffaea0b9c13e
--- /dev/null
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -0,0 +1,99 @@
+#ifndef PPC_POWERNV_IMC_PMU_DEF_H
+#define PPC_POWERNV_IMC_PMU_DEF_H
+
+/*
+ * IMC Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *   (C) 2017 Anju T Sudhakar, IBM Corporation.
+ *   (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * For static allocation of some of the structures.
+ */
+#define IMC_MAX_PMUS   32
+
+/*
+ * This macro is used for memory buffer allocation of
+ * event names and event string
+ */
+#define IMC_MAX_NAME_VAL_LEN   96
+
+/*
+ * Currently Microcode supports a max of 256KB of counter memory
+ * in the reserved memory region. Max pages to mmap (considering 4K PAGESIZE).
+ */
+#define IMC_MAX_PAGES  64
+
+/*
+ * Compatibility macros for IMC devices
+ */
+#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
+#define IMC_DTB_UNIT_COMPAT "ibm,imc-counters"
+
+/*
+ * Structure to hold memory address information for imc units.
+ */
+struct imc_mem_info {
+   u32 id;
+   u64 *vbase[IMC_MAX_PAGES];
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ */
+struct imc_events {
+   char *ev_name;
+   char *ev_value;
+};
+
+#define IMC_FORMAT_ATTR 0
+#define IMC_CPUMASK_ATTR   1
+#define IMC_EVENT_ATTR 2
+#define IMC_NULL_ATTR  3
+
+/*
+ * Device tree parser code detects IMC pmu support and
+ * registers new IMC pmus. This structure will hold the
+ * pmu functions, events, counter memory information
+ * and attrs for each imc pmu and will be referenced at
+ * the time of pmu registration.
+ */
+struct imc_pmu {
+   struct pmu pmu;
+   int domain;
+   /*
+* flag to notify whether the memory is mmaped
+* or allocated by kernel.
+*/
+   int imc_counter_mmaped;
+   struct imc_mem_info *mem_info;
+   struct imc_events *events;
+   u32 counter_mem_size;
+   /*
+* Attribute groups for the PMU. Slot 0 used for
+* format attribute, slot 1 used for cpusmask attribute,
+* slot 2 used for event attribute. Slot 3 keep as
+* NULL.
+*/
+   const struct attribute_group *attr_groups[4];
+};
+
+/*
+ * Domains for IMC PMUs
+ */
+#define IMC_DOMAIN_NEST 1
+
+#endif /* PPC_POWERNV_IMC_PMU_DEF_H */
-- 
2.11.0
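
The key layout decision in this header is that struct imc_pmu embeds the generic struct pmu, so later patches in the series can recover the wrapper from the pmu pointer that perf hands back. Below is a stand-alone user-space sketch of that pattern; container_of is re-defined locally so it compiles outside the kernel, and the structs are simplified stand-ins rather than the real definitions.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct pmu { const char *name; };	/* stand-in for the kernel's struct pmu */

struct imc_pmu {
	struct pmu pmu;		/* embedded generic pmu; container_of() recovers the wrapper */
	int domain;		/* e.g. IMC_DOMAIN_NEST == 1 */
};

/* Mirrors the imc_event_to_pmu() helper added later in the series. */
static struct imc_pmu *to_imc_pmu(struct pmu *p)
{
	return container_of(p, struct imc_pmu, pmu);
}

int main(void)
{
	struct imc_pmu nest = { .pmu = { .name = "nest_demo" }, .domain = 1 };
	struct pmu *generic = &nest.pmu;	/* what the perf core sees */

	printf("%s: domain %d\n", generic->name, to_imc_pmu(generic)->domain);
	return 0;
}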



[PATCH v12 02/10] powerpc/powernv: Autoload IMC device driver module

2017-07-03 Thread Anju T Sudhakar
Code to create platform devices for the IMC counters.
Platform devices are created based on the IMC compatibility
string.

A new config flag, "CONFIG_HV_PERF_IMC_CTRS", is added to contain the
IMC counter changes.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/Kconfig| 10 +
 arch/powerpc/platforms/powernv/Makefile   |  1 +
 arch/powerpc/platforms/powernv/opal-imc.c | 73 +++
 arch/powerpc/platforms/powernv/opal.c | 18 
 4 files changed, 102 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/opal-imc.c

diff --git a/arch/powerpc/platforms/powernv/Kconfig 
b/arch/powerpc/platforms/powernv/Kconfig
index 6a6f4ef46b9e..543c6cd5e8d3 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -30,3 +30,13 @@ config OPAL_PRD
help
  This enables the opal-prd driver, a facility to run processor
  recovery diagnostics on OpenPower machines
+
+config HV_PERF_IMC_CTRS
+   bool "Hypervisor supplied In Memory Collection PMU events (Nest & Core)"
+   default y
+   depends on PERF_EVENTS && PPC_POWERNV
+   help
+ Enable access to hypervisor supplied in-memory collection counters
+ in perf. IMC counters are available from Power9 systems.
+
+  If unsure, select Y.
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb3f482..715e531f6711 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_PPC_SCOM)+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)   += opal-memory-errors.o
 obj-$(CONFIG_TRACEPOINTS)  += opal-tracepoints.o
 obj-$(CONFIG_OPAL_PRD) += opal-prd.o
+obj-$(CONFIG_HV_PERF_IMC_CTRS) += opal-imc.o
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
new file mode 100644
index ..5b1045c81af4
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -0,0 +1,73 @@
+/*
+ * OPAL IMC interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright   (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static int opal_imc_counters_probe(struct platform_device *pdev)
+{
+   struct device_node *imc_dev = NULL;
+
+   if (!pdev || !pdev->dev.of_node)
+   return -ENODEV;
+
+   /*
+* Check whether this is kdump kernel. If yes, just return.
+*/
+   if (is_kdump_kernel())
+   return -ENODEV;
+
+   imc_dev = pdev->dev.of_node;
+   if (!imc_dev)
+   return -ENODEV;
+
+   return 0;
+}
+
+static const struct of_device_id opal_imc_match[] = {
+   { .compatible = IMC_DTB_COMPAT },
+   {},
+};
+
+static struct platform_driver opal_imc_driver = {
+   .driver = {
+   .name = "opal-imc-counters",
+   .of_match_table = opal_imc_match,
+   },
+   .probe = opal_imc_counters_probe,
+};
+
+MODULE_DEVICE_TABLE(of, opal_imc_match);
+module_platform_driver(opal_imc_driver);
+MODULE_DESCRIPTION("PowerNV OPAL IMC driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 59684b4af4d1..fbdca259ea76 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -30,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "powernv.h"
 
@@ -705,6 +707,17 @@ static void opal_pdev_init(const char *compatible)
of_platform_device_create(np, NULL, NULL);
 }
 
+#ifdef CONFIG_HV_PERF_IMC_CTRS
+static void __init opal_imc_init_dev(void)
+{
+   struct device_node *np;
+
+   np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
+   if (np)
+   of_platform_device_create(np, NULL, NULL);
+}
+#endif
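
The patch above relies on two pieces of driver-model plumbing: opal_imc_init_dev() turns the matching device-tree node into a platform device via of_platform_device_create(), and module_platform_driver() generates the module init/exit boilerplate that binds opal_imc_driver to that device. A rough, hand-expanded sketch of what that macro boils down to for a driver of this shape follows; the demo_* names are invented for the example and only the compatible string is taken from the patch.

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/mod_devicetable.h>

static int demo_probe(struct platform_device *pdev)
{
	/* Bound because the device's compatible string matched the table below. */
	dev_info(&pdev->dev, "demo IMC driver bound\n");
	return 0;
}

static const struct of_device_id demo_match[] = {
	{ .compatible = "ibm,opal-in-memory-counters" },
	{ }
};
MODULE_DEVICE_TABLE(of, demo_match);

static struct platform_driver demo_driver = {
	.driver = {
		.name = "demo-opal-imc",
		.of_match_table = demo_match,
	},
	.probe = demo_probe,
};

/* module_platform_driver(demo_driver) would emit roughly this pair: */
static int __init demo_driver_init(void)
{
	return platform_driver_register(&demo_driver);
}
module_init(demo_driver_init);

static void __exit demo_driver_exit(void)
{
	platform_driver_unregister(&demo_driver);
}
module_exit(demo_driver_exit);

MODULE_LICENSE("GPL");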

[PATCH v12 08/10] powerpc/powernv: Thread IMC events detection

2017-07-03 Thread Anju T Sudhakar
Code to add support for detection of thread IMC events. It adds a new
domain IMC_DOMAIN_THREAD and it is determined with the help of the
"type" property in the imc device-tree.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h| 2 ++
 arch/powerpc/platforms/powernv/opal-imc.c | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 24a6112ca0b5..e71e0d77d1d7 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -107,6 +107,7 @@ struct imc_pmu_ref {
 
 /* In-Memory Collection Counters Type */
 enum {
+   IMC_COUNTER_PER_THREAD  = 0x1,
IMC_COUNTER_PER_CORE= 0x4,
IMC_COUNTER_PER_CHIP= 0x10,
 };
@@ -116,6 +117,7 @@ enum {
  */
 #define IMC_DOMAIN_NEST 1
 #define IMC_DOMAIN_CORE 2
+#define IMC_DOMAIN_THREAD  3
 
 extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 extern struct imc_pmu *core_imc_pmu;
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index 91b8dd8d7619..2f857ec826e6 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -380,7 +380,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
 /*
  * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index
  * and domain as the inputs.
- * Allocates memory for the pmu, sets up its domain (NEST/CORE), and
+ * Allocates memory for the pmu, sets up its domain (NEST/CORE/THREAD), and
  * calls imc_events_setup() to allocate memory for the events supported
  * by this pmu. Assigns a name for the pmu.
  *
@@ -531,6 +531,8 @@ static int opal_imc_counters_probe(struct platform_device 
*pdev)
domain = IMC_DOMAIN_NEST;
else if (type == IMC_COUNTER_PER_CORE)
domain = IMC_DOMAIN_CORE;
+   else if (type == IMC_COUNTER_PER_THREAD)
+   domain = IMC_DOMAIN_THREAD;
else
continue;
if (!imc_pmu_create(imc_dev, pmu_count, domain))
-- 
2.11.0
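
With this patch the "type" values and the domains they map to are spread across the enum and the defines in imc-pmu.h plus the if/else chain in the probe loop. The sketch below collapses that mapping into one helper for reference; it is a stand-alone user-space program, the constants mirror the defines as of this patch, and the helper itself is illustrative rather than kernel code.

#include <errno.h>
#include <stdio.h>

/* Values mirror the enum/defines in imc-pmu.h as of this patch. */
#define IMC_COUNTER_PER_THREAD	0x1
#define IMC_COUNTER_PER_CORE	0x4
#define IMC_COUNTER_PER_CHIP	0x10

#define IMC_DOMAIN_NEST		1
#define IMC_DOMAIN_CORE		2
#define IMC_DOMAIN_THREAD	3

/* Same mapping the probe loop performs with its if/else chain. */
static int imc_type_to_domain(unsigned int type)
{
	switch (type) {
	case IMC_COUNTER_PER_CHIP:
		return IMC_DOMAIN_NEST;
	case IMC_COUNTER_PER_CORE:
		return IMC_DOMAIN_CORE;
	case IMC_COUNTER_PER_THREAD:
		return IMC_DOMAIN_THREAD;
	default:
		return -EINVAL;		/* unknown unit type: skip it */
	}
}

int main(void)
{
	unsigned int types[] = { 0x10, 0x4, 0x1, 0x2 };

	for (unsigned int i = 0; i < sizeof(types) / sizeof(types[0]); i++)
		printf("type 0x%x -> domain %d\n", types[i],
		       imc_type_to_domain(types[i]));
	return 0;
}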



[PATCH v12 09/10] powerpc/perf: Thread IMC PMU functions

2017-07-03 Thread Anju T Sudhakar
Code to add PMU functions required for event initialization,
read, update, add, del etc. for thread IMC PMU. Thread IMC PMUs are used
for per-task monitoring.

For each CPU, a page of memory is allocated and kept static, i.e.
these pages exist until the machine shuts down. The base address of
this page is assigned to the LDBAR of that cpu. As soon as that is
done, the thread IMC counters start running for that cpu and their
data is written to the allocated page. We use this for per-task
monitoring: when we start monitoring a task, the event is added to
the task and the initial value of the counter is read; when we stop
monitoring the task, the final value is read and the difference is
the event data.

A task can also move to a different cpu. Suppose a task X moves from
cpu A to cpu B. When the task is scheduled out of A, we get an
event_del for A and the event data is updated; after that we stop
updating X's event data. As soon as X moves on to B, event_add is
called for B and we resume updating the event data. In this way the
event data keeps being accumulated even when the task is scheduled
on different cpus.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h|   4 +
 arch/powerpc/perf/imc-pmu.c   | 241 --
 arch/powerpc/platforms/powernv/opal-imc.c |   2 +
 3 files changed, 238 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index e71e0d77d1d7..470301ac806b 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -44,6 +44,9 @@
 #define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
 #define IMC_DTB_UNIT_COMPAT "ibm,imc-counters"
 
+#define THREAD_IMC_LDBAR_MASK   0x0003e000ULL
+#define THREAD_IMC_ENABLE   0x8000ULL
+
 /*
  * Structure to hold memory address information for imc units.
  */
@@ -122,4 +125,5 @@ enum {
 extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 extern struct imc_pmu *core_imc_pmu;
 extern int init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu 
*pmu_ptr);
+void thread_imc_disable(void);
 #endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index c1a275ed2510..bea4dafc2aad 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -18,6 +18,9 @@
 #include 
 #include 
 
+/* Maintains base address for all the cpus */
+static DEFINE_PER_CPU(u64 *, thread_imc_mem);
+
 /* Needed for sanity check */
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 static cpumask_t nest_imc_cpumask;
@@ -33,6 +36,7 @@ static DEFINE_MUTEX(imc_nest_inited_reserve);
 struct imc_pmu_ref *nest_imc_refc;
 struct imc_pmu_ref *core_imc_refc;
 struct imc_pmu *core_imc_pmu;
+static int thread_imc_mem_size;
 
 struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
@@ -568,6 +572,137 @@ static int core_imc_event_init(struct perf_event *event)
return 0;
 }
 
+static int thread_imc_event_init(struct perf_event *event)
+{
+   int rc, core_id;
+   u32 config = event->attr.config;
+   struct task_struct *target;
+   struct imc_pmu *pmu;
+   struct imc_pmu_ref *ref;
+
+   if (event->attr.type != event->pmu->type)
+   return -ENOENT;
+
+   /* Sampling not supported */
+   if (event->hw.sample_period)
+   return -EINVAL;
+
+   event->hw.idx = -1;
+   pmu = imc_event_to_pmu(event);
+   core_id = event->cpu / threads_per_core;
+
+   /* Sanity check for config (event offset and rvalue) */
+   if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) ||
+   ((config & IMC_EVENT_RVALUE_MASK) != 0))
+   return -EINVAL;
+
+   target = event->hw.target;
+   if (!target)
+   return -EINVAL;
+
+   if (!is_core_imc_mem_inited(event->cpu))
+   return -ENODEV;
+
+   event->pmu->task_ctx_nr = perf_sw_context;
+   core_id = event->cpu / threads_per_core;
+
+   /*
+* Core pmu units are enabled only when it is used.
+* See if this is triggered for the first time.
+* If yes, take the mutex lock and enable the core counters.
+* If not, just increment the count in core_imc_refc struct.
+*/
+   ref = &core_imc_refc[core_id];
+   if (!ref)
+   return -EINVAL;
+
+   mutex_lock(&ref->lock);
+   if (ref->refc == 0) {
+   rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+
get_hard_smp_processor_id(event->cpu));
+   if (rc) {
+

[PATCH v12 04/10] powerpc/perf: Add generic IMC pmu group and event functions

2017-07-03 Thread Anju T Sudhakar
Device tree IMC driver code parses the IMC units and their events. It
passes the information to IMC pmu code which is placed in powerpc/perf
as "imc-pmu.c".

The patch adds a set of generic imc pmu related event functions to be
used by each imc pmu unit. Add code to set up the format attribute and
to register imc pmus. Add an event_init function for nest_imc events.

Since the IMC counters' data are periodically fed to a memory location,
the functions to read/update, start/stop, add/del can be generic and
can be used by all IMC PMU units.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h|   5 +
 arch/powerpc/perf/Makefile|   3 +
 arch/powerpc/perf/imc-pmu.c   | 283 ++
 arch/powerpc/platforms/powernv/opal-imc.c |  11 +-
 4 files changed, 300 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/perf/imc-pmu.c

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 2a0239e2590d..25d0c57d14fe 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -63,6 +63,9 @@ struct imc_events {
 #define IMC_CPUMASK_ATTR   1
 #define IMC_EVENT_ATTR 2
 #define IMC_NULL_ATTR  3
+#define IMC_EVENT_OFFSET_MASK  0xULL
+#define IMC_EVENT_RVALUE_MASK  0x1ULL
+#define IMC_NEST_EVENT_MODE 0x1feULL
 
 /*
  * Device tree parser code detects IMC pmu support and
@@ -101,4 +104,6 @@ enum {
  */
 #define IMC_DOMAIN_NEST 1
 
+extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+extern int init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu 
*pmu_ptr);
 #endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4d606b99a5cb..b29d918814d3 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -6,6 +6,9 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)  += power4-pmu.o ppc970-pmu.o power5-pmu.o \
   power5+-pmu.o power6-pmu.o power7-pmu.o \
   isa207-common.o power8-pmu.o power9-pmu.o
+
+obj-$(CONFIG_HV_PERF_IMC_CTRS) += imc-pmu.o
+
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index ..4e2f837b8bb7
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,283 @@
+/*
+ * Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ *   (C) 2017 Anju T Sudhakar, IBM Corporation.
+ *   (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Needed for sanity check */
+struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+
+struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+{
+   return container_of(event->pmu, struct imc_pmu, pmu);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-47");
+PMU_FORMAT_ATTR(offset, "config:0-31");
+PMU_FORMAT_ATTR(rvalue, "config:32");
+PMU_FORMAT_ATTR(mode, "config:33-40");
+static struct attribute *nest_imc_format_attrs[] = {
+   &format_attr_event.attr,
+   &format_attr_offset.attr,
+   &format_attr_rvalue.attr,
+   &format_attr_mode.attr,
+   NULL,
+};
+
+static struct attribute_group imc_format_group = {
+   .name = "format",
+   .attrs = nest_imc_format_attrs,
+};
+
+static int nest_imc_event_init(struct perf_event *event)
+{
+   int chip_id;
+   u32 l_config, config = event->attr.config;
+   struct imc_mem_info *pcni;
+   struct imc_pmu *pmu;
+   bool flag = false;
+
+   if (event->attr.type != event->pmu->type)
+   return -ENOENT;
+
+   /* Sampling not supported */
+   if (event->hw.sample_period)
+   return -EINVAL;
+
+   /* unsupported modes and filters */
+   if (event->attr.exclude_user   ||
+   event->attr.exclude_kernel ||
+   event->attr.exclude_hv ||
+   event->attr.exclude_idle   ||
+   event->attr.exclude_host   ||
+   event->attr.exclude_guest)
+   return -EINVAL;
+
+   if (event->cpu < 0)
+   return -EINVAL;
+
+   pmu = imc_event_to_pmu(event);
+
+   /*
+* Sanity check for config (event offset, mode and rvalue).
+* mode and rvalue should be zero, if not just return.
+*/
+   if (((config & IMC_EVENT_OFFSET_MAS
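
The format attributes registered above define how event->attr.config is carved up: bits 0-31 are the offset of the counter inside the IMC memory region, bit 32 is the "rvalue" flag, and bits 33-40 select the mode. The small sketch below unpacks those fields; the masks are derived here from the bit ranges in the format strings rather than taken from the header defines, so treat them as illustrative only.

#include <stdint.h>
#include <stdio.h>

#define CFG_OFFSET(cfg)  ((uint64_t)(cfg) & 0xffffffffULL)	/* config:0-31 */
#define CFG_RVALUE(cfg)  (((uint64_t)(cfg) >> 32) & 0x1ULL)	/* config:32   */
#define CFG_MODE(cfg)    (((uint64_t)(cfg) >> 33) & 0xffULL)	/* config:33-40 */

int main(void)
{
	/* Hypothetical encoding: mode = 1, offset = 0x118, rvalue = 0. */
	uint64_t config = (0x1ULL << 33) | 0x118ULL;

	printf("offset=0x%llx rvalue=%llu mode=%llu\n",
	       (unsigned long long)CFG_OFFSET(config),
	       (unsigned long long)CFG_RVALUE(config),
	       (unsigned long long)CFG_MODE(config));
	return 0;
}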

[PATCH v12 06/10] powerpc/powernv: Core IMC events detection

2017-07-03 Thread Anju T Sudhakar
From: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>

This patch adds support for detection of core IMC events along with the
Nest IMC events. It adds a new domain IMC_DOMAIN_CORE and it is determined
with the help of the "type" property in the IMC device tree.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/imc-pmu.h|  3 +++
 arch/powerpc/perf/imc-pmu.c   |  2 ++
 arch/powerpc/platforms/powernv/opal-imc.c | 14 +++---
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index aeed903b2a79..24a6112ca0b5 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -107,6 +107,7 @@ struct imc_pmu_ref {
 
 /* In-Memory Collection Counters Type */
 enum {
+   IMC_COUNTER_PER_CORE= 0x4,
IMC_COUNTER_PER_CHIP= 0x10,
 };
 
@@ -114,7 +115,9 @@ enum {
  * Domains for IMC PMUs
  */
 #define IMC_DOMAIN_NEST 1
+#define IMC_DOMAIN_CORE 2
 
 extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+extern struct imc_pmu *core_imc_pmu;
 extern int init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu 
*pmu_ptr);
 #endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index ca9662bea7d6..041d3097d42a 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -31,6 +31,8 @@ static DEFINE_MUTEX(imc_nest_inited_reserve);
 
 struct imc_pmu_ref *nest_imc_refc;
 
+struct imc_pmu *core_imc_pmu;
+
 struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
return container_of(event->pmu, struct imc_pmu, pmu);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c 
b/arch/powerpc/platforms/powernv/opal-imc.c
index 406f7c10850a..aeef59b66420 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -379,7 +379,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
 /*
  * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index
  * and domain as the inputs.
- * Allocates memory for the pmu, sets up its domain (NEST), and
+ * Allocates memory for the pmu, sets up its domain (NEST/CORE), and
  * calls imc_events_setup() to allocate memory for the events supported
  * by this pmu. Assigns a name for the pmu.
  *
@@ -406,7 +406,10 @@ static int imc_pmu_create(struct device_node *parent, int 
pmu_index, int domain)
pmu_ptr->domain = domain;
 
/* Needed for hotplug/migration */
-   per_nest_pmu_arr[pmu_index] = pmu_ptr;
+   if (pmu_ptr->domain == IMC_DOMAIN_CORE)
+   core_imc_pmu = pmu_ptr;
+   else if (pmu_ptr->domain == IMC_DOMAIN_NEST)
+   per_nest_pmu_arr[pmu_index] = pmu_ptr;
 
pp = of_find_property(parent, "name", NULL);
if (!pp) {
@@ -427,7 +430,10 @@ static int imc_pmu_create(struct device_node *parent, int 
pmu_index, int domain)
goto free_pmu;
}
/* Save the name to register it later */
-   sprintf(buf, "nest_%s", (char *)pp->value);
+   if (pmu_ptr->domain == IMC_DOMAIN_NEST)
+   sprintf(buf, "nest_%s", (char *)pp->value);
+   else
+   sprintf(buf, "%s_imc", (char *)pp->value);
pmu_ptr->pmu.name = (char *)buf;
 
if (of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size))
@@ -505,6 +511,8 @@ static int opal_imc_counters_probe(struct platform_device 
*pdev)
continue;
if (type == IMC_COUNTER_PER_CHIP)
domain = IMC_DOMAIN_NEST;
+   else if (type == IMC_COUNTER_PER_CORE)
+   domain = IMC_DOMAIN_CORE;
else
continue;
if (!imc_pmu_create(imc_dev, pmu_count, domain))
-- 
2.11.0
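
One small behavioural detail of this patch is the naming rule in imc_pmu_create(): nest units keep the "nest_" prefix, while core units get an "_imc" suffix appended to the device-tree "name" property. A tiny user-space sketch of that rule follows; imc_pmu_name() and the sample names are invented for the example, and the real code sprintf()s into an allocated buffer instead.

#include <stddef.h>
#include <stdio.h>

#define IMC_DOMAIN_NEST 1
#define IMC_DOMAIN_CORE 2

/* Same naming convention as imc_pmu_create(), factored into a helper. */
static void imc_pmu_name(char *buf, size_t len, const char *dt_name, int domain)
{
	if (domain == IMC_DOMAIN_NEST)
		snprintf(buf, len, "nest_%s", dt_name);
	else
		snprintf(buf, len, "%s_imc", dt_name);
}

int main(void)
{
	char name[64];

	imc_pmu_name(name, sizeof(name), "mcs01", IMC_DOMAIN_NEST);
	printf("%s\n", name);			/* nest_mcs01 */
	imc_pmu_name(name, sizeof(name), "core", IMC_DOMAIN_CORE);
	printf("%s\n", name);			/* core_imc */
	return 0;
}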


