This patch adds support for performance monitoring of papr
nvdimm devices via the perf interface. It adds callback functions
such as add/del/read/event_init for the nvdimm_pmu structure.

The patch adds a new member 'priv' to the pdev_archdata structure to save
the nvdimm_pmu device pointer, so that the pmu device can be unregistered.

The papr_scm_pmu_register function populates the nvdimm_pmu structure
with events and attribute groups, along with the event handling functions.
The event handling functions internally use an hcall to get events and
counter data. Finally, the populated nvdimm_pmu structure is passed on
to register the pmu device.

Result on a power9 machine with 2 nvdimm devices:

Ex: List all events using perf list

command:# perf list nmem

  nmem0/cchrhcnt/                                    [Kernel PMU event]
  nmem0/cchwhcnt/                                    [Kernel PMU event]
  nmem0/critrscu/                                    [Kernel PMU event]
  nmem0/ctlresct/                                    [Kernel PMU event]
  nmem0/ctlrestm/                                    [Kernel PMU event]
  nmem0/fastwcnt/                                    [Kernel PMU event]
  nmem0/hostlcnt/                                    [Kernel PMU event]
  nmem0/hostldur/                                    [Kernel PMU event]
  nmem0/hostscnt/                                    [Kernel PMU event]
  nmem0/hostsdur/                                    [Kernel PMU event]
  nmem0/medrcnt/                                     [Kernel PMU event]
  nmem0/medrdur/                                     [Kernel PMU event]
  nmem0/medwcnt/                                     [Kernel PMU event]
  nmem0/medwdur/                                     [Kernel PMU event]
  nmem0/memlife/                                     [Kernel PMU event]
  nmem0/noopstat/                                    [Kernel PMU event]
  nmem0/ponsecs/                                     [Kernel PMU event]
  nmem1/cchrhcnt/                                    [Kernel PMU event]
  nmem1/cchwhcnt/                                    [Kernel PMU event]
  nmem1/critrscu/                                    [Kernel PMU event]
  ...
  nmem1/noopstat/                                    [Kernel PMU event]
  nmem1/ponsecs/                                     [Kernel PMU event]

Signed-off-by: Kajol Jain <kj...@linux.ibm.com>
---
 arch/powerpc/include/asm/device.h         |   5 +
 arch/powerpc/platforms/pseries/papr_scm.c | 284 +++++++++++++++++++++-
 2 files changed, 288 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/device.h 
b/arch/powerpc/include/asm/device.h
index 219559d65864..47ed639f3b8f 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -48,6 +48,11 @@ struct dev_archdata {
 
 struct pdev_archdata {
        u64 dma_mask;
+       /*
+        * Opaque driver-private pointer. The papr_scm driver stores its
+        * nvdimm_pmu structure here once the pmu has been registered, so
+        * that the remove path can find and unregister the pmu device.
+        */
+       void *priv;
 };
 
 #endif /* _ASM_POWERPC_DEVICE_H */
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index ef26fe40efb0..997d379094d0 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -18,6 +18,8 @@
 #include <asm/plpar_wrappers.h>
 #include <asm/papr_pdsm.h>
 #include <asm/mce.h>
+#include <linux/perf_event.h>
+#include <linux/ctype.h>
 
 #define BIND_ANY_ADDR (~0ul)
 
@@ -116,6 +118,9 @@ struct papr_scm_priv {
 
        /* length of the stat buffer as expected by phyp */
        size_t stat_buffer_len;
+
+        /* array to have event_code and stat_id mappings */
+       char **nvdimm_events_map;
 };
 
 static int papr_scm_pmem_flush(struct nd_region *nd_region,
@@ -329,6 +334,271 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv 
*p,
        return 0;
 }
 
+/*
+ * "events" sysfs group of the nvdimm pmu. The attribute list is not known
+ * at build time: it is discovered per device and filled in at runtime.
+ * NOTE(review): this is a single file-scope object, so every successful
+ * papr_scm_pmu_check_events() call overwrites .attrs - confirm that this is
+ * intended when more than one nvdimm device is present.
+ */
+static struct attribute_group nvdimm_pmu_events_group = {
+       .name = "events",
+       /* .attrs is set in papr_scm_pmu_check_events function */
+};
+
+/* "event" format entry: the event code lives in bits 0-37 of config */
+PMU_FORMAT_ATTR(event, "config:0-37");
+
+/* NULL-terminated list of the entries exposed under "format" */
+static struct attribute *nvdimm_pmu_format_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+/* "format" sysfs group of the nvdimm pmu */
+static struct attribute_group nvdimm_pmu_format_group = {
+       .name = "format",
+       .attrs = nvdimm_pmu_format_attr,
+};
+
+/*
+ * NULL-terminated list of attribute groups handed to the nvdimm pmu in
+ * papr_scm_pmu_register().
+ */
+static const struct attribute_group *nvdimm_pmu_attr_groups[] = {
+       &nvdimm_pmu_format_group,
+       &nvdimm_pmu_events_group,
+       NULL,
+};
+
+/*
+ * papr_scm_pmu_get_value - fetch the current value of a single statistic
+ * @event: perf event; attr.config holds the 1-based event code
+ * @dev:   device whose driver_data points to the owning papr_scm_priv
+ * @count: set to the statistic value on success, left untouched on failure
+ *
+ * Looks up the stat_id recorded for the event code in
+ * p->nvdimm_events_map and queries that one statistic through
+ * drc_pmem_query_stats().
+ */
+static void papr_scm_pmu_get_value(struct perf_event *event,
+                                  struct device *dev, u64 *count)
+{
+       struct papr_scm_priv *p = dev->driver_data;
+       struct papr_scm_perf_stats *stats;
+       struct papr_scm_perf_stat *stat;
+       u64 config = event->attr.config;
+       size_t size;
+
+       /*
+        * Event codes start at 1 (the map is indexed with config - 1), so a
+        * config of 0 would read in front of the array.
+        * NOTE(review): the upper bound is not known here - confirm that the
+        * event_init path rejects codes beyond the last registered event.
+        */
+       if (!p->nvdimm_events_map || !config)
+               return;
+
+       /* Allocate buffer to hold single performance stat */
+       size = sizeof(struct papr_scm_perf_stats) +
+               sizeof(struct papr_scm_perf_stat);
+       stats = kzalloc(size, GFP_KERNEL);
+       if (!stats)
+               return;
+
+       stat = &stats->scm_statistic[0];
+       memcpy(&stat->stat_id, p->nvdimm_events_map[config - 1],
+              sizeof(stat->stat_id));
+       stat->stat_val = 0;
+
+       /* Only report a value when the query succeeded */
+       if (drc_pmem_query_stats(p, stats, 1) >= 0)
+               *count = be64_to_cpu(stat->stat_val);
+
+       kfree(stats);
+}
+
+/*
+ * papr_scm_pmu_add - 'add' callback of the nvdimm pmu
+ * @event: perf event being added
+ * @flags: not used
+ * @dev:   device passed on to papr_scm_pmu_get_value()
+ *
+ * Takes a snapshot of the statistic and stores it in hw.prev_count as the
+ * baseline for later reads. If the value cannot be fetched the baseline
+ * is 0. Always returns 0.
+ */
+static int papr_scm_pmu_add(struct perf_event *event, int flags,
+                           struct device *dev)
+{
+       u64 baseline = 0;
+
+       papr_scm_pmu_get_value(event, dev, &baseline);
+       local64_set(&event->hw.prev_count, baseline);
+
+       return 0;
+}
+
+/*
+ * papr_scm_pmu_read - 'read' callback of the nvdimm pmu
+ * @event: perf event to update
+ * @dev:   device passed on to papr_scm_pmu_get_value()
+ *
+ * Fetches the current value of the statistic, stores it in hw.prev_count
+ * and adds the increase since the previous snapshot to event->count.
+ */
+static void papr_scm_pmu_read(struct perf_event *event, struct device *dev)
+{
+       u64 prev, now;
+
+       /*
+        * Seed 'now' with the previous snapshot: papr_scm_pmu_get_value()
+        * leaves it untouched when the query fails, so a failed read yields
+        * a zero delta instead of resetting the snapshot to 0.
+        */
+       now = local64_read(&event->hw.prev_count);
+       papr_scm_pmu_get_value(event, dev, &now);
+       prev = local64_xchg(&event->hw.prev_count, now);
+
+       /*
+        * Both values are unsigned, so "now - prev >= 0" would always be
+        * true; compare the operands to skip a counter that went backwards.
+        */
+       if (now >= prev)
+               local64_add(now - prev, &event->count);
+}
+
+/*
+ * papr_scm_pmu_del - 'del' callback of the nvdimm pmu
+ * @event: perf event being removed
+ * @flags: not used
+ * @dev:   device passed on to papr_scm_pmu_read()
+ *
+ * Performs one last read so that event->count is brought up to date.
+ */
+static void papr_scm_pmu_del(struct perf_event *event, int flags,
+                            struct device *dev)
+{
+       papr_scm_pmu_read(event, dev);
+}
+
+/*
+ * nvdimm_pmu_uinit - unregister an nvdimm pmu and free its structure
+ * @nd_pmu: pmu saved in pdev->archdata.priv; may be NULL
+ */
+static void nvdimm_pmu_uinit(struct nvdimm_pmu *nd_pmu)
+{
+       /*
+        * papr_scm_remove() calls this unconditionally, but archdata.priv is
+        * only assigned after a successful pmu registration.
+        */
+       if (!nd_pmu)
+               return;
+
+       unregister_nvdimm_pmu(&nd_pmu->pmu);
+       kfree(nd_pmu);
+}
+
+/*
+ * papr_scm_pmu_register - set up and register the nvdimm pmu of a device
+ * @p: papr_scm private data of the device
+ *
+ * Allocates an nvdimm_pmu, fills in its name, callbacks and attribute
+ * groups and passes it to register_nvdimm_pmu(). On success the pmu is
+ * saved in pdev->archdata.priv for the remove path.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails, otherwise the
+ * error returned by register_nvdimm_pmu().
+ */
+static int papr_scm_pmu_register(struct papr_scm_priv *p)
+{
+       struct nvdimm_pmu *papr_scm_pmu;
+       int rc;
+
+       /*
+        * Use a plain allocation: the structure is released with kfree()
+        * both on the error path below and in nvdimm_pmu_uinit(), which is
+        * not valid for device-managed (devm_*) memory.
+        */
+       papr_scm_pmu = kzalloc(sizeof(*papr_scm_pmu), GFP_KERNEL);
+       if (!papr_scm_pmu)
+               return -ENOMEM;
+
+       papr_scm_pmu->name = nvdimm_name(p->nvdimm);
+       papr_scm_pmu->read = papr_scm_pmu_read;
+       papr_scm_pmu->add = papr_scm_pmu_add;
+       papr_scm_pmu->del = papr_scm_pmu_del;
+       papr_scm_pmu->attr_groups = nvdimm_pmu_attr_groups;
+
+       rc = register_nvdimm_pmu(papr_scm_pmu, p->pdev);
+       if (rc)
+               goto pmu_register_err;
+
+       /*
+        * Set archdata.priv value to nvdimm_pmu structure, to handle the
+        * unregistering of pmu device.
+        */
+       p->pdev->archdata.priv = papr_scm_pmu;
+       return 0;
+
+pmu_register_err:
+       kfree(papr_scm_pmu);
+       return rc;
+}
+
+/*
+ * device_show_string - sysfs 'show' handler for an event attribute
+ * @dev:  not used
+ * @attr: device attribute embedded in a perf_pmu_events_attr
+ * @buf:  sysfs output buffer
+ *
+ * Emits the event string of the enclosing perf_pmu_events_attr followed by
+ * a newline and returns the result of sysfs_emit().
+ */
+static ssize_t device_show_string(struct device *dev,
+                                 struct device_attribute *attr, char *buf)
+{
+       struct perf_pmu_events_attr *pmu_attr =
+               container_of(attr, struct perf_pmu_events_attr, attr);
+
+       return sysfs_emit(buf, "%s\n", (char *)pmu_attr->event_str);
+}
+
+/*
+ * strtolower - lower-case a string in place
+ * @updated_name: NUL-terminated, writable string
+ *
+ * Converts every upper-case character to lower case and returns the result
+ * of strim() on the string.
+ */
+static char *strtolower(char *updated_name)
+{
+       char *c;
+
+       for (c = updated_name; *c; c++) {
+               if (isupper(*c))
+                       *c = tolower(*c);
+       }
+
+       return strim(updated_name);
+}
+
+/*
+ * device_str_attr_create_ : build a read-only event attribute
+ * @name: event name; lower-cased in place and used as the attribute name
+ * @str:  event string shown when the attribute is read
+ *
+ * Neither string is copied, the attribute keeps pointing at them.
+ * Returns the new attribute, or NULL if the allocation fails.
+ */
+static struct attribute *device_str_attr_create_(char *name, char *str)
+{
+       struct perf_pmu_events_attr *ev_attr;
+
+       ev_attr = kzalloc(sizeof(*ev_attr), GFP_KERNEL);
+       if (!ev_attr)
+               return NULL;
+
+       sysfs_attr_init(&ev_attr->attr.attr);
+       ev_attr->attr.show = device_show_string;
+       ev_attr->attr.attr.mode = 0444;
+       ev_attr->attr.attr.name = strtolower(name);
+       ev_attr->event_str = str;
+
+       return &ev_attr->attr.attr;
+}
+
+/*
+ * papr_scm_pmu_check_events - discover the statistics usable as pmu events
+ * @p: papr_scm private data; stat_buffer_len must already be set
+ *
+ * Retrieves the list of statistics with drc_pmem_query_stats() and then
+ * queries each one individually. For every statistic that can be read, an
+ * attribute named after the lower-cased stat_id with the content
+ * "event=0x<code>" is created and the stat_id is recorded in
+ * p->nvdimm_events_map[<code> - 1]. Both arrays are NULL-terminated. On
+ * success the attribute array is installed in nvdimm_pmu_events_group.
+ *
+ * NOTE(review): nvdimm_pmu_events_group is one file-scope object, so each
+ * successful call replaces its .attrs - confirm this is intended when
+ * several nvdimm devices are probed.
+ *
+ * Return: 0 on success, -ENOENT if no statistic can be used as an event,
+ * -ENOMEM on allocation failure, otherwise the error returned by
+ * drc_pmem_query_stats(). On failure p->nvdimm_events_map is NULL.
+ */
+static int papr_scm_pmu_check_events(struct papr_scm_priv *p)
+{
+       struct papr_scm_perf_stat *stat;
+       struct papr_scm_perf_stats *stats, *single_stats;
+       struct attribute **events;
+       char *eventcode, *eventname, *statid;
+       u32 index, total_events;
+       int rc, attrs = 0;
+       size_t size;
+
+       /* Also guards the subtraction below against wrapping */
+       if (p->stat_buffer_len < sizeof(struct papr_scm_perf_stats))
+               return -ENOENT;
+
+       total_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats))
+                       / sizeof(struct papr_scm_perf_stat);
+       if (!total_events)
+               return -ENOENT;
+
+       /* Allocate the buffer for phyp where stats are written */
+       stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
+       if (!stats)
+               return -ENOMEM;
+
+       /*
+        * Allocate memory to nvdimm_event_map; one extra slot keeps the NULL
+        * terminator inside the array when every statistic is supported.
+        */
+       p->nvdimm_events_map = kcalloc(total_events + 1, sizeof(char *),
+                                      GFP_KERNEL);
+       if (!p->nvdimm_events_map) {
+               rc = -ENOMEM;
+               goto out_stats;
+       }
+
+       /* Called to get list of events supported */
+       rc = drc_pmem_query_stats(p, stats, 0);
+       if (rc)
+               goto out_nvdimm_events_map;
+
+       /* Allocate buffer to hold single performance stat */
+       size = sizeof(struct papr_scm_perf_stats) +
+               sizeof(struct papr_scm_perf_stat);
+       single_stats = kzalloc(size, GFP_KERNEL);
+       if (!single_stats) {
+               rc = -ENOMEM;
+               goto out_nvdimm_events_map;
+       }
+
+       /* Same extra slot for the terminating NULL attribute */
+       events = kcalloc(total_events + 1, sizeof(*events), GFP_KERNEL);
+       if (!events) {
+               rc = -ENOMEM;
+               goto out_single_stats;
+       }
+
+       for (index = 0, stat = stats->scm_statistic;
+            index < total_events; index++, stat++) {
+
+               single_stats->scm_statistic[0] = *stat;
+               rc = drc_pmem_query_stats(p, single_stats, 1);
+
+               /*
+                * stat_id is a fixed-size field (it is copied with sizeof()
+                * in papr_scm_pmu_get_value()) and may not be NUL-terminated,
+                * so every access below is bounded by its size.
+                */
+               if (rc < 0) {
+                       pr_info("Event not supported %.*s for device %s\n",
+                               (int)sizeof(stat->stat_id), stat->stat_id,
+                               nvdimm_name(p->nvdimm));
+                       continue;
+               }
+
+               eventcode = kasprintf(GFP_KERNEL, "event=0x%x", attrs + 1);
+               eventname = kstrndup(stat->stat_id, sizeof(stat->stat_id),
+                                    GFP_KERNEL);
+               /*
+                * Full-width, zero-padded copy: papr_scm_pmu_get_value()
+                * copies sizeof(stat_id) bytes out of this buffer.
+                */
+               statid = kzalloc(sizeof(stat->stat_id) + 1, GFP_KERNEL);
+
+               if (eventcode && eventname && statid) {
+                       memcpy(statid, stat->stat_id,
+                              strnlen(stat->stat_id, sizeof(stat->stat_id)));
+
+                       events[attrs] = device_str_attr_create_(eventname,
+                                                               eventcode);
+                       if (events[attrs]) {
+                               p->nvdimm_events_map[attrs] = statid;
+                               attrs++;
+                               continue;
+                       }
+               }
+
+               /* This event could not be set up; skip it */
+               kfree(eventcode);
+               kfree(eventname);
+               kfree(statid);
+       }
+       events[attrs] = NULL;
+       p->nvdimm_events_map[attrs] = NULL;
+
+       if (!attrs) {
+               /* rc may still be 0 here; never report success without events */
+               rc = -ENOENT;
+               goto out_events;
+       }
+
+       nvdimm_pmu_events_group.attrs = events;
+       kfree(single_stats);
+       kfree(stats);
+       return 0;
+
+out_events:
+       kfree(events);
+out_single_stats:
+       kfree(single_stats);
+out_nvdimm_events_map:
+       kfree(p->nvdimm_events_map);
+       /* papr_scm_remove() frees this pointer again; don't leave it dangling */
+       p->nvdimm_events_map = NULL;
+out_stats:
+       kfree(stats);
+       return rc;
+}
+
 /*
  * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
  * health information.
@@ -923,7 +1193,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
        struct nd_mapping_desc mapping;
        struct nd_region_desc ndr_desc;
        unsigned long dimm_flags;
-       int target_nid, online_nid;
+       int target_nid, online_nid, rc;
        ssize_t stat_size;
 
        p->bus_desc.ndctl = papr_scm_ndctl;
@@ -1015,6 +1285,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
                p->stat_buffer_len = stat_size;
                dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
                        p->stat_buffer_len);
+
+               rc = papr_scm_pmu_check_events(p);
+               if (rc) {
+                       dev_info(&p->pdev->dev, "nvdimm pmu check events 
failed, rc=%d\n", rc);
+               } else {
+                       rc = papr_scm_pmu_register(p);
+                       if (rc)
+                               dev_info(&p->pdev->dev, "nvdimm pmu didn't 
register rc=%d\n", rc);
+               }
        } else {
                dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n");
        }
@@ -1195,7 +1474,10 @@ static int papr_scm_remove(struct platform_device *pdev)
 
        nvdimm_bus_unregister(p->bus);
        drc_pmem_unbind(p);
+       nvdimm_pmu_uinit(pdev->archdata.priv);
+       pdev->archdata.priv = NULL;
        kfree(p->bus_desc.provider_name);
+       kfree(p->nvdimm_events_map);
        kfree(p);
 
        return 0;
-- 
2.27.0

Reply via email to