[PATCH v4] ARM: imx: Added perf functionality to mmdc driver

2016-08-31 Thread Zhengyu Shen
MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64
and LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high
performance, and optimized. MMDC is present on i.MX6 Quad and i.MX6
QuadPlus devices, but this driver only supports i.MX6 Quad at the moment.
MMDC provides registers for performance counters which are read via this
driver to help debug memory throughput and similar issues.

$ perf stat -a -e 
mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/
 dd if=/dev/zero of=/dev/null bs=1M count=5000
Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':

 898021787  mmdc/busy-cycles/
  14819600  mmdc/read-accesses/
471.30 MB   mmdc/read-bytes/
2815419216  mmdc/total-cycles/
  13367354  mmdc/write-accesses/
427.76 MB   mmdc/write-bytes/

   5.334757334 seconds time elapsed

Signed-off-by: Zhengyu Shen 
Signed-off-by: Frank Li 
---
Changes from v3 to v4:
Tested and fixed crash relating to removing events with perf fuzzer
Adjusted formatting
Moved all perf event code under CONFIG_PERF_EVENTS
Switched cpuhp_setup_state to cpuhp_setup_state_nocalls

Changes from v2 to v3:
Use WARN_ONCE instead of returning generic error values
Replace CPU Notifiers with newer state machine hotplug
Added additional checks on event_init for grouping and sampling
Remove useless mmdc_enable_profiling function
Added comments
Moved start index of events from 0x01 to 0x00
Added a counter to pmu_mmdc to only stop hrtimer after all events are 
finished
Replace readl_relaxed and writel_relaxed with readl and writel
Removed duplicate update function
Used devm_kasprintf when naming mmdcs probed

Changes from v1 to v2:
Added cpumask and migration handling support to driver
Validated event during event_init
Added code to properly stop counters
Used perf_invalid_context instead of perf_sw_context
Added hrtimer to poll for overflow
Added better description
Added support for multiple mmdcs

 arch/arm/mach-imx/mmdc.c | 423 ++-
 1 file changed, 421 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index db9621c..77357c3 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011,2016 Freescale Semiconductor, Inc.
  * Copyright 2011 Linaro Ltd.
  *
  * The code contained herein is licensed under the GNU General Public
@@ -10,12 +10,17 @@
  * http://www.gnu.org/copyleft/gpl.html
  */
 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "common.h"
 
@@ -27,8 +32,421 @@
 #define BM_MMDC_MDMISC_DDR_TYPE0x18
 #define BP_MMDC_MDMISC_DDR_TYPE0x3
 
+#define TOTAL_CYCLES   0x0
+#define BUSY_CYCLES0x1
+#define READ_ACCESSES  0x2
+#define WRITE_ACCESSES 0x3
+#define READ_BYTES 0x4
+#define WRITE_BYTES0x5
+
+/* Enables, resets, freezes, overflow profiling*/
+#define DBG_DIS0x0
+#define DBG_EN 0x1
+#define DBG_RST0x2
+#define PRF_FRZ0x4
+#define CYC_OVF0x8
+
+#define MMDC_MADPCR0   0x410
+#define MMDC_MADPSR0   0x418
+#define MMDC_MADPSR1   0x41C
+#define MMDC_MADPSR2   0x420
+#define MMDC_MADPSR3   0x424
+#define MMDC_MADPSR4   0x428
+#define MMDC_MADPSR5   0x42C
+
+#define MMDC_NUM_COUNTERS  6
+
+#define to_mmdc_pmu(p) container_of(p, struct mmdc_pmu, pmu)
+
 static int ddr_type;
 
+#ifdef CONFIG_PERF_EVENTS
+
+static DEFINE_IDA(mmdc_ida);
+
+PMU_EVENT_ATTR_STRING(total-cycles, mmdc_total_cycles, "event=0x00")
+PMU_EVENT_ATTR_STRING(busy-cycles, mmdc_busy_cycles, "event=0x01")
+PMU_EVENT_ATTR_STRING(read-accesses, mmdc_read_accesses, "event=0x02")
+PMU_EVENT_ATTR_STRING(write-accesses, mmdc_write_accesses, "config=0x03")
+PMU_EVENT_ATTR_STRING(read-bytes, mmdc_read_bytes, "event=0x04")
+PMU_EVENT_ATTR_STRING(read-bytes.unit, mmdc_read_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(read-bytes.scale, mmdc_read_bytes_scale, "0.01");
+PMU_EVENT_ATTR_STRING(write-bytes, mmdc_write_bytes, "event=0x05")
+PMU_EVENT_ATTR_STRING(write-bytes.unit, mmdc_write_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(write-bytes.scale, mmdc_write_bytes_scale, "0.01");
+
+struct mmdc_pmu {
+   struct pmu pmu;
+   void __iomem *mmdc_base;
+   cpumask_t cpu;
+   struct hrtimer hrtimer;
+   unsigned int irq;
+   unsigned int active_events;
+   struct device *dev;
+   struct perf_event *mmdc_events[MMDC_NUM_COUNTE

RE: [PATCH v3] Added perf functionality to mmdc driver

2016-08-31 Thread Zhengyu Shen
> > Mmdc was only responsible for one crash which I fixed (had to remove
> > the event from the pmu properly).
> 
> Ok. I take it that there will be a v4 appearing shortly with that fix?

Hopefully I'll get that out by the end of today. 
 
> Crashes are very bad. Do you have any logs from those crashes in other
> drivers that you can share?

It seems my kernel source was out of date; updating it seems to have fixed
the issue.


RE: [PATCH v3] Added perf functionality to mmdc driver

2016-08-30 Thread Zhengyu Shen
> Hi,
> 
> On Tue, Aug 30, 2016 at 07:43:29PM +0800, Shawn Guo wrote:
> > On Mon, Aug 29, 2016 at 11:06:44AM -0500, Zhi Li wrote:
> > > On Wed, Aug 17, 2016 at 2:42 PM, Zhengyu Shen
>  wrote:
> > > > MMDC is a multi-mode DDR controller that supports DDR3/DDR3L
> > > > x16/x32/x64 and LPDDR2 two channel x16/x32 memory types. MMDC is
> > > > configurable, high performance, and optimized. MMDC is present on
> > > > i.MX6 Quad and i.MX6 QuadPlus devices, but this driver only supports
> i.MX6 Quad at the moment.
> > > > MMDC provides registers for performance counters which read via
> > > > this driver to help debug memory throughput and similar issues.
> > > >
> > > > $ perf stat -a -e
> > > > mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-
> bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/ dd
> if=/dev/zero of=/dev/null bs=1M count=5000 Performance counter stats for
> 'dd if=/dev/zero of=/dev/null bs=1M count=5000':
> > > >
> > > >  898021787  mmdc/busy-cycles/
> > > >   14819600  mmdc/read-accesses/
> > > > 471.30 MB   mmdc/read-bytes/
> > > > 2815419216  mmdc/total-cycles/
> > > >   13367354  mmdc/write-accesses/
> > > > 427.76 MB   mmdc/write-bytes/
> > > >
> > > >5.334757334 seconds time elapsed
> > > >
> > > > Signed-off-by: Zhengyu Shen 
> > > > Signed-off-by: Frank Li 
> > >
> > > Shawn Guo:
> > >
> > >  No new comments got more than 1 weeks.
> > >  Did you plan accept it?
> >
> > @Mark, how do you think of this version?
> 
> Sorry, I've been away for the last week and haven't had the chance to look at
> this yet. I will try to get round to it in the next few days.
> 
> In the meantime, could you please try attacking this with Vince's perf fuzzer
> [1] (as root, or with perf_event_paranoid dropped to -1)? It's rather good at
> finding (subtle) issues in drivers.
> 
> Thanks,
> Mark.
> 
> [1] https://github.com/deater/perf_event_tests

Hi, I've done some testing with the fuzzer. MMDC was only responsible
for one crash, which I fixed (had to remove the event from the pmu properly).
Other drivers also cause crashes, and the program reports that events are
throttling. Is this normal?


[PATCH v3] Added perf functionality to mmdc driver

2016-08-18 Thread Zhengyu Shen
MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64
and LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high
performance, and optimized. MMDC is present on i.MX6 Quad and i.MX6
QuadPlus devices, but this driver only supports i.MX6 Quad at the moment.
MMDC provides registers for performance counters which are read via this
driver to help debug memory throughput and similar issues.

$ perf stat -a -e 
mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/
 dd if=/dev/zero of=/dev/null bs=1M count=5000
Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':

 898021787  mmdc/busy-cycles/
  14819600  mmdc/read-accesses/
471.30 MB   mmdc/read-bytes/
2815419216  mmdc/total-cycles/
  13367354  mmdc/write-accesses/
427.76 MB   mmdc/write-bytes/

   5.334757334 seconds time elapsed

Signed-off-by: Zhengyu Shen 
Signed-off-by: Frank Li 
---
Changes from v2 to v3:
Use WARN_ONCE instead of returning generic error values
Replace CPU Notifiers with newer state machine hotplug
Added additional checks on event_init for grouping and sampling
Remove useless mmdc_enable_profiling function
Added comments
Moved start index of events from 0x01 to 0x00
Added a counter to pmu_mmdc to only stop hrtimer after all events are 
finished
Replace readl_relaxed and writel_relaxed with readl and writel
Removed duplicate update function
Used devm_kasprintf when naming mmdcs probed

Changes from v1 to v2:
Added cpumask and migration handling support to driver
Validated event during event_init
Added code to properly stop counters
Used perf_invalid_context instead of perf_sw_context
Added hrtimer to poll for overflow 
Added better description
Added support for multiple mmdcs

 arch/arm/mach-imx/mmdc.c | 384 ++-
 1 file changed, 383 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index db9621c..5fe7696 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011,2016 Freescale Semiconductor, Inc.
  * Copyright 2011 Linaro Ltd.
  *
  * The code contained herein is licensed under the GNU General Public
@@ -16,6 +16,11 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include "common.h"
 
@@ -27,14 +32,363 @@
 #define BM_MMDC_MDMISC_DDR_TYPE0x18
 #define BP_MMDC_MDMISC_DDR_TYPE0x3
 
+#define TOTAL_CYCLES   0x0
+#define BUSY_CYCLES0x1
+#define READ_ACCESSES  0x2
+#define WRITE_ACCESSES 0x3
+#define READ_BYTES 0x4
+#define WRITE_BYTES0x5
+
+/* Enables, resets, freezes, overflow profiling*/
+#define DBG_DIS0x0
+#define DBG_EN 0x1
+#define DBG_RST0x2
+#define PRF_FRZ0x4
+#define CYC_OVF0x8
+
+#define MMDC_MADPCR0   0x410
+#define MMDC_MADPSR0   0x418
+#define MMDC_MADPSR1   0x41C
+#define MMDC_MADPSR2   0x420
+#define MMDC_MADPSR3   0x424
+#define MMDC_MADPSR4   0x428
+#define MMDC_MADPSR5   0x42C
+
+#define MMDC_NUM_COUNTERS  6
+
+#define to_mmdc_pmu(p) (container_of(p, struct mmdc_pmu, pmu))
+
+static DEFINE_IDA(mmdc_ida);
+
 static int ddr_type;
 
+PMU_EVENT_ATTR_STRING(total-cycles, mmdc_total_cycles, "event=0x00")
+PMU_EVENT_ATTR_STRING(busy-cycles, mmdc_busy_cycles, "event=0x01")
+PMU_EVENT_ATTR_STRING(read-accesses, mmdc_read_accesses, "event=0x02")
+PMU_EVENT_ATTR_STRING(write-accesses, mmdc_write_accesses, "config=0x03")
+PMU_EVENT_ATTR_STRING(read-bytes, mmdc_read_bytes, "event=0x04")
+PMU_EVENT_ATTR_STRING(read-bytes.unit, mmdc_read_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(read-bytes.scale, mmdc_read_bytes_scale, "0.01");
+PMU_EVENT_ATTR_STRING(write-bytes, mmdc_write_bytes, "event=0x05")
+PMU_EVENT_ATTR_STRING(write-bytes.unit, mmdc_write_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(write-bytes.scale, mmdc_write_bytes_scale, "0.01");
+
+struct mmdc_pmu {
+   struct pmu pmu;
+   void __iomem *mmdc_base;
+   cpumask_t cpu;
+   struct hrtimer hrtimer;
+   unsigned int irq;
+   unsigned int active_events;
+   struct device *dev;
+   struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
+   spinlock_t mmdc_active_events_lock;
+};
+static struct mmdc_pmu *cpuhp_mmdc_pmu;
+
+/* polling period is set to one second, overflow of total-cycles (the fastest
+ * increasing counter) takes ten seconds so one second is safe
+ */
+static unsigned int mmdc_poll_period_us = 100;

RE: [PATCH v2] Added perf functionality to mmdc driver

2016-08-16 Thread Zhengyu Shen
> > > > +   hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
> > > > +   HRTIMER_MODE_REL_PINNED);
> > >
> > > Why is a hrtimer necessary? Is this just copy-pasted from CCN, or do
> > > you have similar HW issues?
> > >
> > > Is there no overflow interrupt?
> >
> > When overflow occurs, a register bit is set to one. There is no
> > overflow interrupt which is why the timer is needed.
> 
> I see. Please have add comment in the driver explaining this, so that this is
> obvious.
> 
> Does the counter itself wrap and continue counting, or does it saturate?
> 
> How have you tuned your polling period so as to avoid missing events in the
> case of an overflow?
> 
> Thanks,
> Mark.
The counter wraps around once every ten seconds for total-cycles (which is the
fastest-increasing counter). Polling is done once every second just to be safe.


RE: [PATCH v2] Added perf functionality to mmdc driver

2016-08-16 Thread Zhengyu Shen
> > Added cpumask and migration handling support to driver
> > Validated event during event_init
> > Added code to properly stop counters
> > Used perf_invalid_context instead of perf_sw_context
> > Added hrtimer to poll for overflow
> > Added better description
> > Added support for multiple mmdcs
> 
> As I commented on v1 w.r.t. the above, I would appreciate being Cc'd on
> future versions of this patch.

Sorry about that, I'll be sure to CC you in the future. 

> > +static void mmdc_event_start(struct perf_event *event, int flags) {
> > +   struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> > +   void __iomem *mmdc_base, *reg;
> > +
> > +   local64_set(&event->count, 0);
> > +   mmdc_base = pmu_mmdc->mmdc_base;
> > +   reg = mmdc_base + MMDC_MADPCR0;
> > +   hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
> > +   HRTIMER_MODE_REL_PINNED);
> 
> Why is a hrtimer necessary? Is this just copy-pasted from CCN, or do you
> have similar HW issues?
> 
> Is there no overflow interrupt?

When overflow occurs, a register bit is set to one. There is no overflow
interrupt which is why the timer is needed. 

Thanks a lot for the feedback!


[PATCH v2] Added perf functionality to mmdc driver

2016-08-15 Thread Zhengyu Shen
MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64 and
LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high performance,
and optimized. MMDC is present on i.MX6 Quad and i.MX6 QuadPlus devices, but
this driver only supports i.MX6 Quad at the moment. MMDC provides registers
for performance counters which are read via this driver to help debug memory
throughput and similar issues.

$ perf stat -a -e 
mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/
 dd if=/dev/zero of=/dev/null bs=1M count=5000
Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':

 898021787  mmdc/busy-cycles/
  14819600  mmdc/read-accesses/
471.30 MB   mmdc/read-bytes/
2815419216  mmdc/total-cycles/
  13367354  mmdc/write-accesses/
427.76 MB   mmdc/write-bytes/

   5.334757334 seconds time elapsed

Signed-off-by: Zhengyu Shen 
---
Changes from v1 to v2:
Added cpumask and migration handling support to driver
Validated event during event_init
Added code to properly stop counters
Used perf_invalid_context instead of perf_sw_context
Added hrtimer to poll for overflow 
Added better description
Added support for multiple mmdcs

 arch/arm/mach-imx/mmdc.c | 362 ++-
 1 file changed, 361 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index db9621c..372b59c 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011,2016 Freescale Semiconductor, Inc.
  * Copyright 2011 Linaro Ltd.
  *
  * The code contained herein is licensed under the GNU General Public
@@ -16,6 +16,10 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 
 #include "common.h"
 
@@ -27,14 +31,341 @@
 #define BM_MMDC_MDMISC_DDR_TYPE0x18
 #define BP_MMDC_MDMISC_DDR_TYPE0x3
 
+#define TOTAL_CYCLES   0x1
+#define BUSY_CYCLES0x2
+#define READ_ACCESSES  0x3
+#define WRITE_ACCESSES 0x4
+#define READ_BYTES 0x5
+#define WRITE_BYTES0x6
+
+/* Enables, resets, freezes, overflow profiling*/
+#define DBG_DIS0x0
+#define DBG_EN 0x1 
+#define DBG_RST0x2
+#define PRF_FRZ0x4
+#define CYC_OVF0x8
+
+#define MMDC_MADPCR0   0x410
+#define MMDC_MADPSR0   0x418
+#define MMDC_MADPSR1   0x41C
+#define MMDC_MADPSR2   0x420
+#define MMDC_MADPSR3   0x424
+#define MMDC_MADPSR4   0x428
+#define MMDC_MADPSR5   0x42C
+
+#define MMDC_NUM_COUNTERS  6
+
+#define to_mmdc_pmu(p) (container_of(p, struct mmdc_pmu, pmu))
+
+static DEFINE_IDA(mmdc_ida);
+
 static int ddr_type;
 
+PMU_EVENT_ATTR_STRING(total-cycles, mmdc_total_cycles, "event=0x01")
+PMU_EVENT_ATTR_STRING(busy-cycles, mmdc_busy_cycles, "event=0x02")
+PMU_EVENT_ATTR_STRING(read-accesses, mmdc_read_accesses, "event=0x03")
+PMU_EVENT_ATTR_STRING(write-accesses, mmdc_write_accesses, "config=0x04")
+PMU_EVENT_ATTR_STRING(read-bytes, mmdc_read_bytes, "event=0x05")
+PMU_EVENT_ATTR_STRING(read-bytes.unit, mmdc_read_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(read-bytes.scale, mmdc_read_bytes_scale, "0.01");
+PMU_EVENT_ATTR_STRING(write-bytes, mmdc_write_bytes, "event=0x06")
+PMU_EVENT_ATTR_STRING(write-bytes.unit, mmdc_write_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(write-bytes.scale, mmdc_write_bytes_scale, "0.01");
+
+struct mmdc_pmu
+{
+   struct pmu pmu;
+   void __iomem *mmdc_base;
+   cpumask_t cpu;
+   struct notifier_block cpu_nb;
+   struct hrtimer hrtimer;
+   unsigned int irq;
+   struct device *dev;
+   struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
+};
+
+static unsigned int mmdc_poll_period_us = 100;
+module_param_named(pmu_poll_period_us, mmdc_poll_period_us, uint,
+   S_IRUGO | S_IWUSR);
+
+static ktime_t mmdc_timer_period(void)
+{
+   return ns_to_ktime((u64)mmdc_poll_period_us * 1000);
+}
+
+static ssize_t mmdc_cpumask_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct mmdc_pmu *pmu_mmdc = dev_get_drvdata(dev);
+   return cpumap_print_to_pagebuf(true, buf, &pmu_mmdc->cpu);
+}
+
+static struct device_attribute mmdc_cpumask_attr =
+__ATTR(cpumask, S_IRUGO, mmdc_cpumask_show, NULL);
+
+static struct attribute *mmdc_cpumask_attrs[] = {
+   &mmdc_cpumask_attr.attr,
+   NULL,
+};
+
+static struct attribute_group mmdc_cpumask_attr_group = {
+   .attrs = mmdc_cpumask_attrs,
+};
+
+static struct attribute *mmdc_events_attrs[] = {
+   &mmdc_tot

[PATCH] Added perf functionality to mmdc driver

2016-08-06 Thread Zhengyu Shen
$ perf stat -e 
mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/
 dd if=/dev/zero of=/dev/null bs=1M count=5000
Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':

 898021787  mmdc/busy-cycles/
  14819600  mmdc/read-accesses/
471.30 MB   mmdc/read-bytes/
2815419216  mmdc/total-cycles/
  13367354  mmdc/write-accesses/
427.76 MB   mmdc/write-bytes/

   5.334757334 seconds time elapsed

Signed-off-by: Zhengyu Shen 
---
 arch/arm/mach-imx/mmdc.c | 206 ++-
 1 file changed, 205 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index db9621c..48f3a0b 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011,2016 Freescale Semiconductor, Inc.
  * Copyright 2011 Linaro Ltd.
  *
  * The code contained herein is licensed under the GNU General Public
@@ -16,6 +16,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "common.h"
 
@@ -27,12 +29,198 @@
 #define BM_MMDC_MDMISC_DDR_TYPE0x18
 #define BP_MMDC_MDMISC_DDR_TYPE0x3
 
+#define TOTAL_CYCLES   0x1
+#define BUSY_CYCLES0x2
+#define READ_ACCESSES  0x3
+#define WRITE_ACCESSES 0x4
+#define READ_BYTES 0x5
+#define WRITE_BYTES0x6
+
+#define DBG_EN 0x1
+#define DBG_RST0x2
+#define PRF_FRZ0x4
+#define CYC_OVF0x8
+
+#define MMDC_MADPCR0   0x410
+#define MMDC_MADPSR0   0x418
+#define MMDC_MADPSR1   0x41C
+#define MMDC_MADPSR2   0x420
+#define MMDC_MADPSR3   0x424
+#define MMDC_MADPSR4   0x428
+#define MMDC_MADPSR5   0x42C
+
+#define to_mmdc_pmu(p) (container_of(p, struct mmdc_pmu, pmu))
+
 static int ddr_type;
 
+PMU_EVENT_ATTR_STRING(total-cycles, evattr_total_cycles, "event=0x01")
+PMU_EVENT_ATTR_STRING(busy-cycles, evattr_busy_cycles, "event=0x02")
+PMU_EVENT_ATTR_STRING(read-accesses, evattr_read_accesses, "event=0x03")
+PMU_EVENT_ATTR_STRING(write-accesses, evattr_write_accesses, "config=0x04")
+PMU_EVENT_ATTR_STRING(read-bytes, evattr_read_bytes, "event=0x05")
+PMU_EVENT_ATTR_STRING(read-bytes.unit, evattr_read_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(read-bytes.scale, evattr_read_bytes_scale, "0.01");
+PMU_EVENT_ATTR_STRING(write-bytes, evattr_write_bytes, "event=0x06")
+PMU_EVENT_ATTR_STRING(write-bytes.unit, evattr_write_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(write-bytes.scale, evattr_write_bytes_scale, "0.01");
+
+struct mmdc_pmu
+{
+   struct pmu pmu;
+   void __iomem *mmdc_base;
+};
+
+static struct attribute *events_attrs[] = {
+   &evattr_total_cycles.attr.attr,
+   &evattr_busy_cycles.attr.attr,
+   &evattr_read_accesses.attr.attr,
+   &evattr_write_accesses.attr.attr,
+   &evattr_read_bytes.attr.attr,
+   &evattr_read_bytes_unit.attr.attr,
+   &evattr_read_bytes_scale.attr.attr,
+   &evattr_write_bytes.attr.attr,
+   &evattr_write_bytes_unit.attr.attr,
+   &evattr_write_bytes_scale.attr.attr,
+   NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+static struct attribute *format_attrs[] = {
+   &format_attr_event.attr,
+   NULL,
+};
+
+static struct attribute_group format_attr_group = {
+   .name = "format",
+   .attrs = format_attrs,
+};
+
+static struct attribute_group events_attr_group = {
+   .name = "events",
+   .attrs = events_attrs,
+};
+
+static const struct attribute_group * attr_groups[] = {
+   &events_attr_group,
+   &format_attr_group,
+   NULL,
+};
+
+static inline u64 mmdc_read_counter(struct perf_event *event)
+{
+   unsigned int val;
+   u64 ret;
+   int cfg = (int) event->attr.config;
+   struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+   void __iomem *mmdc_base, *reg;
+
+   mmdc_base = pmu_mmdc->mmdc_base;
+
+   writel_relaxed(PRF_FRZ, mmdc_base + MMDC_MADPCR0);
+
+   switch (cfg)
+   {
+   case TOTAL_CYCLES:
+   reg = mmdc_base + MMDC_MADPSR0;
+   break;
+   case BUSY_CYCLES:
+   reg = mmdc_base + MMDC_MADPSR1;
+   break;
+   case READ_ACCESSES:
+   reg = mmdc_base + MMDC_MADPSR2;
+   break;
+   case WRITE_ACCESSES:
+   reg = mmdc_base + MMDC_MADPSR3;
+   break;
+   case READ_BYTES:
+   reg = mmdc_base + MMDC_MADPSR4;
+   break;
+