[PATCH 1/1] nds32: Power management for nds32

2018-10-23 Thread Nickhu
There are three sleep states in nds32:
suspend to idle,
suspend to standby,
suspend to ram

In suspend to ram, we use the 'standby' instruction to emulate a
power management device that hangs the system until a wakeup source
sends wakeup events to break the loop.

First, we push the general purpose registers and system registers
to the stack. Second, we translate the stack pointer to a physical
address and store it to memory to save the stack pointer. Third, after
writing back and invalidating the cache, we hang in the 'standby'
instruction. When a wakeup source triggers wakeup events, the loop is
broken and the system resumes.

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig   |  10 +++
 arch/nds32/include/asm/suspend.h |  11 +++
 arch/nds32/kernel/Makefile   |   2 +-
 arch/nds32/kernel/pm.c   |  91 ++
 arch/nds32/kernel/sleep.S| 129 +++
 drivers/irqchip/irq-ativic32.c   |  29 +++
 6 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 arch/nds32/include/asm/suspend.h
 create mode 100644 arch/nds32/kernel/pm.c
 create mode 100644 arch/nds32/kernel/sleep.S

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index dd448d431f5a..8e2c5ac6acd1 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -95,3 +95,13 @@ endmenu
 menu "Kernel Features"
 source "kernel/Kconfig.hz"
 endmenu
+
+menu "Power management options"
+config SYS_SUPPORTS_APM_EMULATION
+   bool
+
+config ARCH_SUSPEND_POSSIBLE
+   def_bool y
+
+source "kernel/power/Kconfig"
+endmenu
diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h
new file mode 100644
index 000000000000..6ed2418af1ac
--- /dev/null
+++ b/arch/nds32/include/asm/suspend.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+#ifndef __ASM_NDS32_SUSPEND_H
+#define __ASM_NDS32_SUSPEND_H
+
+extern void suspend2ram(void);
+extern void cpu_resume(void);
+extern unsigned long wake_mask;
+
+#endif
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile
index f52bd2744f50..8d62f2ecb1ab 100644
--- a/arch/nds32/kernel/Makefile
+++ b/arch/nds32/kernel/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_STACKTRACE)  += stacktrace.o
 obj-$(CONFIG_OF)   += devtree.o
 obj-$(CONFIG_CACHE_L2) += atl2c.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o
-
+obj-$(CONFIG_PM)   += pm.o sleep.o
 extra-y := head.o vmlinux.lds
 
 obj-y  += vdso/
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
new file mode 100644
index 000000000000..e1eaf3bac709
--- /dev/null
+++ b/arch/nds32/kernel/pm.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+/*
+ * nds32 Power Management Routines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License.
+ *
+ *  Abstract:
+ *
+ *This program is for nds32 power management routines.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+unsigned int resume_addr;
+unsigned int *phy_addr_sp_tmp;
+
+static void nds32_suspend2ram(void)
+{
+   pgd_t *pgdv;
+   pud_t *pudv;
+   pmd_t *pmdv;
+   pte_t *ptev;
+
+   pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
+   L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
+
+   pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+   pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
+   ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
+
+   resume_addr = ((*ptev) & TLB_DATA_mskPPN)
+   | ((unsigned int)cpu_resume & 0x0fff);
+
+   suspend2ram();
+}
+
+static void nds32_suspend_cpu(void)
+{
+   while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask))
+   __asm__ volatile ("standby no_wake_grant\n\t");
+}
+
+static int nds32_pm_valid(suspend_state_t state)
+{
+   switch (state) {
+   case PM_SUSPEND_ON:
+   case PM_SUSPEND_STANDBY:
+   case PM_SUSPEND_MEM:
+   return 1;
+   default:
+   return 0;
+   }
+}
+
+static int nds32_pm_enter(suspend_state_t state)
+{
+   pr_debug("%s:state:%d\n", __func__, state);
+   switch (state) {
+   case PM_SUSPEND_STANDBY:
+   nds32_suspend_cpu();
+   return 0;
+   case PM_SUSPEND_MEM:
+   nds32_suspend2ram();
+   return 0;
+   default:
+   return -EINVAL;
+   }
+}
+
+static const struct platform_suspend_ops nds32_pm_ops = {
+   .valid = nds32_pm_valid,
+   .enter = nds32_pm_enter,
+};
+
+static int __init nds32_pm_init(void)
+{
+   pr_debug("Enter %s\n", __func__);
+   suspend_set_ops(&nds32_pm_ops);
+   return 0;
+}
+late_initcall(

[PATCH 0/1] nds32: Power management

2018-10-23 Thread Nickhu
This commit is power management porting for nds32.

Nickhu (1):
  nds32: Power management for nds32

 arch/nds32/Kconfig   |  10 +++
 arch/nds32/include/asm/suspend.h |  11 +++
 arch/nds32/kernel/Makefile   |   2 +-
 arch/nds32/kernel/pm.c   |  91 ++
 arch/nds32/kernel/sleep.S| 129 +++
 drivers/irqchip/irq-ativic32.c   |  29 +++
 6 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 arch/nds32/include/asm/suspend.h
 create mode 100644 arch/nds32/kernel/pm.c
 create mode 100644 arch/nds32/kernel/sleep.S

-- 
2.17.0



[PATCH 1/1] nds32: Power management for nds32

2018-10-23 Thread Nickhu
There are three sleep states in nds32:
suspend to idle,
suspend to standby,
suspend to ram

In suspend to ram, we use the 'standby' instruction to emulate a
power management device that hangs the system until a wakeup source
sends wakeup events to break the loop.

First, we push the general purpose registers and system registers
to the stack. Second, we translate the stack pointer to a physical
address and store it to memory to save the stack pointer. Third, after
writing back and invalidating the cache, we hang in the 'standby'
instruction. When a wakeup source triggers wakeup events, the loop is
broken and the system resumes.

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig   |  10 +++
 arch/nds32/include/asm/suspend.h |  11 +++
 arch/nds32/kernel/Makefile   |   2 +-
 arch/nds32/kernel/pm.c   |  91 ++
 arch/nds32/kernel/sleep.S| 129 +++
 drivers/irqchip/irq-ativic32.c   |  29 +++
 6 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 arch/nds32/include/asm/suspend.h
 create mode 100644 arch/nds32/kernel/pm.c
 create mode 100644 arch/nds32/kernel/sleep.S

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index dd448d431f5a..8e2c5ac6acd1 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -95,3 +95,13 @@ endmenu
 menu "Kernel Features"
 source "kernel/Kconfig.hz"
 endmenu
+
+menu "Power management options"
+config SYS_SUPPORTS_APM_EMULATION
+   bool
+
+config ARCH_SUSPEND_POSSIBLE
+   def_bool y
+
+source "kernel/power/Kconfig"
+endmenu
diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h
new file mode 100644
index 000000000000..6ed2418af1ac
--- /dev/null
+++ b/arch/nds32/include/asm/suspend.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+#ifndef __ASM_NDS32_SUSPEND_H
+#define __ASM_NDS32_SUSPEND_H
+
+extern void suspend2ram(void);
+extern void cpu_resume(void);
+extern unsigned long wake_mask;
+
+#endif
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile
index f52bd2744f50..8d62f2ecb1ab 100644
--- a/arch/nds32/kernel/Makefile
+++ b/arch/nds32/kernel/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_STACKTRACE)  += stacktrace.o
 obj-$(CONFIG_OF)   += devtree.o
 obj-$(CONFIG_CACHE_L2) += atl2c.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o
-
+obj-$(CONFIG_PM)   += pm.o sleep.o
 extra-y := head.o vmlinux.lds
 
 obj-y  += vdso/
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
new file mode 100644
index 000000000000..e1eaf3bac709
--- /dev/null
+++ b/arch/nds32/kernel/pm.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+/*
+ * nds32 Power Management Routines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License.
+ *
+ *  Abstract:
+ *
+ *This program is for nds32 power management routines.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+unsigned int resume_addr;
+unsigned int *phy_addr_sp_tmp;
+
+static void nds32_suspend2ram(void)
+{
+   pgd_t *pgdv;
+   pud_t *pudv;
+   pmd_t *pmdv;
+   pte_t *ptev;
+
+   pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
+   L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
+
+   pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+   pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
+   ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
+
+   resume_addr = ((*ptev) & TLB_DATA_mskPPN)
+   | ((unsigned int)cpu_resume & 0x0fff);
+
+   suspend2ram();
+}
+
+static void nds32_suspend_cpu(void)
+{
+   while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask))
+   __asm__ volatile ("standby no_wake_grant\n\t");
+}
+
+static int nds32_pm_valid(suspend_state_t state)
+{
+   switch (state) {
+   case PM_SUSPEND_ON:
+   case PM_SUSPEND_STANDBY:
+   case PM_SUSPEND_MEM:
+   return 1;
+   default:
+   return 0;
+   }
+}
+
+static int nds32_pm_enter(suspend_state_t state)
+{
+   pr_debug("%s:state:%d\n", __func__, state);
+   switch (state) {
+   case PM_SUSPEND_STANDBY:
+   nds32_suspend_cpu();
+   return 0;
+   case PM_SUSPEND_MEM:
+   nds32_suspend2ram();
+   return 0;
+   default:
+   return -EINVAL;
+   }
+}
+
+static const struct platform_suspend_ops nds32_pm_ops = {
+   .valid = nds32_pm_valid,
+   .enter = nds32_pm_enter,
+};
+
+static int __init nds32_pm_init(void)
+{
+   pr_debug("Enter %s\n", __func__);
+   suspend_set_ops(&nds32_pm_ops);
+   return 0;
+}
+late_initcall(

[PATCH 0/1] nds32: Power management

2018-10-23 Thread Nickhu
This commit is power management porting for nds32.

Nickhu (1):
  nds32: Power management for nds32

 arch/nds32/Kconfig   |  10 +++
 arch/nds32/include/asm/suspend.h |  11 +++
 arch/nds32/kernel/Makefile   |   2 +-
 arch/nds32/kernel/pm.c   |  91 ++
 arch/nds32/kernel/sleep.S| 129 +++
 drivers/irqchip/irq-ativic32.c   |  29 +++
 6 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 arch/nds32/include/asm/suspend.h
 create mode 100644 arch/nds32/kernel/pm.c
 create mode 100644 arch/nds32/kernel/sleep.S

-- 
2.17.0



[PATCH v3 2/4] nds32: Perf porting

2018-10-23 Thread Nickhu
This commit ports perf to nds32.

1.Raw event:
The raw events start with 'r'.
Usage:
perf stat -e rXYZ ./app
X: the index of performance counter.
YZ: the index (converted to hexadecimal) of the event

Example:
'perf stat -e r101 ./app' means that counter 1 will count the
instruction event.

The index of counter and events can be found in
"Andes System Privilege Architecture Version 3 Manual".

Or you can perform the 'perf list' to find the symbolic name of raw events.

2.Perf mmap2:

Fix unexpected perf mmap2() page fault

When the mmap2() called by perf application,
you will encounter such condition:"failed to write."
With return value -EFAULT

This is due to the page fault caused by "reading" the buffer
from the mapped legal address region to write to the descriptor.
The page fault handler gets a VM_FAULT_SIGBUS return value,
which should not happen here (because this is a read request).

You can refer to kernel/events/core.c:perf_mmap_fault(...)
If "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" is evaluated
as true, you will get VM_FAULT_SIGBUS as return value.

However, this is not a write request. The flag that indicates
why the page fault happened is wrong.

Furthermore, NDS32 SPAv3 is not able to detect whether an access is a
read or a write. It only knows whether it is an instruction fetch or a
data access.

Therefore, by removing the wrong flag assignment(actually, the hardware
is not able to show the reason), we can fix this bug.

3.Perf multiple events map to same counter.

When multiple events map to the same counter, the counter
counts inaccurately. This is because each counter can only count one
event at a time.
So when multiple events map to the same counter, they have to take
turns in each context.

There are two solutions:
1. Print an error message when multiple events map to the same counter.
But printing the error message would make the program hang in a loop,
and the ltp (linux test program) would fail when the program hangs.

2. Don't print the error message, so the ltp passes. But the user needs
to know not to count events that map to the same counter, or the
results will be inaccurate.

    We choose method 2 for the solution

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  386 ++
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1223 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 15 files changed, 2019 insertions(+), 6 deletions(-)
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f341133d..dd448d431f5a 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -31,6 +31,7 @@ config NDS32
select HAVE_DEBUG_KMEMLEAK
select HAVE_MEMBLOCK
select HAVE_REGS_AND_STACK_ACCESS_API
+   select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select LOCKDEP_SUPPORT
select MODULES_USE_ELF_RELA
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
index bb39749a6673..16a9f54a805e 100644
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
interrupts = <18>;
};
};
+
+   pmu {
+   compatible = "andestech,nds32v3-pmu";
+   interrupts= <13>;
+   };
 };

[PATCH v3 2/4] nds32: Perf porting

2018-10-23 Thread Nickhu
This commit ports perf to nds32.

1.Raw event:
The raw events start with 'r'.
Usage:
perf stat -e rXYZ ./app
X: the index of performance counter.
YZ: the index (converted to hexadecimal) of the event

Example:
'perf stat -e r101 ./app' means that counter 1 will count the
instruction event.

The index of counter and events can be found in
"Andes System Privilege Architecture Version 3 Manual".

Or you can perform the 'perf list' to find the symbolic name of raw events.

2.Perf mmap2:

Fix unexpected perf mmap2() page fault

When the mmap2() called by perf application,
you will encounter such condition:"failed to write."
With return value -EFAULT

This is due to the page fault caused by "reading" the buffer
from the mapped legal address region to write to the descriptor.
The page fault handler gets a VM_FAULT_SIGBUS return value,
which should not happen here (because this is a read request).

You can refer to kernel/events/core.c:perf_mmap_fault(...)
If "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" is evaluated
as true, you will get VM_FAULT_SIGBUS as return value.

However, this is not a write request. The flag that indicates
why the page fault happened is wrong.

Furthermore, NDS32 SPAv3 is not able to detect whether an access is a
read or a write. It only knows whether it is an instruction fetch or a
data access.

Therefore, by removing the wrong flag assignment(actually, the hardware
is not able to show the reason), we can fix this bug.

3.Perf multiple events map to same counter.

When multiple events map to the same counter, the counter
counts inaccurately. This is because each counter can only count one
event at a time.
So when multiple events map to the same counter, they have to take
turns in each context.

There are two solutions:
1. Print an error message when multiple events map to the same counter.
But printing the error message would make the program hang in a loop,
and the ltp (linux test program) would fail when the program hangs.

2. Don't print the error message, so the ltp passes. But the user needs
to know not to count events that map to the same counter, or the
results will be inaccurate.

    We choose method 2 for the solution

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  386 ++
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1223 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 15 files changed, 2019 insertions(+), 6 deletions(-)
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f341133d..dd448d431f5a 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -31,6 +31,7 @@ config NDS32
select HAVE_DEBUG_KMEMLEAK
select HAVE_MEMBLOCK
select HAVE_REGS_AND_STACK_ACCESS_API
+   select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select LOCKDEP_SUPPORT
select MODULES_USE_ELF_RELA
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
index bb39749a6673..16a9f54a805e 100644
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
interrupts = <18>;
};
};
+
+   pmu {
+   compatible = "andestech,nds32v3-pmu";
+   interrupts= <13>;
+   };
 };

[PATCH v3 4/4] nds32: Add document for NDS32 PMU.

2018-10-23 Thread Nickhu
The document for how to add NDS32 PMU
in devicetree.

Signed-off-by: Nickhu 
---
 Documentation/devicetree/bindings/nds32/pmu.txt | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt

diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt 
b/Documentation/devicetree/bindings/nds32/pmu.txt
new file mode 100644
index 000000000000..1bd15785b4ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/nds32/pmu.txt
@@ -0,0 +1,17 @@
+* NDS32 Performance Monitor Units
+
+NDS32 core have a PMU for counting cpu and cache events like cache misses.
+The NDS32 PMU representation in the device tree should be done as under:
+
+Required properties:
+
+- compatible :
+   "andestech,nds32v3-pmu"
+
+- interrupts : The interrupt number for NDS32 PMU is 13.
+
+Example:
+pmu{
+   compatible = "andestech,nds32v3-pmu";
+   interrupts = <13>;
+}
-- 
2.17.0



[PATCH v3 3/4] nds32: Add perf call-graph support.

2018-10-23 Thread Nickhu
The perf call-graph option can trace the callchain
between functions. This commit adds perf callchain support
for nds32. There are a kernel callchain and a user callchain.
The kernel callchain can trace functions in kernel
space. There are two types of user callchain: one for when the
'optimize for size' config is set, and another for when the
config is not set. The difference between the two types is that
the index of the frame pointer in the user stack is not the same.

For example:
With optimize for size:
User Stack:
-
|   lp  |
-
|   gp  |
-
|   fp  |

Without optimize for size:
User Stack:
1. non-leaf function:
-
|   lp  |
-
|   fp  |

2. leaf function:
-
|   fp  |

Signed-off-by: Nickhu 
---
 arch/nds32/kernel/perf_event_cpu.c | 299 +
 1 file changed, 299 insertions(+)

diff --git a/arch/nds32/kernel/perf_event_cpu.c 
b/arch/nds32/kernel/perf_event_cpu.c
index a6e723d0fdbc..5e00ce54d0ff 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void)
 
 device_initcall(register_pmu_driver);
 
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+   int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+   /* 0x3 means misalignment */
+   if (!kstack_end((void *)frame->fp) &&
+   !((unsigned long)frame->fp & 0x3) &&
+   ((unsigned long)frame->fp >= TASK_SIZE)) {
+   /*
+*  The array index is based on the ABI, the below graph
+*  illustrate the reasons.
+*  Function call procedure: "smw" and "lmw" will always
+*  update SP and FP for you automatically.
+*
+*  Stack Relative Address
+*  |  |  0
+*  
+*  |LP| <-- SP(before smw)  <-- FP(after smw)   -1
+*  
+*  |FP| -2
+*  
+*  |  | <-- SP(after smw)   -3
+*/
+   frame->lp = ((unsigned long *)frame->fp)[-1];
+   frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+   /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (NULL, &graph, frame->lp, NULL);
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#else
+   /*
+* You can refer to arch/nds32/kernel/traps.c:__dump()
+* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+* And, the "sp" is not always correct.
+*
+*   Stack Relative Address
+*   |  |  0
+*   
+*   |LP| <-- SP(before smw)  -1
+*   
+*   |  | <-- SP(after smw)   -2
+*   
+*/
+   if (!kstack_end((void *)frame->sp)) {
+   frame->lp = ((unsigned long *)frame->sp)[1];
+   /* TODO: How to deal with the value in first
+* "sp" is not correct?
+*/
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (tsk, &graph, frame->lp, NULL);
+
+   frame->sp = ((unsigned long *)frame->sp) + 1;
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+   int (*fn_record)(struct stackframe *, void *),
+   void *data)
+{
+   while (1) {
+   int ret;
+
+   if (fn_record(frame, data))
+   break;
+
+   ret = unwind_frame_kernel(frame);
+   if (ret < 0)
+   break;
+   }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called

[PATCH v3 1/4] nds32: Fix bug in bitfield.h

2018-10-23 Thread Nickhu
There are two bitfield bugs for the performance counters
in bitfield.h:

PFM_CTL_offSEL1 21 --> 16
PFM_CTL_offSEL2 27 --> 22

This commit fixes them.

Signed-off-by: Nickhu 
---
 arch/nds32/include/asm/bitfield.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/include/asm/bitfield.h 
b/arch/nds32/include/asm/bitfield.h
index 8e84fc385b94..19b2841219ad 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -692,8 +692,8 @@
 #define PFM_CTL_offKU1 13  /* Enable user mode event counting for 
PFMC1 */
 #define PFM_CTL_offKU2 14  /* Enable user mode event counting for 
PFMC2 */
 #define PFM_CTL_offSEL0 15  /* The event selection for
PFMC0 */
-#define PFM_CTL_offSEL1 21  /* The event selection for
PFMC1 */
-#define PFM_CTL_offSEL2 27  /* The event selection for
PFMC2 */
+#define PFM_CTL_offSEL1 16  /* The event selection for
PFMC1 */
+#define PFM_CTL_offSEL2 22  /* The event selection for
PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0 ( 0x01  << PFM_CTL_offEN0 )
-- 
2.17.0



[PATCH v3 4/4] nds32: Add document for NDS32 PMU.

2018-10-23 Thread Nickhu
The document for how to add NDS32 PMU
in devicetree.

Signed-off-by: Nickhu 
---
 Documentation/devicetree/bindings/nds32/pmu.txt | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt

diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt 
b/Documentation/devicetree/bindings/nds32/pmu.txt
new file mode 100644
index 000000000000..1bd15785b4ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/nds32/pmu.txt
@@ -0,0 +1,17 @@
+* NDS32 Performance Monitor Units
+
+NDS32 core have a PMU for counting cpu and cache events like cache misses.
+The NDS32 PMU representation in the device tree should be done as under:
+
+Required properties:
+
+- compatible :
+   "andestech,nds32v3-pmu"
+
+- interrupts : The interrupt number for NDS32 PMU is 13.
+
+Example:
+pmu{
+   compatible = "andestech,nds32v3-pmu";
+   interrupts = <13>;
+}
-- 
2.17.0



[PATCH v3 3/4] nds32: Add perf call-graph support.

2018-10-23 Thread Nickhu
The perf call-graph option can trace the callchain
between functions. This commit adds perf callchain support
for nds32. There are a kernel callchain and a user callchain.
The kernel callchain can trace functions in kernel
space. There are two types of user callchain: one for when the
'optimize for size' config is set, and another for when the
config is not set. The difference between the two types is that
the index of the frame pointer in the user stack is not the same.

For example:
With optimize for size:
User Stack:
-
|   lp  |
-
|   gp  |
-
|   fp  |

Without optimize for size:
User Stack:
1. non-leaf function:
-
|   lp  |
-
|   fp  |

2. leaf function:
-
|   fp  |

Signed-off-by: Nickhu 
---
 arch/nds32/kernel/perf_event_cpu.c | 299 +
 1 file changed, 299 insertions(+)

diff --git a/arch/nds32/kernel/perf_event_cpu.c 
b/arch/nds32/kernel/perf_event_cpu.c
index a6e723d0fdbc..5e00ce54d0ff 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void)
 
 device_initcall(register_pmu_driver);
 
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+   int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+   /* 0x3 means misalignment */
+   if (!kstack_end((void *)frame->fp) &&
+   !((unsigned long)frame->fp & 0x3) &&
+   ((unsigned long)frame->fp >= TASK_SIZE)) {
+   /*
+*  The array index is based on the ABI, the below graph
+*  illustrate the reasons.
+*  Function call procedure: "smw" and "lmw" will always
+*  update SP and FP for you automatically.
+*
+*  Stack Relative Address
+*  |  |  0
+*  
+*  |LP| <-- SP(before smw)  <-- FP(after smw)   -1
+*  
+*  |FP| -2
+*  
+*  |  | <-- SP(after smw)   -3
+*/
+   frame->lp = ((unsigned long *)frame->fp)[-1];
+   frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+   /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (NULL, &graph, frame->lp, NULL);
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#else
+   /*
+* You can refer to arch/nds32/kernel/traps.c:__dump()
+* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+* And, the "sp" is not always correct.
+*
+*   Stack Relative Address
+*   |  |  0
+*   
+*   |LP| <-- SP(before smw)  -1
+*   
+*   |  | <-- SP(after smw)   -2
+*   
+*/
+   if (!kstack_end((void *)frame->sp)) {
+   frame->lp = ((unsigned long *)frame->sp)[1];
+   /* TODO: How to deal with the value in first
+* "sp" is not correct?
+*/
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (tsk, &graph, frame->lp, NULL);
+
+   frame->sp = ((unsigned long *)frame->sp) + 1;
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+   int (*fn_record)(struct stackframe *, void *),
+   void *data)
+{
+   while (1) {
+   int ret;
+
+   if (fn_record(frame, data))
+   break;
+
+   ret = unwind_frame_kernel(frame);
+   if (ret < 0)
+   break;
+   }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called

[PATCH v3 1/4] nds32: Fix bug in bitfield.h

2018-10-23 Thread Nickhu
There are two bitfield bugs for the performance counters
in bitfield.h:

PFM_CTL_offSEL1 21 --> 16
PFM_CTL_offSEL2 27 --> 22

This commit fixes them.

Signed-off-by: Nickhu 
---
 arch/nds32/include/asm/bitfield.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/include/asm/bitfield.h 
b/arch/nds32/include/asm/bitfield.h
index 8e84fc385b94..19b2841219ad 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -692,8 +692,8 @@
 #define PFM_CTL_offKU1 13  /* Enable user mode event counting for 
PFMC1 */
 #define PFM_CTL_offKU2 14  /* Enable user mode event counting for 
PFMC2 */
 #define PFM_CTL_offSEL0 15  /* The event selection for
PFMC0 */
-#define PFM_CTL_offSEL1 21  /* The event selection for
PFMC1 */
-#define PFM_CTL_offSEL2 27  /* The event selection for
PFMC2 */
+#define PFM_CTL_offSEL1 16  /* The event selection for
PFMC1 */
+#define PFM_CTL_offSEL2 22  /* The event selection for
PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0 ( 0x01  << PFM_CTL_offEN0 )
-- 
2.17.0



[PATCH v3 0/4] nds32: Perf Support

2018-10-23 Thread Nickhu
* Sorry everyone, I forgot to add the version number
of the patch set I just sent.

These four commits add perf support for nds32.
There are three performance counters in nds32, and
each of them can count different events. You can
use 'perf list' to show the available events that
can be used.

Changes in V2:
1. Change the definition 'PFM_CTL_xxx' to
array form.

2. Simplify the PMU driver.

3. Stop all counters when handling irq
caused by performance counters overflow.

4. Rename the compatible string in
devicetree.

Changes in V3:
Fix the typo in Documentation/devicetree/
bindings/nds32/pmu.txt.

Nickhu (4):
  nds32: Fix bug in bitfield.h
  nds32: Perf porting
  nds32: Add perf call-graph support.
  nds32: Add document for NDS32 PMU.

 .../devicetree/bindings/nds32/pmu.txt |   17 +
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/bitfield.h |4 +-
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  386 +
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1522 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 17 files changed, 2337 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

-- 
2.17.0



[PATCH v3 0/4] nds32: Perf Support

2018-10-23 Thread Nickhu
* Sorry everyone, I forgot to add the version number
of the patch set I just sent.

These four commits add perf support for nds32.
There are three performance counters in nds32, and
each of them can count different events. You can
use 'perf list' to show the available events that
can be used.

Changes in V2:
1. Change the definition 'PFM_CTL_xxx' to
array form.

2. Simplify the PMU driver.

3. Stop all counters when handling irq
caused by performance counters overflow.

4. Rename the compatible string in
devicetree.

Changes in V3:
Fix the typo in Documentation/devicetree/
bindings/nds32/pmu.txt.

Nickhu (4):
  nds32: Fix bug in bitfield.h
  nds32: Perf porting
  nds32: Add perf call-graph support.
  nds32: Add document for NDS32 PMU.

 .../devicetree/bindings/nds32/pmu.txt |   17 +
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/bitfield.h |4 +-
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  386 +
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1522 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 17 files changed, 2337 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

-- 
2.17.0



[PATCH 1/4] nds32: Fix bug in bitfield.h

2018-10-23 Thread Nickhu
There are two bitfield bugs for the performance counters
in bitfield.h:

PFM_CTL_offSEL1 21 --> 16
PFM_CTL_offSEL2 27 --> 22

This commit fixes them.

Signed-off-by: Nickhu 
---
 arch/nds32/include/asm/bitfield.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/include/asm/bitfield.h 
b/arch/nds32/include/asm/bitfield.h
index 8e84fc385b94..19b2841219ad 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -692,8 +692,8 @@
 #define PFM_CTL_offKU1 13  /* Enable user mode event counting for 
PFMC1 */
 #define PFM_CTL_offKU2 14  /* Enable user mode event counting for 
PFMC2 */
 #define PFM_CTL_offSEL015  /* The event selection for 
PFMC0 */
-#define PFM_CTL_offSEL121  /* The event selection for 
PFMC1 */
-#define PFM_CTL_offSEL227  /* The event selection for 
PFMC2 */
+#define PFM_CTL_offSEL116  /* The event selection for 
PFMC1 */
+#define PFM_CTL_offSEL222  /* The event selection for 
PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0 ( 0x01  << PFM_CTL_offEN0 )
-- 
2.17.0



[PATCH 1/4] nds32: Fix bug in bitfield.h

2018-10-23 Thread Nickhu
There are two bitfield bugs for the performance counters
in bitfield.h:

PFM_CTL_offSEL1 21 --> 16
PFM_CTL_offSEL2 27 --> 22

This commit fixes them.

Signed-off-by: Nickhu 
---
 arch/nds32/include/asm/bitfield.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/include/asm/bitfield.h 
b/arch/nds32/include/asm/bitfield.h
index 8e84fc385b94..19b2841219ad 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -692,8 +692,8 @@
 #define PFM_CTL_offKU1 13  /* Enable user mode event counting for 
PFMC1 */
 #define PFM_CTL_offKU2 14  /* Enable user mode event counting for 
PFMC2 */
 #define PFM_CTL_offSEL015  /* The event selection for 
PFMC0 */
-#define PFM_CTL_offSEL121  /* The event selection for 
PFMC1 */
-#define PFM_CTL_offSEL227  /* The event selection for 
PFMC2 */
+#define PFM_CTL_offSEL116  /* The event selection for 
PFMC1 */
+#define PFM_CTL_offSEL222  /* The event selection for 
PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0 ( 0x01  << PFM_CTL_offEN0 )
-- 
2.17.0



[PATCH 0/4] nds32: Perf Support

2018-10-23 Thread Nickhu
These four commits add perf support for nds32.
There are three performance counters in nds32, and
each of them can count different events. You can
use 'perf list' to show the available events that
can be used.

Nickhu (4):
  nds32: Fix bug in bitfield.h
  nds32: Perf porting
  nds32: Add perf call-graph support.
  nds32: Add document for NDS32 PMU.

 .../devicetree/bindings/nds32/pmu.txt |   17 +
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/bitfield.h |4 +-
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  386 +
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1522 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 17 files changed, 2337 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

-- 
2.17.0



[PATCH 2/4] nds32: Perf porting

2018-10-23 Thread Nickhu
This commit ports perf to nds32.

1.Raw event:
The raw events start with 'r'.
Usage:
perf stat -e rXYZ ./app
X: the index of performance counter.
YZ: the index (converted to hexadecimal) of the event

Example:
'perf stat -e r101 ./app' means that counter 1 will count the
instruction event.

The index of counter and events can be found in
"Andes System Privilege Architecture Version 3 Manual".

Alternatively, you can run 'perf list' to find the symbolic names of the raw events.

2.Perf mmap2:

Fix unexpected perf mmap2() page fault

When mmap2() is called by the perf application,
you will encounter the error "failed to write."
with return value -EFAULT.

This is due to a page fault caused by "reading" the buffer
from the mapped legal address region in order to write it to the descriptor.
The page fault handler gets a VM_FAULT_SIGBUS return value,
which should not happen here (because this is a read request).

You can refer to kernel/events/core.c:perf_mmap_fault(...)
If "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" is evaluated
as true, you will get VM_FAULT_SIGBUS as return value.

However, this is not a write request. The flag that indicates
why the page fault happened is wrong.

Furthermore, NDS32 SPAv3 is not able to detect whether an access is a read
or a write. It only knows whether it is an instruction fetch or a data access.

Therefore, by removing the wrong flag assignment (actually, the hardware
is not able to show the reason), we can fix this bug.

3.Perf multiple events map to same counter.

When multiple events map to the same counter, the counter
counts inaccurately. This is because each counter can only count one event
at a time.
So when multiple events map to the same counter, they have to take
turns in each context.

There are two possible solutions:
1. Print an error message when multiple events map to the same counter.
But printing the error message would make the program hang in a loop, and the
LTP (Linux Test Project) tests would fail when the program hangs in a loop.

2. Don't print the error message, so the LTP tests pass. But the user needs
to know not to count events that map to the same counter at the same
time, or the user will get inaccurate results.

    We choose method 2 as the solution.

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  386 ++
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1223 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 15 files changed, 2019 insertions(+), 6 deletions(-)
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f341133d..dd448d431f5a 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -31,6 +31,7 @@ config NDS32
select HAVE_DEBUG_KMEMLEAK
select HAVE_MEMBLOCK
select HAVE_REGS_AND_STACK_ACCESS_API
+   select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select LOCKDEP_SUPPORT
select MODULES_USE_ELF_RELA
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
index bb39749a6673..16a9f54a805e 100644
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
interrupts = <18>;
};
};
+
+   pmu {
+   compatible = "andestech,nds32v3-pmu";
+   interrupts= <13>;
+   };
 };

[PATCH 0/4] nds32: Perf Support

2018-10-23 Thread Nickhu
These four commits add perf support for nds32.
There are three performance counters in nds32, and
each of them can count different events. You can
use 'perf list' to show the available events that
can be used.

Nickhu (4):
  nds32: Fix bug in bitfield.h
  nds32: Perf porting
  nds32: Add perf call-graph support.
  nds32: Add document for NDS32 PMU.

 .../devicetree/bindings/nds32/pmu.txt |   17 +
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/bitfield.h |4 +-
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  386 +
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1522 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 17 files changed, 2337 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

-- 
2.17.0



[PATCH 2/4] nds32: Perf porting

2018-10-23 Thread Nickhu
This commit ports perf to nds32.

1.Raw event:
The raw events start with 'r'.
Usage:
perf stat -e rXYZ ./app
X: the index of performance counter.
YZ: the index (converted to hexadecimal) of the event

Example:
'perf stat -e r101 ./app' means that counter 1 will count the
instruction event.

The index of counter and events can be found in
"Andes System Privilege Architecture Version 3 Manual".

Alternatively, you can run 'perf list' to find the symbolic names of the raw events.

2.Perf mmap2:

Fix unexpected perf mmap2() page fault

When mmap2() is called by the perf application,
you will encounter the error "failed to write."
with return value -EFAULT.

This is due to a page fault caused by "reading" the buffer
from the mapped legal address region in order to write it to the descriptor.
The page fault handler gets a VM_FAULT_SIGBUS return value,
which should not happen here (because this is a read request).

You can refer to kernel/events/core.c:perf_mmap_fault(...)
If "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" is evaluated
as true, you will get VM_FAULT_SIGBUS as return value.

However, this is not a write request. The flag that indicates
why the page fault happened is wrong.

Furthermore, NDS32 SPAv3 is not able to detect whether an access is a read
or a write. It only knows whether it is an instruction fetch or a data access.

Therefore, by removing the wrong flag assignment (actually, the hardware
is not able to show the reason), we can fix this bug.

3.Perf multiple events map to same counter.

When multiple events map to the same counter, the counter
counts inaccurately. This is because each counter can only count one event
at a time.
So when multiple events map to the same counter, they have to take
turns in each context.

There are two possible solutions:
1. Print an error message when multiple events map to the same counter.
But printing the error message would make the program hang in a loop, and the
LTP (Linux Test Project) tests would fail when the program hangs in a loop.

2. Don't print the error message, so the LTP tests pass. But the user needs
to know not to count events that map to the same counter at the same
time, or the user will get inaccurate results.

    We choose method 2 as the solution.

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  386 ++
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1223 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 15 files changed, 2019 insertions(+), 6 deletions(-)
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f341133d..dd448d431f5a 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -31,6 +31,7 @@ config NDS32
select HAVE_DEBUG_KMEMLEAK
select HAVE_MEMBLOCK
select HAVE_REGS_AND_STACK_ACCESS_API
+   select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select LOCKDEP_SUPPORT
select MODULES_USE_ELF_RELA
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
index bb39749a6673..16a9f54a805e 100644
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
interrupts = <18>;
};
};
+
+   pmu {
+   compatible = "andestech,nds32v3-pmu";
+   interrupts= <13>;
+   };
 };

[PATCH 4/4] nds32: Add document for NDS32 PMU.

2018-10-23 Thread Nickhu
The document for how to add NDS32 PMU
in devicetree.

Signed-off-by: Nickhu 
---
 Documentation/devicetree/bindings/nds32/pmu.txt | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt

diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt 
b/Documentation/devicetree/bindings/nds32/pmu.txt
new file mode 100644
index ..1bd15785b4ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/nds32/pmu.txt
@@ -0,0 +1,17 @@
+* NDS32 Performance Monitor Units
+
+NDS32 core have a PMU for counting cpu and cache events like cache misses.
+The NDS32 PMU representation in the device tree should be done as under:
+
+Required properties:
+
+- compatible :
+   "andestech,nds32v3-pmu"
+
+- interrupts : The interrupt number for NDS32 PMU is 13.
+
+Example:
+pmu{
+   compatible = "andestech,nds32v3-pmu";
+   interrupts = <13>;
+}
-- 
2.17.0



[PATCH 3/4] nds32: Add perf call-graph support.

2018-10-23 Thread Nickhu
The perf call-graph option can trace the callchain
between functions. This commit adds perf callchain support
for nds32. There are kernel callchains and user callchains.
The kernel callchain can trace functions in kernel
space. There are two types of user callchain: one for when the
'optimize for size' config is set, and another for when the
config is not set. The difference between the two types is that
the index of the frame pointer in the user stack is not the same.

For example:
With optimize for size:
User Stack:
-
|   lp  |
-
|   gp  |
-
|   fp  |

Without optimize for size:
User Stack:
1. non-leaf function:
-
|   lp  |
-
|   fp  |

2. leaf function:
-
|   fp  |

Signed-off-by: Nickhu 
---
 arch/nds32/kernel/perf_event_cpu.c | 299 +
 1 file changed, 299 insertions(+)

diff --git a/arch/nds32/kernel/perf_event_cpu.c 
b/arch/nds32/kernel/perf_event_cpu.c
index a6e723d0fdbc..5e00ce54d0ff 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void)
 
 device_initcall(register_pmu_driver);
 
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+   int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+   /* 0x3 means misalignment */
+   if (!kstack_end((void *)frame->fp) &&
+   !((unsigned long)frame->fp & 0x3) &&
+   ((unsigned long)frame->fp >= TASK_SIZE)) {
+   /*
+*  The array index is based on the ABI, the below graph
+*  illustrate the reasons.
+*  Function call procedure: "smw" and "lmw" will always
+*  update SP and FP for you automatically.
+*
+*  Stack Relative Address
+*  |  |  0
+*  
+*  |LP| <-- SP(before smw)  <-- FP(after smw)   -1
+*  
+*  |FP| -2
+*  
+*  |  | <-- SP(after smw)   -3
+*/
+   frame->lp = ((unsigned long *)frame->fp)[-1];
+   frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+   /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (NULL, , frame->lp, NULL);
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#else
+   /*
+* You can refer to arch/nds32/kernel/traps.c:__dump()
+* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+* And, the "sp" is not always correct.
+*
+*   Stack Relative Address
+*   |  |  0
+*   
+*   |LP| <-- SP(before smw)  -1
+*   
+*   |  | <-- SP(after smw)   -2
+*   
+*/
+   if (!kstack_end((void *)frame->sp)) {
+   frame->lp = ((unsigned long *)frame->sp)[1];
+   /* TODO: How to deal with the value in first
+* "sp" is not correct?
+*/
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (tsk, , frame->lp, NULL);
+
+   frame->sp = ((unsigned long *)frame->sp) + 1;
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+   int (*fn_record)(struct stackframe *, void *),
+   void *data)
+{
+   while (1) {
+   int ret;
+
+   if (fn_record(frame, data))
+   break;
+
+   ret = unwind_frame_kernel(frame);
+   if (ret < 0)
+   break;
+   }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called

[PATCH 4/4] nds32: Add document for NDS32 PMU.

2018-10-23 Thread Nickhu
The document for how to add NDS32 PMU
in devicetree.

Signed-off-by: Nickhu 
---
 Documentation/devicetree/bindings/nds32/pmu.txt | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt

diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt 
b/Documentation/devicetree/bindings/nds32/pmu.txt
new file mode 100644
index ..1bd15785b4ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/nds32/pmu.txt
@@ -0,0 +1,17 @@
+* NDS32 Performance Monitor Units
+
+NDS32 core have a PMU for counting cpu and cache events like cache misses.
+The NDS32 PMU representation in the device tree should be done as under:
+
+Required properties:
+
+- compatible :
+   "andestech,nds32v3-pmu"
+
+- interrupts : The interrupt number for NDS32 PMU is 13.
+
+Example:
+pmu{
+   compatible = "andestech,nds32v3-pmu";
+   interrupts = <13>;
+}
-- 
2.17.0



[PATCH 3/4] nds32: Add perf call-graph support.

2018-10-23 Thread Nickhu
The perf call-graph option can trace the callchain
between functions. This commit adds perf callchain support
for nds32. There are kernel callchains and user callchains.
The kernel callchain can trace functions in kernel
space. There are two types of user callchain: one for when the
'optimize for size' config is set, and another for when the
config is not set. The difference between the two types is that
the index of the frame pointer in the user stack is not the same.

For example:
With optimize for size:
User Stack:
-
|   lp  |
-
|   gp  |
-
|   fp  |

Without optimize for size:
User Stack:
1. non-leaf function:
-
|   lp  |
-
|   fp  |

2. leaf function:
-
|   fp  |

Signed-off-by: Nickhu 
---
 arch/nds32/kernel/perf_event_cpu.c | 299 +
 1 file changed, 299 insertions(+)

diff --git a/arch/nds32/kernel/perf_event_cpu.c 
b/arch/nds32/kernel/perf_event_cpu.c
index a6e723d0fdbc..5e00ce54d0ff 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void)
 
 device_initcall(register_pmu_driver);
 
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+   int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+   /* 0x3 means misalignment */
+   if (!kstack_end((void *)frame->fp) &&
+   !((unsigned long)frame->fp & 0x3) &&
+   ((unsigned long)frame->fp >= TASK_SIZE)) {
+   /*
+*  The array index is based on the ABI, the below graph
+*  illustrate the reasons.
+*  Function call procedure: "smw" and "lmw" will always
+*  update SP and FP for you automatically.
+*
+*  Stack Relative Address
+*  |  |  0
+*  
+*  |LP| <-- SP(before smw)  <-- FP(after smw)   -1
+*  
+*  |FP| -2
+*  
+*  |  | <-- SP(after smw)   -3
+*/
+   frame->lp = ((unsigned long *)frame->fp)[-1];
+   frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+   /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (NULL, , frame->lp, NULL);
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#else
+   /*
+* You can refer to arch/nds32/kernel/traps.c:__dump()
+* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+* And, the "sp" is not always correct.
+*
+*   Stack Relative Address
+*   |  |  0
+*   
+*   |LP| <-- SP(before smw)  -1
+*   
+*   |  | <-- SP(after smw)   -2
+*   
+*/
+   if (!kstack_end((void *)frame->sp)) {
+   frame->lp = ((unsigned long *)frame->sp)[1];
+   /* TODO: How to deal with the value in first
+* "sp" is not correct?
+*/
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (tsk, , frame->lp, NULL);
+
+   frame->sp = ((unsigned long *)frame->sp) + 1;
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+   int (*fn_record)(struct stackframe *, void *),
+   void *data)
+{
+   while (1) {
+   int ret;
+
+   if (fn_record(frame, data))
+   break;
+
+   ret = unwind_frame_kernel(frame);
+   if (ret < 0)
+   break;
+   }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called

[PATCH 3/3] nds32: Add unaligned access in kernel space.

2018-10-18 Thread Nickhu
My colleague encountered a kernel panic caused by an unaligned access
in kernel space. Here is the situation, with the structure 'TP_STRUCT__entry':

TP_STRUCT__entry(
__field(u32,tb_id   )
__field(int,err )
__field(int,oif )
__field(int,iif )
__field(__u8,   tos )
__field(__u8,   scope   )
__field(__u8,   flags   )
__field(u8, proto   )
__array(__u8,   src,4   )
__array(__u8,   dst,4   )
__array(__u8,   gw, 4   )
__array(__u8,   saddr,  4   )
__field(u16,sport   )
__field(u16,dport   )
__dynamic_array(char,  name,   IFNAMSIZ )
)

When he tried to access an element in the structure, a kernel panic
happened. Although he rearranged the order of the structure to fix
the problem, we cannot ignore the fact that unaligned access is still
needed in kernel space. It can help us avoid a kernel panic
when a reasonable unaligned address access happens. Users need to be
aware that some unreasonable unaligned addresses may still cause bugs
in the kernel.

The config 'HAVE_EFFICIENT_UNALIGNED_ACCESS' must be used together with the hw
unaligned access config 'HW_SUPPORT_UNALIGNMENT_ACCESS'. In the sw
unaligned access handler, the code 'get_inst()' in arch/nds32/mm/
alignment.c:522 would be generated as a load word instruction if
'HAVE_EFFICIENT_UNALIGNED_ACCESS' is set. This would cause the kernel to
hang in a loop if the address of the load word instruction is unaligned.
For example:

0xbc39e: lwi450 $r0, [$r1], if the $r1 causes an unaligned access.
        |
        | unaligned access handler
        v
arch/nds32/mm/alignment.c:522: get_inst():0xb0874b7e lwi450 $r2, [$r3],
$r3 is the address '0xbc39e', it would cause a kernel unaligned access.
        |
        | unaligned access handler
        v
arch/nds32/mm/alignment.c:522: get_inst():0xb0874b7e lwi450 $r2, [$r3],
$r3 is the address '0xb0874b7e', it would cause a kernel unaligned access.

The kernel hangs in this loop.

Signed-off-by: Nickhu 
---
 arch/nds32/kernel/traps.c | 4 +++-
 arch/nds32/mm/alignment.c | 6 --
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 1496aab48998..dcde7abc5515 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -331,6 +331,7 @@ void do_revinsn(struct pt_regs *regs)
 #ifdef CONFIG_ALIGNMENT_TRAP
 extern int unalign_access_mode;
 extern int do_unaligned_access(unsigned long addr, struct pt_regs *regs);
+extern int va_kernel_present(unsigned long addr);
 #endif
 void do_dispatch_general(unsigned long entry, unsigned long addr,
 unsigned long itype, struct pt_regs *regs,
@@ -341,7 +342,8 @@ void do_dispatch_general(unsigned long entry, unsigned long 
addr,
if (type == ETYPE_ALIGNMENT_CHECK) {
 #ifdef CONFIG_ALIGNMENT_TRAP
/* Alignment check */
-   if (user_mode(regs) && unalign_access_mode) {
+   if ((user_mode(regs) && unalign_access_mode) ||
+   va_kernel_present(addr)) {
int ret;
ret = do_unaligned_access(addr, regs);
 
diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index 66a556befd05..2d7a08af6622 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -524,8 +524,10 @@ int do_unaligned_access(unsigned long addr, struct pt_regs 
*regs)
DEBUG((unalign_access_debug > 0), 1,
  "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr,
  regs->ipc, inst);
-
-   set_fs(USER_DS);
+   if ((user_mode(regs) && unalign_access_mode))
+   set_fs(USER_DS);
+   else if (va_kernel_present(addr))
+   set_fs(KERNEL_DS);
 
if (inst & NDS32_16BIT_INSTRUCTION)
ret = do_16((inst >> 16) & 0x, regs);
-- 
2.17.0



[PATCH 1/1] Perf: Compile failed when compile with libelf.

2018-10-18 Thread Nickhu
The error message:
=
util/symbol-elf.c:46:12: error: static declaration of 'elf_getphdrnum'
follows non-static declaration
static int elf_getphdrnum(Elf *elf, size_t *dst)
^~
In file included from util/symbol.h:20,
 from util/symbol-elf.c:9:
/local/nickhu/build-system-3/toolchain/nds32le-linux-glibc-v3-upstream/
nds32le-linux/sysroot/usr/include/libelf.h:266:12: note: previous declaration
of 'elf_getphdrnum' was here
extern int elf_getphdrnum (Elf *__elf, size_t *__dst);
^~
util/symbol-elf.c:62:12: error: static declaration of 'elf_getshdrstrndx'
follows non-static declaration
static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe
_unused)
^
In file included from util/symbol.h:20,
 from util/symbol-elf.c:9:
/local/nickhu/build-system-3/toolchain/nds32le-linux-glibc-v3-upstream/
nds32le-linux/sysroot/usr/include/libelf.h:316:12: note: previous declaration
of 'elf_getshdrstrndx' was here
extern int elf_getshdrstrndx (Elf *__elf, size_t *__dst);
=

Fix it.

Signed-off-by: Nickhu 
---
 tools/perf/util/symbol-elf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 29770ea61768..3ccdfe603d67 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -43,7 +43,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
 #endif
 
 #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
-static int elf_getphdrnum(Elf *elf, size_t *dst)
+int elf_getphdrnum(Elf *elf, size_t *dst)
 {
GElf_Ehdr gehdr;
GElf_Ehdr *ehdr;
@@ -59,7 +59,7 @@ static int elf_getphdrnum(Elf *elf, size_t *dst)
 #endif
 
 #ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT
-static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst 
__maybe_unused)
+int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused)
 {
pr_err("%s: update your libelf to > 0.140, this one lacks 
elf_getshdrstrndx().\n", __func__);
return -1;
-- 
2.17.0



[PATCH 0/1] nds32: Fix gcc 8.0 compiler option incompatible.

2018-10-18 Thread Nickhu
Fix a gcc 8.0 compiler option incompatibility that occurs when the kernel
configs for ftrace and frame pointer are both selected.

Nickhu (1):
  nds32: Fix gcc 8.0 compiler option incompatible.

 arch/nds32/mm/Makefile | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

-- 
2.17.0



[PATCH 0/1] Perf: Compile failed when compile with libelf.

2018-10-18 Thread Nickhu
Fix a perf build failure when compiling with libelf.

Nickhu (1):
  Perf: Compile failed when compile with libelf.

 tools/perf/util/symbol-elf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

-- 
2.17.0



[PATCH 1/1] Perf: Compile failed when compile with libelf.

2018-10-18 Thread Nickhu
The error message:
=
util/symbol-elf.c:46:12: error: static declaration of 'elf_getphdrnum'
follows non-static declaration
static int elf_getphdrnum(Elf *elf, size_t *dst)
^~
In file included from util/symbol.h:20,
 from util/symbol-elf.c:9:
/local/nickhu/build-system-3/toolchain/nds32le-linux-glibc-v3-upstream/
nds32le-linux/sysroot/usr/include/libelf.h:266:12: note: previous declaration
of 'elf_getphdrnum' was here
extern int elf_getphdrnum (Elf *__elf, size_t *__dst);
^~
util/symbol-elf.c:62:12: error: static declaration of 'elf_getshdrstrndx'
follows non-static declaration
static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe
_unused)
^
In file included from util/symbol.h:20,
 from util/symbol-elf.c:9:
/local/nickhu/build-system-3/toolchain/nds32le-linux-glibc-v3-upstream/
nds32le-linux/sysroot/usr/include/libelf.h:316:12: note: previous declaration
of 'elf_getshdrstrndx' was here
extern int elf_getshdrstrndx (Elf *__elf, size_t *__dst);
=

Fix it.

Signed-off-by: Nickhu 
---
 tools/perf/util/symbol-elf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 29770ea61768..3ccdfe603d67 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -43,7 +43,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
 #endif
 
 #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
-static int elf_getphdrnum(Elf *elf, size_t *dst)
+int elf_getphdrnum(Elf *elf, size_t *dst)
 {
GElf_Ehdr gehdr;
GElf_Ehdr *ehdr;
@@ -59,7 +59,7 @@ static int elf_getphdrnum(Elf *elf, size_t *dst)
 #endif
 
 #ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT
-static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst 
__maybe_unused)
+int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused)
 {
pr_err("%s: update your libelf to > 0.140, this one lacks 
elf_getshdrstrndx().\n", __func__);
return -1;
-- 
2.17.0



[PATCH 0/1] nds32: Fix gcc 8.0 compiler option incompatible.

2018-10-18 Thread Nickhu
Fix the gcc 8.0 compiler option incompatibility that occurs when the
ftrace and frame pointer kernel config options are both chosen.

Nickhu (1):
  nds32: Fix gcc 8.0 compiler option incompatible.

 arch/nds32/mm/Makefile | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

-- 
2.17.0



[PATCH 0/1] Perf: Compile failed when compile with libelf.

2018-10-18 Thread Nickhu
Fix the perf build failure that occurs when compiling with libelf.

Nickhu (1):
  Perf: Compile failed when compile with libelf.

 tools/perf/util/symbol-elf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

-- 
2.17.0



[PATCH 3/3] nds32: Add unaligned access in kernel space.

2018-10-18 Thread Nickhu
My colleague encountered a kernel panic caused by an unaligned access
in kernel space. Here is the situation, with the structure 'TP_STRUCT__entry':

TP_STRUCT__entry(
__field(u32,tb_id   )
__field(int,err )
__field(int,oif )
__field(int,iif )
__field(__u8,   tos )
__field(__u8,   scope   )
__field(__u8,   flags   )
__field(u8, proto   )
__array(__u8,   src,4   )
__array(__u8,   dst,4   )
__array(__u8,   gw, 4   )
__array(__u8,   saddr,  4   )
__field(u16,sport   )
__field(u16,dport   )
__dynamic_array(char,  name,   IFNAMSIZ )
)

When he tried to access an element of the structure, the kernel
panicked. Although he rearranged the order of the structure's fields to
work around the problem, we cannot ignore the fact that unaligned access
is still needed in kernel space. Handling it helps us avoid a kernel panic
when a reasonable unaligned access happens. Users need to be aware
that some unreasonable unaligned addresses may still cause bugs
in the kernel.

The config 'HAVE_EFFICIENT_UNALIGNED_ACCESS' must only be enabled together
with the hw unaligned access config 'HW_SUPPORT_UNALIGNMENT_ACCESS'. In the
sw unaligned access handler, the code 'get_inst()' in arch/nds32/mm/
alignment.c:522 would be generated as a load word instruction if
'HAVE_EFFICIENT_UNALIGNED_ACCESS' is set. This would cause the kernel
to hang in a loop if the address of the load word instruction is unaligned.
For example:

0xbc39e: lwi450 $r0, [$r1], where $r1 causes an unaligned access.
|
| unaligned access handler
v
arch/nds32/mm/alignment.c:522: get_inst(): 0xb0874b7e lwi450 $r2, [$r3],
$r3 is the address '0xbc39e', which causes a kernel unaligned access.
|
| unaligned access handler
v
arch/nds32/mm/alignment.c:522: get_inst(): 0xb0874b7e lwi450 $r2, [$r3],
$r3 is the address '0xb0874b7e', which causes a kernel unaligned access.

The kernel hangs in this loop.

Signed-off-by: Nickhu 
---
 arch/nds32/kernel/traps.c | 4 +++-
 arch/nds32/mm/alignment.c | 6 --
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 1496aab48998..dcde7abc5515 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -331,6 +331,7 @@ void do_revinsn(struct pt_regs *regs)
 #ifdef CONFIG_ALIGNMENT_TRAP
 extern int unalign_access_mode;
 extern int do_unaligned_access(unsigned long addr, struct pt_regs *regs);
+extern int va_kernel_present(unsigned long addr);
 #endif
 void do_dispatch_general(unsigned long entry, unsigned long addr,
 unsigned long itype, struct pt_regs *regs,
@@ -341,7 +342,8 @@ void do_dispatch_general(unsigned long entry, unsigned long 
addr,
if (type == ETYPE_ALIGNMENT_CHECK) {
 #ifdef CONFIG_ALIGNMENT_TRAP
/* Alignment check */
-   if (user_mode(regs) && unalign_access_mode) {
+   if ((user_mode(regs) && unalign_access_mode) ||
+   va_kernel_present(addr)) {
int ret;
ret = do_unaligned_access(addr, regs);
 
diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index 66a556befd05..2d7a08af6622 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -524,8 +524,10 @@ int do_unaligned_access(unsigned long addr, struct pt_regs 
*regs)
DEBUG((unalign_access_debug > 0), 1,
  "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr,
  regs->ipc, inst);
-
-   set_fs(USER_DS);
+   if ((user_mode(regs) && unalign_access_mode))
+   set_fs(USER_DS);
+   else if (va_kernel_present(addr))
+   set_fs(KERNEL_DS);
 
if (inst & NDS32_16BIT_INSTRUCTION)
ret = do_16((inst >> 16) & 0x, regs);
-- 
2.17.0



[PATCH 1/3] nds32: Fix instruction simulator bug for unaligned access handler.

2018-10-18 Thread Nickhu
When emulating 16-bit instructions, the mapping of general
purpose registers is not the same as for 32-bit instructions.

Example:
The 16-bit instruction 'LWI450 r16, [r15]' is encoded as
'1011010110001110', and the target register field is decoded as index=12.

But the index of the target register should be 16. So the register
mapping in the unaligned access handler is wrong.

Signed-off-by: Nickhu 
---
 arch/nds32/mm/alignment.c | 37 +
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index e1aed9dc692d..66a556befd05 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -152,12 +152,16 @@ extern int va_writable(struct pt_regs *regs, unsigned 
long addr);
 
 int unalign_access_mode = 0, unalign_access_debug = 0;
 
-static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx)
+static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx,
+   int idx_mode)
 {
/* this should be consistent with ptrace.h */
-   if (idx >= 0 && idx <= 25)  /* R0-R25 */
-   return >uregs[0] + idx;
-   else if (idx >= 28 && idx <= 30)/* FP, GP, LP */
+   if (idx >= 0 && idx <= 25) {/* R0-R25 */
+   if (idx_mode == 4 && idx > 11)
+   return >uregs[0] + idx + 4;
+   else
+   return >uregs[0] + idx;
+   } else if (idx >= 28 && idx <= 30)  /* FP, GP, LP */
return >fp + (idx - 28);
else if (idx == 31) /* SP */
return >sp;
@@ -270,10 +274,10 @@ static inline int do_16(unsigned long inst, struct 
pt_regs *regs)
}
 
if (addr_mode == 3) {
-   unaligned_addr = *idx_to_addr(regs, RA3(inst));
+   unaligned_addr = *idx_to_addr(regs, RA3(inst), addr_mode);
source_idx = RA3(inst);
} else {
-   unaligned_addr = *idx_to_addr(regs, RA5(inst));
+   unaligned_addr = *idx_to_addr(regs, RA5(inst), addr_mode);
source_idx = RA5(inst);
}
 
@@ -293,16 +297,17 @@ static inline int do_16(unsigned long inst, struct 
pt_regs *regs)
return -EACCES;
 
get_data(unaligned_addr, _val, len);
-   *idx_to_addr(regs, target_idx) = target_val;
+   *idx_to_addr(regs, target_idx, idx_mode) = target_val;
} else {
if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len))
return -EACCES;
-   target_val = *idx_to_addr(regs, target_idx);
+   target_val = *idx_to_addr(regs, target_idx, idx_mode);
set_data((void *)unaligned_addr, target_val, len);
}
 
if (!regular)
-   *idx_to_addr(regs, source_idx) = unaligned_addr + shift;
+   *idx_to_addr(regs, source_idx, idx_mode) =
+   unaligned_addr + shift;
regs->ipc += 2;
 
return 0;
@@ -312,10 +317,10 @@ static inline int do_16(unsigned long inst, struct 
pt_regs *regs)
 
 static inline int do_32(unsigned long inst, struct pt_regs *regs)
 {
-   int imm, regular, load, len, sign_ext;
+   int imm, regular, load, len, sign_ext, idx_mode = 5;
unsigned long unaligned_addr, target_val, shift;
 
-   unaligned_addr = *idx_to_addr(regs, RA(inst));
+   unaligned_addr = *idx_to_addr(regs, RA(inst), idx_mode);
 
switch ((inst >> 25) << 1) {
 
@@ -472,7 +477,7 @@ static inline int do_32(unsigned long inst, struct pt_regs 
*regs)
if (imm)
shift = GET_IMMSVAL(IMM(inst)) * len;
else
-   shift = *idx_to_addr(regs, RB(inst)) << SV(inst);
+   shift = *idx_to_addr(regs, RB(inst), idx_mode) << SV(inst);
 
if (regular)
unaligned_addr += shift;
@@ -485,21 +490,21 @@ static inline int do_32(unsigned long inst, struct 
pt_regs *regs)
get_data(unaligned_addr, _val, len);
 
if (sign_ext)
-   *idx_to_addr(regs, RT(inst)) =
+   *idx_to_addr(regs, RT(inst), idx_mode) =
sign_extend(target_val, len);
else
-   *idx_to_addr(regs, RT(inst)) = target_val;
+   *idx_to_addr(regs, RT(inst), idx_mode) = target_val;
} else {
 
if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len))
return -EACCES;
 
-   target_val = *idx_to_addr(regs, RT(inst));
+   target_val = *idx_to_addr(regs, RT(inst), idx_mode);
set_data((void *)unaligned_addr, target_val, len);
}
 
if (!regular)
-   *idx_to_addr(regs

[PATCH 2/3] nds32: Add 'HAVE_EFFICIENT_UNALIGNED_ACCESS' config

2018-10-18 Thread Nickhu
As I understand it, this config optimizes the generated code.
When an unaligned access is needed, the load word instruction can
still be used if unaligned access is supported; otherwise the load byte
instruction is used. So this config needs unaligned access support.

'HAVE_EFFICIENT_UNALIGNED_ACCESS' and 'HW_SUPPORT_UNALIGNMENT_ACCESS' are
enabled by default on nds32.

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig.cpu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu
index b8c8984d1456..b8eecd0cde6b 100644
--- a/arch/nds32/Kconfig.cpu
+++ b/arch/nds32/Kconfig.cpu
@@ -111,8 +111,9 @@ config ALIGNMENT_TRAP
 
 config HW_SUPPORT_UNALIGNMENT_ACCESS
bool "Kernel support unaligned access handling by hw"
+   select HAVE_EFFICIENT_UNALIGNED_ACCESS
depends on !ALIGNMENT_TRAP
-   default n
+   default y
help
  Andes processors load/store world/half-word instructions can access
  unaligned memory locations without generating the Data Alignment
-- 
2.17.0



[PATCH 2/3] nds32: Add 'HAVE_EFFICIENT_UNALIGNED_ACCESS' config

2018-10-18 Thread Nickhu
As I understand it, this config optimizes the generated code.
When an unaligned access is needed, the load word instruction can
still be used if unaligned access is supported; otherwise the load byte
instruction is used. So this config needs unaligned access support.

'HAVE_EFFICIENT_UNALIGNED_ACCESS' and 'HW_SUPPORT_UNALIGNMENT_ACCESS' are
enabled by default on nds32.

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig.cpu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu
index b8c8984d1456..b8eecd0cde6b 100644
--- a/arch/nds32/Kconfig.cpu
+++ b/arch/nds32/Kconfig.cpu
@@ -111,8 +111,9 @@ config ALIGNMENT_TRAP
 
 config HW_SUPPORT_UNALIGNMENT_ACCESS
bool "Kernel support unaligned access handling by hw"
+   select HAVE_EFFICIENT_UNALIGNED_ACCESS
depends on !ALIGNMENT_TRAP
-   default n
+   default y
help
  Andes processors load/store world/half-word instructions can access
  unaligned memory locations without generating the Data Alignment
-- 
2.17.0



[PATCH 1/3] nds32: Fix instruction simulator bug for unaligned access handler.

2018-10-18 Thread Nickhu
When emulating 16-bit instructions, the mapping of general
purpose registers is not the same as for 32-bit instructions.

Example:
The 16-bit instruction 'LWI450 r16, [r15]' is encoded as
'1011010110001110', and the target register field is decoded as index=12.

But the index of the target register should be 16. So the register
mapping in the unaligned access handler is wrong.

Signed-off-by: Nickhu 
---
 arch/nds32/mm/alignment.c | 37 +
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index e1aed9dc692d..66a556befd05 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -152,12 +152,16 @@ extern int va_writable(struct pt_regs *regs, unsigned 
long addr);
 
 int unalign_access_mode = 0, unalign_access_debug = 0;
 
-static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx)
+static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx,
+   int idx_mode)
 {
/* this should be consistent with ptrace.h */
-   if (idx >= 0 && idx <= 25)  /* R0-R25 */
-   return >uregs[0] + idx;
-   else if (idx >= 28 && idx <= 30)/* FP, GP, LP */
+   if (idx >= 0 && idx <= 25) {/* R0-R25 */
+   if (idx_mode == 4 && idx > 11)
+   return >uregs[0] + idx + 4;
+   else
+   return >uregs[0] + idx;
+   } else if (idx >= 28 && idx <= 30)  /* FP, GP, LP */
return >fp + (idx - 28);
else if (idx == 31) /* SP */
return >sp;
@@ -270,10 +274,10 @@ static inline int do_16(unsigned long inst, struct 
pt_regs *regs)
}
 
if (addr_mode == 3) {
-   unaligned_addr = *idx_to_addr(regs, RA3(inst));
+   unaligned_addr = *idx_to_addr(regs, RA3(inst), addr_mode);
source_idx = RA3(inst);
} else {
-   unaligned_addr = *idx_to_addr(regs, RA5(inst));
+   unaligned_addr = *idx_to_addr(regs, RA5(inst), addr_mode);
source_idx = RA5(inst);
}
 
@@ -293,16 +297,17 @@ static inline int do_16(unsigned long inst, struct 
pt_regs *regs)
return -EACCES;
 
get_data(unaligned_addr, _val, len);
-   *idx_to_addr(regs, target_idx) = target_val;
+   *idx_to_addr(regs, target_idx, idx_mode) = target_val;
} else {
if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len))
return -EACCES;
-   target_val = *idx_to_addr(regs, target_idx);
+   target_val = *idx_to_addr(regs, target_idx, idx_mode);
set_data((void *)unaligned_addr, target_val, len);
}
 
if (!regular)
-   *idx_to_addr(regs, source_idx) = unaligned_addr + shift;
+   *idx_to_addr(regs, source_idx, idx_mode) =
+   unaligned_addr + shift;
regs->ipc += 2;
 
return 0;
@@ -312,10 +317,10 @@ static inline int do_16(unsigned long inst, struct 
pt_regs *regs)
 
 static inline int do_32(unsigned long inst, struct pt_regs *regs)
 {
-   int imm, regular, load, len, sign_ext;
+   int imm, regular, load, len, sign_ext, idx_mode = 5;
unsigned long unaligned_addr, target_val, shift;
 
-   unaligned_addr = *idx_to_addr(regs, RA(inst));
+   unaligned_addr = *idx_to_addr(regs, RA(inst), idx_mode);
 
switch ((inst >> 25) << 1) {
 
@@ -472,7 +477,7 @@ static inline int do_32(unsigned long inst, struct pt_regs 
*regs)
if (imm)
shift = GET_IMMSVAL(IMM(inst)) * len;
else
-   shift = *idx_to_addr(regs, RB(inst)) << SV(inst);
+   shift = *idx_to_addr(regs, RB(inst), idx_mode) << SV(inst);
 
if (regular)
unaligned_addr += shift;
@@ -485,21 +490,21 @@ static inline int do_32(unsigned long inst, struct 
pt_regs *regs)
get_data(unaligned_addr, _val, len);
 
if (sign_ext)
-   *idx_to_addr(regs, RT(inst)) =
+   *idx_to_addr(regs, RT(inst), idx_mode) =
sign_extend(target_val, len);
else
-   *idx_to_addr(regs, RT(inst)) = target_val;
+   *idx_to_addr(regs, RT(inst), idx_mode) = target_val;
} else {
 
if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len))
return -EACCES;
 
-   target_val = *idx_to_addr(regs, RT(inst));
+   target_val = *idx_to_addr(regs, RT(inst), idx_mode);
set_data((void *)unaligned_addr, target_val, len);
}
 
if (!regular)
-   *idx_to_addr(regs

[PATCH 1/1] nds32: Fix gcc 8.0 compiler option incompatible.

2018-10-18 Thread Nickhu
When the ftrace and frame pointer kernel config options are both
chosen, the kernel's compiler options become incompatible.
Error message:
nds32le-linux-gcc: error: -pg and -fomit-frame-pointer are incompatible

Signed-off-by: Nickhu 
Signed-off-by: Zong Li 
---
 arch/nds32/mm/Makefile | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/nds32/mm/Makefile b/arch/nds32/mm/Makefile
index 6b6855852223..7c5c15ad854a 100644
--- a/arch/nds32/mm/Makefile
+++ b/arch/nds32/mm/Makefile
@@ -4,4 +4,8 @@ obj-y   := extable.o tlb.o \
 
 obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
 obj-$(CONFIG_HIGHMEM)   += highmem.o
-CFLAGS_proc-n13.o  += -fomit-frame-pointer
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_proc.o = $(CC_FLAGS_FTRACE)
+endif
+CFLAGS_proc.o  += -fomit-frame-pointer
-- 
2.17.0



[PATCH 1/1] nds32: Fix gcc 8.0 compiler option incompatible.

2018-10-18 Thread Nickhu
When the ftrace and frame pointer kernel config options are both
chosen, the kernel's compiler options become incompatible.
Error message:
nds32le-linux-gcc: error: -pg and -fomit-frame-pointer are incompatible

Signed-off-by: Nickhu 
Signed-off-by: Zong Li 
---
 arch/nds32/mm/Makefile | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/nds32/mm/Makefile b/arch/nds32/mm/Makefile
index 6b6855852223..7c5c15ad854a 100644
--- a/arch/nds32/mm/Makefile
+++ b/arch/nds32/mm/Makefile
@@ -4,4 +4,8 @@ obj-y   := extable.o tlb.o \
 
 obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
 obj-$(CONFIG_HIGHMEM)   += highmem.o
-CFLAGS_proc-n13.o  += -fomit-frame-pointer
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_proc.o = $(CC_FLAGS_FTRACE)
+endif
+CFLAGS_proc.o  += -fomit-frame-pointer
-- 
2.17.0



[PATCH 0/3] nds32: Unaligned access handler fix

2018-10-18 Thread Nickhu
These patches are about the unaligned access handler. We fix some
bugs in the unaligned access handler and add some kernel configs
for it. Then we add software handling of kernel-space unaligned
accesses to the handler.

Nickhu (3):
  nds32: Fix instruction simulator bug for unaligned access handler.
  nds32: Add 'HAVE_EFFICIENT_UNALIGNED_ACCESS' config
  nds32: Add unaligned access in kernel space.

 arch/nds32/Kconfig.cpu|  3 ++-
 arch/nds32/kernel/traps.c |  4 +++-
 arch/nds32/mm/alignment.c | 43 +++
 3 files changed, 30 insertions(+), 20 deletions(-)

-- 
2.17.0



[PATCH 0/3] nds32: Unaligned access handler fix

2018-10-18 Thread Nickhu
These patches are about the unaligned access handler. We fix some
bugs in the unaligned access handler and add some kernel configs
for it. Then we add software handling of kernel-space unaligned
accesses to the handler.

Nickhu (3):
  nds32: Fix instruction simulator bug for unaligned access handler.
  nds32: Add 'HAVE_EFFICIENT_UNALIGNED_ACCESS' config
  nds32: Add unaligned access in kernel space.

 arch/nds32/Kconfig.cpu|  3 ++-
 arch/nds32/kernel/traps.c |  4 +++-
 arch/nds32/mm/alignment.c | 43 +++
 3 files changed, 30 insertions(+), 20 deletions(-)

-- 
2.17.0



[PATCH 3/5] nds32: Add perf call-graph support.

2018-10-18 Thread Nickhu
The perf call-graph option can trace the callchain
between functions. This commit adds perf callchain support
for nds32. There are a kernel callchain and a user callchain.
The kernel callchain can trace functions in kernel
space. There are two types of user callchain: one for when the
'optimize for size' config is set, and another for when that
config is not set. The difference between the two types is that
the index of the frame pointer in the user stack is not the same.

For example:
With optimize for size:
User Stack:
-
|   lp  |
-
|   gp  |
-
|   fp  |

Without optimize for size:
User Stack:
1. non-leaf function:
-
|   lp  |
-
|   fp  |

2. leaf function:
-
|   fp  |

Signed-off-by: Nickhu 
---
 arch/nds32/kernel/perf_event_cpu.c | 299 +
 1 file changed, 299 insertions(+)

diff --git a/arch/nds32/kernel/perf_event_cpu.c 
b/arch/nds32/kernel/perf_event_cpu.c
index c39c6746a3e8..7bb4ebb87b5c 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -1240,6 +1240,305 @@ static int __init register_pmu_driver(void)
 
 device_initcall(register_pmu_driver);
 
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+   int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+   /* 0x3 means misalignment */
+   if (!kstack_end((void *)frame->fp) &&
+   !((unsigned long)frame->fp & 0x3) &&
+   ((unsigned long)frame->fp >= TASK_SIZE)) {
+   /*
+*  The array index is based on the ABI, the below graph
+*  illustrate the reasons.
+*  Function call procedure: "smw" and "lmw" will always
+*  update SP and FP for you automatically.
+*
+*  Stack Relative Address
+*  |  |  0
+*  
+*  |LP| <-- SP(before smw)  <-- FP(after smw)   -1
+*  
+*  |FP| -2
+*  
+*  |  | <-- SP(after smw)   -3
+*/
+   frame->lp = ((unsigned long *)frame->fp)[-1];
+   frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+   /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (NULL, , frame->lp, NULL);
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#else
+   /*
+* You can refer to arch/nds32/kernel/traps.c:__dump()
+* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+* And, the "sp" is not always correct.
+*
+*   Stack Relative Address
+*   |  |  0
+*   
+*   |LP| <-- SP(before smw)  -1
+*   
+*   |  | <-- SP(after smw)   -2
+*   
+*/
+   if (!kstack_end((void *)frame->sp)) {
+   frame->lp = ((unsigned long *)frame->sp)[1];
+   /* TODO: How to deal with the value in first
+* "sp" is not correct?
+*/
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (tsk, , frame->lp, NULL);
+
+   frame->sp = ((unsigned long *)frame->sp) + 1;
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+   int (*fn_record)(struct stackframe *, void *),
+   void *data)
+{
+   while (1) {
+   int ret;
+
+   if (fn_record(frame, data))
+   break;
+
+   ret = unwind_frame_kernel(frame);
+   if (ret < 0)
+   break;
+   }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called

[PATCH 3/5] nds32: Add perf call-graph support.

2018-10-18 Thread Nickhu
The perf call-graph option can trace the callchain
between functions. This commit adds perf callchain support
for nds32. There are a kernel callchain and a user callchain.
The kernel callchain can trace functions in kernel
space. There are two types of user callchain: one for when the
'optimize for size' config is set, and another for when that
config is not set. The difference between the two types is that
the index of the frame pointer in the user stack is not the same.

For example:
With optimize for size:
User Stack:
-
|   lp  |
-
|   gp  |
-
|   fp  |

Without optimize for size:
User Stack:
1. non-leaf function:
-
|   lp  |
-
|   fp  |

2. leaf function:
-
|   fp  |

Signed-off-by: Nickhu 
---
 arch/nds32/kernel/perf_event_cpu.c | 299 +
 1 file changed, 299 insertions(+)

diff --git a/arch/nds32/kernel/perf_event_cpu.c 
b/arch/nds32/kernel/perf_event_cpu.c
index c39c6746a3e8..7bb4ebb87b5c 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -1240,6 +1240,305 @@ static int __init register_pmu_driver(void)
 
 device_initcall(register_pmu_driver);
 
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+   int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+   /* 0x3 means misalignment */
+   if (!kstack_end((void *)frame->fp) &&
+   !((unsigned long)frame->fp & 0x3) &&
+   ((unsigned long)frame->fp >= TASK_SIZE)) {
+   /*
+*  The array index is based on the ABI, the below graph
+*  illustrate the reasons.
+*  Function call procedure: "smw" and "lmw" will always
+*  update SP and FP for you automatically.
+*
+*  Stack Relative Address
+*  |  |  0
+*  
+*  |LP| <-- SP(before smw)  <-- FP(after smw)   -1
+*  
+*  |FP| -2
+*  
+*  |  | <-- SP(after smw)   -3
+*/
+   frame->lp = ((unsigned long *)frame->fp)[-1];
+   frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+   /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (NULL, , frame->lp, NULL);
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#else
+   /*
+* You can refer to arch/nds32/kernel/traps.c:__dump()
+* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+* And, the "sp" is not always correct.
+*
+*   Stack Relative Address
+*   |  |  0
+*   
+*   |LP| <-- SP(before smw)  -1
+*   
+*   |  | <-- SP(after smw)   -2
+*   
+*/
+   if (!kstack_end((void *)frame->sp)) {
+   frame->lp = ((unsigned long *)frame->sp)[1];
+   /* TODO: How to deal with the value in first
+* "sp" is not correct?
+*/
+   if (__kernel_text_address(frame->lp))
+   frame->lp = ftrace_graph_ret_addr
+   (tsk, , frame->lp, NULL);
+
+   frame->sp = ((unsigned long *)frame->sp) + 1;
+
+   return 0;
+   } else {
+   return -EPERM;
+   }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+   int (*fn_record)(struct stackframe *, void *),
+   void *data)
+{
+   while (1) {
+   int ret;
+
+   if (fn_record(frame, data))
+   break;
+
+   ret = unwind_frame_kernel(frame);
+   if (ret < 0)
+   break;
+   }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called

[PATCH 2/5] nds32: Fix bug in bitfield.h

2018-10-18 Thread Nickhu
There are two bitfield bugs for the performance counters
in bitfield.h:

PFM_CTL_offSEL1 21 --> 16
PFM_CTL_offSEL2 27 --> 22

This commit fixes them.

Signed-off-by: Nickhu 
---
 arch/nds32/include/asm/bitfield.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/include/asm/bitfield.h 
b/arch/nds32/include/asm/bitfield.h
index 8e84fc385b94..19b2841219ad 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -692,8 +692,8 @@
 #define PFM_CTL_offKU1 13  /* Enable user mode event counting for 
PFMC1 */
 #define PFM_CTL_offKU2 14  /* Enable user mode event counting for 
PFMC2 */
 #define PFM_CTL_offSEL015  /* The event selection for 
PFMC0 */
-#define PFM_CTL_offSEL121  /* The event selection for 
PFMC1 */
-#define PFM_CTL_offSEL227  /* The event selection for 
PFMC2 */
+#define PFM_CTL_offSEL116  /* The event selection for 
PFMC1 */
+#define PFM_CTL_offSEL222  /* The event selection for 
PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0 ( 0x01  << PFM_CTL_offEN0 )
-- 
2.17.0



[PATCH 4/5] nds32: Fix perf multiple events map to same counter.

2018-10-18 Thread Nickhu
When multiple events map to the same counter, the counter
counts inaccurately. This is because each counter can only count one
event at a time.
So when multiple events map to the same counter, they have to take
turns in each context.

There are two solutions:
1. Print an error message when multiple events map to the same counter.
But printing the error message would make the program hang in a loop, and
the LTP (Linux Test Project) tests would fail when the program hangs.

2. Don't print the error message, so that the LTP tests pass. But the user
needs to know not to count events that map to the same counter at the
same time, or the results will be inaccurate.

We choose solution 2.

Signed-off-by: Nickhu 
---
 arch/nds32/include/asm/pmu.h   |  1 +
 arch/nds32/kernel/perf_event_cpu.c | 30 --
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h
index 3fbbe97c2d42..e75ec34af5f6 100644
--- a/arch/nds32/include/asm/pmu.h
+++ b/arch/nds32/include/asm/pmu.h
@@ -55,6 +55,7 @@ enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
  */
 #define NDS32_IDX_CYCLE_COUNTER0
 #define NDS32_IDX_COUNTER0 1
+#define NDS32_IDX_COUNTER1 2
 #define NDS32_IDX_COUNTER_LAST(cpu_pmu) \
(NDS32_IDX_CYCLE_COUNTER + (cpu_pmu)->num_events - 1)
 
diff --git a/arch/nds32/kernel/perf_event_cpu.c 
b/arch/nds32/kernel/perf_event_cpu.c
index 7bb4ebb87b5c..e9a0d8bb2bc1 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -566,16 +566,26 @@ static int nds32_pmu_get_event_idx(struct pmu_hw_events 
*cpuc,
/*
 * Try to get the counter for correpsonding event
 */
-   if (!test_and_set_bit(idx, cpuc->used_mask))
-   return idx;
-
-   /*
-* The counter is in use.
-* The system will hang in the loop.
-*/
-   pr_err
-   ("Multiple events map to one counter, the behavior is undefined.\n");
-   return -EPERM;
+   if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
+   if (!test_and_set_bit(idx, cpuc->used_mask))
+   return idx;
+   if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
+   return NDS32_IDX_COUNTER0;
+   if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+   return NDS32_IDX_COUNTER1;
+   } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
+   if (!test_and_set_bit(idx, cpuc->used_mask))
+   return idx;
+   else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+   return NDS32_IDX_COUNTER1;
+   else if (!test_and_set_bit
+(NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
+   return NDS32_IDX_CYCLE_COUNTER;
+   } else {
+   if (!test_and_set_bit(idx, cpuc->used_mask))
+   return idx;
+   }
+   return -EAGAIN;
 }
 
 static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
-- 
2.17.0



[PATCH 1/5] nds32: Perf porting

2018-10-18 Thread Nickhu
This commit ports perf to nds32.

Raw event:
The raw events start with 'r'.
Usage:
perf stat -e rXYZ ./app
X: the index of performance counter.
YZ: the index (converted to hexadecimal) of the event

Example:
'perf stat -e r101 ./app' means that counter 1 will count the
instruction event.

The index of counter and events can be found in
"Andes System Privilege Architecture Version 3 Manual".

Alternatively, run 'perf list' to find the symbolic names of the raw events.

Perf mmap2:
Fix unexpected perf mmap2() page fault

When mmap2() is called by the perf application,
you will encounter the condition "failed to write."
with return value -EFAULT.

This is due to the page fault caused by "reading" the buffer
from the mapped legal address region in order to write to the descriptor.
The page_fault handler will get a VM_FAULT_SIGBUS return value,
which should not happen here (because this is a read request).

You can refer to kernel/events/core.c:perf_mmap_fault(...)
If "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" is evaluated
as true, you will get VM_FAULT_SIGBUS as return value.

However, this is not a write request. The flag that indicates
why the page fault happened is wrong.

Furthermore, NDS32 SPAv3 is not able to detect whether an access is a read
or a write. It only knows whether it is an instruction fetch or a data access.

Therefore, by removing the wrong flag assignment (the hardware is
actually not able to report the reason), we can fix this bug.

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  430 ++
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1270 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 15 files changed, 2110 insertions(+), 6 deletions(-)
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f341133d..dd448d431f5a 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -31,6 +31,7 @@ config NDS32
select HAVE_DEBUG_KMEMLEAK
select HAVE_MEMBLOCK
select HAVE_REGS_AND_STACK_ACCESS_API
+   select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select LOCKDEP_SUPPORT
select MODULES_USE_ELF_RELA
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
index bb39749a6673..7e92f436ce87 100644
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
interrupts = <18>;
};
};
+
+   pmu {
+   compatible = "andestech,atcpmu";
+   interrupts= <13>;
+   };
 };
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index dbc4e5422550..f81b633d5379 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += kprobes.h
 generic-y += kvm_para.h
 generic-y += limits.h
 generic-y += local.h
+generic-y += local64.h
 generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += parport.h
diff --git a/arch/nds32/include/asm/perf_event.h 
b/arch/nds32/include/asm/perf_event.h
new file mode 100644
index ..fcdff02acc14
--- /dev/null
+++ b/arch/nds32/include/asm/perf_event.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PERF_EVENT_H
+#define __ASM_PERF_EVENT_H
+
+/*
+ * This file is request by Perf,
+ * please refer to tools/perf/design.txt for more details
+ */
+struct pt_regs;
+unsigned long perf_instruction_pointer(struct pt_regs *regs);
+un

[PATCH 2/5] nds32: Fix bug in bitfield.h

2018-10-18 Thread Nickhu
There are two bitfield bugs for the performance counters
in bitfield.h:

PFM_CTL_offSEL1 21 --> 16
PFM_CTL_offSEL2 27 --> 22

This commit fixes them.

Signed-off-by: Nickhu 
---
 arch/nds32/include/asm/bitfield.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/include/asm/bitfield.h 
b/arch/nds32/include/asm/bitfield.h
index 8e84fc385b94..19b2841219ad 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -692,8 +692,8 @@
 #define PFM_CTL_offKU1 13  /* Enable user mode event counting for 
PFMC1 */
 #define PFM_CTL_offKU2 14  /* Enable user mode event counting for 
PFMC2 */
 #define PFM_CTL_offSEL015  /* The event selection for 
PFMC0 */
-#define PFM_CTL_offSEL121  /* The event selection for 
PFMC1 */
-#define PFM_CTL_offSEL227  /* The event selection for 
PFMC2 */
+#define PFM_CTL_offSEL116  /* The event selection for 
PFMC1 */
+#define PFM_CTL_offSEL222  /* The event selection for 
PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0 ( 0x01  << PFM_CTL_offEN0 )
-- 
2.17.0



[PATCH 4/5] nds32: Fix perf multiple events map to same counter.

2018-10-18 Thread Nickhu
When multiple events map to the same counter, the counter counts
inaccurately. This is because each counter can only count one event
at a time.
So when multiple events map to the same counter, they have to take
turns in each context.

There are two solutions:
1. Print an error message when multiple events map to the same counter.
But printing the error message makes the program hang in a loop, and the
LTP (Linux Test Project) tests fail when the program hangs in a loop.

2. Don't print the error message, so that the LTP tests pass. But the user
needs to know not to count events which map to the same counter at the
same time, or the user will get inaccurate results.

We choose method 2 as the solution.

Signed-off-by: Nickhu 
---
 arch/nds32/include/asm/pmu.h   |  1 +
 arch/nds32/kernel/perf_event_cpu.c | 30 --
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h
index 3fbbe97c2d42..e75ec34af5f6 100644
--- a/arch/nds32/include/asm/pmu.h
+++ b/arch/nds32/include/asm/pmu.h
@@ -55,6 +55,7 @@ enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
  */
 #define NDS32_IDX_CYCLE_COUNTER0
 #define NDS32_IDX_COUNTER0 1
+#define NDS32_IDX_COUNTER1 2
 #define NDS32_IDX_COUNTER_LAST(cpu_pmu) \
(NDS32_IDX_CYCLE_COUNTER + (cpu_pmu)->num_events - 1)
 
diff --git a/arch/nds32/kernel/perf_event_cpu.c 
b/arch/nds32/kernel/perf_event_cpu.c
index 7bb4ebb87b5c..e9a0d8bb2bc1 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -566,16 +566,26 @@ static int nds32_pmu_get_event_idx(struct pmu_hw_events 
*cpuc,
/*
 * Try to get the counter for correpsonding event
 */
-   if (!test_and_set_bit(idx, cpuc->used_mask))
-   return idx;
-
-   /*
-* The counter is in use.
-* The system will hang in the loop.
-*/
-   pr_err
-   ("Multiple events map to one counter, the behavior is undefined.\n");
-   return -EPERM;
+   if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
+   if (!test_and_set_bit(idx, cpuc->used_mask))
+   return idx;
+   if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
+   return NDS32_IDX_COUNTER0;
+   if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+   return NDS32_IDX_COUNTER1;
+   } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
+   if (!test_and_set_bit(idx, cpuc->used_mask))
+   return idx;
+   else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+   return NDS32_IDX_COUNTER1;
+   else if (!test_and_set_bit
+(NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
+   return NDS32_IDX_CYCLE_COUNTER;
+   } else {
+   if (!test_and_set_bit(idx, cpuc->used_mask))
+   return idx;
+   }
+   return -EAGAIN;
 }
 
 static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
-- 
2.17.0



[PATCH 1/5] nds32: Perf porting

2018-10-18 Thread Nickhu
This commit ports perf to nds32.

Raw event:
The raw events start with 'r'.
Usage:
perf stat -e rXYZ ./app
X: the index of the performance counter.
YZ: the index (converted to hexadecimal) of the event.

Example:
'perf stat -e r101 ./app' means that counter 1 will count the instruction
event.

The index of counter and events can be found in
"Andes System Privilege Architecture Version 3 Manual".

Or you can run 'perf list' to find the symbolic names of the raw events.

Perf mmap2:
Fix unexpected perf mmap2() page fault

When mmap2() is called by the perf application,
you will encounter the error "failed to write."
with return value -EFAULT.

This is due to the page fault caused by "reading" the buffer
from the mapped legal address region in order to write it to the descriptor.
The page fault handler gets a VM_FAULT_SIGBUS return value,
which should not happen here, because this is a read request.

You can refer to kernel/events/core.c:perf_mmap_fault(...).
If "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" evaluates
to true, you will get VM_FAULT_SIGBUS as the return value.

However, this is not a write request. The flag which indicates
why the page fault happened is wrong.

Furthermore, NDS32 SPAv3 is not able to detect whether an access is a read
or a write. It only knows whether it is an instruction fetch or a data access.

Therefore, we fix this bug by removing the wrong flag assignment (the
hardware is actually not able to report the reason).

Signed-off-by: Nickhu 
---
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  430 ++
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1270 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 
 15 files changed, 2110 insertions(+), 6 deletions(-)
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f341133d..dd448d431f5a 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -31,6 +31,7 @@ config NDS32
select HAVE_DEBUG_KMEMLEAK
select HAVE_MEMBLOCK
select HAVE_REGS_AND_STACK_ACCESS_API
+   select HAVE_PERF_EVENTS
select IRQ_DOMAIN
select LOCKDEP_SUPPORT
select MODULES_USE_ELF_RELA
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
index bb39749a6673..7e92f436ce87 100644
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
interrupts = <18>;
};
};
+
+   pmu {
+   compatible = "andestech,atcpmu";
+   interrupts= <13>;
+   };
 };
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index dbc4e5422550..f81b633d5379 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += kprobes.h
 generic-y += kvm_para.h
 generic-y += limits.h
 generic-y += local.h
+generic-y += local64.h
 generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += parport.h
diff --git a/arch/nds32/include/asm/perf_event.h 
b/arch/nds32/include/asm/perf_event.h
new file mode 100644
index ..fcdff02acc14
--- /dev/null
+++ b/arch/nds32/include/asm/perf_event.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PERF_EVENT_H
+#define __ASM_PERF_EVENT_H
+
+/*
+ * This file is request by Perf,
+ * please refer to tools/perf/design.txt for more details
+ */
+struct pt_regs;
+unsigned long perf_instruction_pointer(struct pt_regs *regs);
+un

[PATCH 5/5] nds32: Add document for NDS32 PMU.

2018-10-18 Thread Nickhu
This patch documents how to add the NDS32 PMU
to the devicetree.

Signed-off-by: Nickhu 
---
 Documentation/devicetree/bindings/nds32/pmu.txt | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt

diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt 
b/Documentation/devicetree/bindings/nds32/pmu.txt
new file mode 100644
index ..02762b850e59
--- /dev/null
+++ b/Documentation/devicetree/bindings/nds32/pmu.txt
@@ -0,0 +1,17 @@
+* NDS32 Performance Monitor Units
+
+NDS32 core have a PMU for counting cpu and cache events like cache misses.
+The NDS32 PMU representation in the device tree should be done as under:
+
+Required properties:
+
+- compatible :
+   "andestech,atcpmu"
+
+- interrupts : The interrupt number for NDS32 PMU is 13.
+
+Example:
+pmu{
+   compatible = "andestech,atcpmu";
+   interrupts = <13>;
+}
-- 
2.17.0



[PATCH 0/5] nds32: Perf support

2018-10-18 Thread Nickhu
These commits add perf support for nds32.
There are three performance counters in nds32, and
each of them can count different events. You can
use 'perf list' to show the available events that
can be used.

Nickhu (5):
  nds32: Perf porting
  nds32: Fix bug in bitfield.h
  nds32: Add perf call-graph support.
  nds32: Fix perf multiple events map to same counter.
  nds32: Add document for NDS32 PMU.

 .../devicetree/bindings/nds32/pmu.txt |   17 +
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/bitfield.h |4 +-
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  431 +
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1579 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 +++
 17 files changed, 2439 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

-- 
2.17.0



[PATCH 5/5] nds32: Add document for NDS32 PMU.

2018-10-18 Thread Nickhu
This patch documents how to add the NDS32 PMU
to the devicetree.

Signed-off-by: Nickhu 
---
 Documentation/devicetree/bindings/nds32/pmu.txt | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt

diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt 
b/Documentation/devicetree/bindings/nds32/pmu.txt
new file mode 100644
index ..02762b850e59
--- /dev/null
+++ b/Documentation/devicetree/bindings/nds32/pmu.txt
@@ -0,0 +1,17 @@
+* NDS32 Performance Monitor Units
+
+NDS32 core have a PMU for counting cpu and cache events like cache misses.
+The NDS32 PMU representation in the device tree should be done as under:
+
+Required properties:
+
+- compatible :
+   "andestech,atcpmu"
+
+- interrupts : The interrupt number for NDS32 PMU is 13.
+
+Example:
+pmu{
+   compatible = "andestech,atcpmu";
+   interrupts = <13>;
+}
-- 
2.17.0



[PATCH 0/5] nds32: Perf support

2018-10-18 Thread Nickhu
These commits add perf support for nds32.
There are three performance counters in nds32, and
each of them can count different events. You can
use 'perf list' to show the available events that
can be used.

Nickhu (5):
  nds32: Perf porting
  nds32: Fix bug in bitfield.h
  nds32: Add perf call-graph support.
  nds32: Fix perf multiple events map to same counter.
  nds32: Add document for NDS32 PMU.

 .../devicetree/bindings/nds32/pmu.txt |   17 +
 arch/nds32/Kconfig|1 +
 arch/nds32/boot/dts/ae3xx.dts |5 +
 arch/nds32/include/asm/Kbuild |1 +
 arch/nds32/include/asm/bitfield.h |4 +-
 arch/nds32/include/asm/perf_event.h   |   16 +
 arch/nds32/include/asm/pmu.h  |  431 +
 arch/nds32/include/asm/stacktrace.h   |   39 +
 arch/nds32/kernel/Makefile|3 +-
 arch/nds32/kernel/perf_event_cpu.c| 1579 +
 arch/nds32/mm/fault.c |   13 +-
 tools/include/asm/barrier.h   |2 +
 tools/perf/arch/nds32/Build   |1 +
 tools/perf/arch/nds32/util/Build  |1 +
 tools/perf/arch/nds32/util/header.c   |   29 +
 tools/perf/pmu-events/arch/nds32/mapfile.csv  |   15 +
 .../pmu-events/arch/nds32/n13/atcpmu.json |  290 +++
 17 files changed, 2439 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt
 create mode 100644 arch/nds32/include/asm/perf_event.h
 create mode 100644 arch/nds32/include/asm/pmu.h
 create mode 100644 arch/nds32/include/asm/stacktrace.h
 create mode 100644 arch/nds32/kernel/perf_event_cpu.c
 create mode 100644 tools/perf/arch/nds32/Build
 create mode 100644 tools/perf/arch/nds32/util/Build
 create mode 100644 tools/perf/arch/nds32/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json

-- 
2.17.0



[PATCH 2/2] nds32: Fix the unaligned access handler

2018-05-07 Thread Nickhu
If the kernel config 'CONFIG_ALIGNMENT_TRAP' and the file
'/proc/sys/nds32/unaligned_access/enable' are set, the kernel
unaligned access handler does not handle the access correctly when the
value of the immediate field is negative. This commit fixes the
unaligned access handler in the kernel.

Signed-off-by: Nickhu <nic...@andestech.com>
---
 arch/nds32/mm/alignment.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index e515f6f3d247..e1aed9dc692d 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -19,7 +19,7 @@
 #define RA(inst)   (((inst) >> 15) & 0x1FUL)
 #define RB(inst)   (((inst) >> 10) & 0x1FUL)
 #define SV(inst)   (((inst) >> 8) & 0x3UL)
-#define IMM(inst)  (((inst) >> 0) & 0x3FFFUL)
+#define IMM(inst)  (((inst) >> 0) & 0x7FFFUL)
 
 #define RA3(inst)  (((inst) >> 3) & 0x7UL)
 #define RT3(inst)  (((inst) >> 6) & 0x7UL)
@@ -28,6 +28,9 @@
 #define RA5(inst)  (((inst) >> 0) & 0x1FUL)
 #define RT4(inst)  (((inst) >> 5) & 0xFUL)
 
+#define GET_IMMSVAL(imm_value) \
+   (((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value)
+
 #define __get8_data(val,addr,err)  \
__asm__(\
"1: lbi.bi  %1, [%2], #1\n" \
@@ -467,7 +470,7 @@ static inline int do_32(unsigned long inst, struct pt_regs 
*regs)
}
 
if (imm)
-   shift = IMM(inst) * len;
+   shift = GET_IMMSVAL(IMM(inst)) * len;
else
shift = *idx_to_addr(regs, RB(inst)) << SV(inst);
 
-- 
2.17.0



[PATCH 2/2] nds32: Fix the unaligned access handler

2018-05-07 Thread Nickhu
If the kernel config 'CONFIG_ALIGNMENT_TRAP' and the file
'/proc/sys/nds32/unaligned_access/enable' are set, the kernel
unaligned access handler does not handle the access correctly when the
value of the immediate field is negative. This commit fixes the
unaligned access handler in the kernel.

Signed-off-by: Nickhu 
---
 arch/nds32/mm/alignment.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index e515f6f3d247..e1aed9dc692d 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -19,7 +19,7 @@
 #define RA(inst)   (((inst) >> 15) & 0x1FUL)
 #define RB(inst)   (((inst) >> 10) & 0x1FUL)
 #define SV(inst)   (((inst) >> 8) & 0x3UL)
-#define IMM(inst)  (((inst) >> 0) & 0x3FFFUL)
+#define IMM(inst)  (((inst) >> 0) & 0x7FFFUL)
 
 #define RA3(inst)  (((inst) >> 3) & 0x7UL)
 #define RT3(inst)  (((inst) >> 6) & 0x7UL)
@@ -28,6 +28,9 @@
 #define RA5(inst)  (((inst) >> 0) & 0x1FUL)
 #define RT4(inst)  (((inst) >> 5) & 0xFUL)
 
+#define GET_IMMSVAL(imm_value) \
+   (((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value)
+
 #define __get8_data(val,addr,err)  \
__asm__(\
"1: lbi.bi  %1, [%2], #1\n" \
@@ -467,7 +470,7 @@ static inline int do_32(unsigned long inst, struct pt_regs 
*regs)
}
 
if (imm)
-   shift = IMM(inst) * len;
+   shift = GET_IMMSVAL(IMM(inst)) * len;
else
shift = *idx_to_addr(regs, RB(inst)) << SV(inst);
 
-- 
2.17.0



[PATCH 0/2] nds32:Renaming file and fixing the unaligned access handler

2018-05-07 Thread Nickhu
The name of /proc/sys/nds32/unaligned_acess is misspelled, so we rename it
to /proc/sys/nds32/unaligned_access.

The unaligned access handler in nds32 goes wrong when the immediate field of
a load/store instruction is negative. We fix it by recognizing whether the
immediate field is positive or negative and then changing the value of the
immediate field to an unsigned integer number.

Nickhu (2):
  nds32: Renaming the file for unaligned access
  nds32: Fix the unaligned access handler

 arch/nds32/mm/alignment.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

-- 
2.17.0



[PATCH 0/2] nds32:Renaming file and fixing the unaligned access handler

2018-05-07 Thread Nickhu
The name of /proc/sys/nds32/unaligned_acess is misspelled, so we rename it
to /proc/sys/nds32/unaligned_access.

The unaligned access handler in nds32 goes wrong when the immediate field of
a load/store instruction is negative. We fix it by recognizing whether the
immediate field is positive or negative and then changing the value of the
immediate field to an unsigned integer number.

Nickhu (2):
  nds32: Renaming the file for unaligned access
  nds32: Fix the unaligned access handler

 arch/nds32/mm/alignment.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

-- 
2.17.0



[PATCH 1/2] nds32: Renaming the file for unaligned access

2018-05-07 Thread Nickhu
Change the name of the file '/proc/sys/nds32/unaligned_acess'
to '/proc/sys/nds32/unaligned_access'

Signed-off-by: Nickhu <nic...@andestech.com>
---
 arch/nds32/mm/alignment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index b96a01b10ca7..e515f6f3d247 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -552,7 +552,7 @@ static struct ctl_table alignment_tbl[3] = {
 
 static struct ctl_table nds32_sysctl_table[2] = {
{
-.procname = "unaligned_acess",
+.procname = "unaligned_access",
 .mode = 0555,
 .child = alignment_tbl},
{}
-- 
2.17.0



[PATCH 1/2] nds32: Renaming the file for unaligned access

2018-05-07 Thread Nickhu
Change the name of the file '/proc/sys/nds32/unaligned_acess'
to '/proc/sys/nds32/unaligned_access'

Signed-off-by: Nickhu 
---
 arch/nds32/mm/alignment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index b96a01b10ca7..e515f6f3d247 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -552,7 +552,7 @@ static struct ctl_table alignment_tbl[3] = {
 
 static struct ctl_table nds32_sysctl_table[2] = {
{
-.procname = "unaligned_acess",
+.procname = "unaligned_access",
 .mode = 0555,
 .child = alignment_tbl},
{}
-- 
2.17.0