[PATCH 1/1] nds32: Power management for nds32
There are three sleep states in nds32: suspend to idle, suspend to standby, and suspend to ram. In suspend to ram, we use the 'standby' instruction to emulate a power management device and hang the system until a wakeup source sends wakeup events to break the loop. First, we push the general purpose registers and system registers to the stack. Second, we translate the stack pointer to a physical address and store it to memory to save the stack pointer. Third, after writing back and invalidating the cache, we hang in the 'standby' instruction. When a wakeup source triggers wakeup events, the loop will be broken and the system resumes. Signed-off-by: Nickhu --- arch/nds32/Kconfig | 10 +++ arch/nds32/include/asm/suspend.h | 11 +++ arch/nds32/kernel/Makefile | 2 +- arch/nds32/kernel/pm.c | 91 ++ arch/nds32/kernel/sleep.S| 129 +++ drivers/irqchip/irq-ativic32.c | 29 +++ 6 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 arch/nds32/include/asm/suspend.h create mode 100644 arch/nds32/kernel/pm.c create mode 100644 arch/nds32/kernel/sleep.S diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index dd448d431f5a..8e2c5ac6acd1 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -95,3 +95,13 @@ endmenu menu "Kernel Features" source "kernel/Kconfig.hz" endmenu + +menu "Power management options" +config SYS_SUPPORTS_APM_EMULATION + bool + +config ARCH_SUSPEND_POSSIBLE + def_bool y + +source "kernel/power/Kconfig" +endmenu diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h new file mode 100644 index ..6ed2418af1ac --- /dev/null +++ b/arch/nds32/include/asm/suspend.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2008-2017 Andes Technology Corporation + +#ifndef __ASM_NDS32_SUSPEND_H +#define __ASM_NDS32_SUSPEND_H + +extern void suspend2ram(void); +extern void cpu_resume(void); +extern unsigned long wake_mask; + +#endif diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile index f52bd2744f50..8d62f2ecb1ab 100644 --- 
a/arch/nds32/kernel/Makefile +++ b/arch/nds32/kernel/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_OF) += devtree.o obj-$(CONFIG_CACHE_L2) += atl2c.o obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o - +obj-$(CONFIG_PM) += pm.o sleep.o extra-y := head.o vmlinux.lds obj-y += vdso/ diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c new file mode 100644 index ..e1eaf3bac709 --- /dev/null +++ b/arch/nds32/kernel/pm.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2008-2017 Andes Technology Corporation + +/* + * nds32 Power Management Routines + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License. + * + * Abstract: + * + *This program is for nds32 power management routines. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +unsigned int resume_addr; +unsigned int *phy_addr_sp_tmp; + +static void nds32_suspend2ram(void) +{ + pgd_t *pgdv; + pud_t *pudv; + pmd_t *pmdv; + pte_t *ptev; + + pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) & + L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume); + + pudv = pud_offset(pgdv, (unsigned int)cpu_resume); + pmdv = pmd_offset(pudv, (unsigned int)cpu_resume); + ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume); + + resume_addr = ((*ptev) & TLB_DATA_mskPPN) + | ((unsigned int)cpu_resume & 0x0fff); + + suspend2ram(); +} + +static void nds32_suspend_cpu(void) +{ + while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask)) + __asm__ volatile ("standby no_wake_grant\n\t"); +} + +static int nds32_pm_valid(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + default: + return 0; + } +} + +static int nds32_pm_enter(suspend_state_t state) +{ + pr_debug("%s:state:%d\n", __func__, state); + switch (state) { + case PM_SUSPEND_STANDBY: + nds32_suspend_cpu(); + return 0; + case 
PM_SUSPEND_MEM: + nds32_suspend2ram(); + return 0; + default: + return -EINVAL; + } +} + +static const struct platform_suspend_ops nds32_pm_ops = { + .valid = nds32_pm_valid, + .enter = nds32_pm_enter, +}; + +static int __init nds32_pm_init(void) +{ + pr_debug("Enter %s\n", __func__); + suspend_set_ops(&nds32_pm_ops); + return 0; +} +late_initcall(
[PATCH 0/1] nds32: Power management
This patch set ports power management support to nds32. Nickhu (1): nds32: Power management for nds32 arch/nds32/Kconfig | 10 +++ arch/nds32/include/asm/suspend.h | 11 +++ arch/nds32/kernel/Makefile | 2 +- arch/nds32/kernel/pm.c | 91 ++ arch/nds32/kernel/sleep.S| 129 +++ drivers/irqchip/irq-ativic32.c | 29 +++ 6 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 arch/nds32/include/asm/suspend.h create mode 100644 arch/nds32/kernel/pm.c create mode 100644 arch/nds32/kernel/sleep.S -- 2.17.0
[PATCH 1/1] nds32: Power management for nds32
There are three sleep states in nds32: suspend to idle, suspend to standby, and suspend to ram. In suspend to ram, we use the 'standby' instruction to emulate a power management device and hang the system until a wakeup source sends wakeup events to break the loop. First, we push the general purpose registers and system registers to the stack. Second, we translate the stack pointer to a physical address and store it to memory to save the stack pointer. Third, after writing back and invalidating the cache, we hang in the 'standby' instruction. When a wakeup source triggers wakeup events, the loop will be broken and the system resumes. Signed-off-by: Nickhu --- arch/nds32/Kconfig | 10 +++ arch/nds32/include/asm/suspend.h | 11 +++ arch/nds32/kernel/Makefile | 2 +- arch/nds32/kernel/pm.c | 91 ++ arch/nds32/kernel/sleep.S| 129 +++ drivers/irqchip/irq-ativic32.c | 29 +++ 6 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 arch/nds32/include/asm/suspend.h create mode 100644 arch/nds32/kernel/pm.c create mode 100644 arch/nds32/kernel/sleep.S diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index dd448d431f5a..8e2c5ac6acd1 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -95,3 +95,13 @@ endmenu menu "Kernel Features" source "kernel/Kconfig.hz" endmenu + +menu "Power management options" +config SYS_SUPPORTS_APM_EMULATION + bool + +config ARCH_SUSPEND_POSSIBLE + def_bool y + +source "kernel/power/Kconfig" +endmenu diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h new file mode 100644 index ..6ed2418af1ac --- /dev/null +++ b/arch/nds32/include/asm/suspend.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2008-2017 Andes Technology Corporation + +#ifndef __ASM_NDS32_SUSPEND_H +#define __ASM_NDS32_SUSPEND_H + +extern void suspend2ram(void); +extern void cpu_resume(void); +extern unsigned long wake_mask; + +#endif diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile index f52bd2744f50..8d62f2ecb1ab 100644 --- 
a/arch/nds32/kernel/Makefile +++ b/arch/nds32/kernel/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_OF) += devtree.o obj-$(CONFIG_CACHE_L2) += atl2c.o obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o - +obj-$(CONFIG_PM) += pm.o sleep.o extra-y := head.o vmlinux.lds obj-y += vdso/ diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c new file mode 100644 index ..e1eaf3bac709 --- /dev/null +++ b/arch/nds32/kernel/pm.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2008-2017 Andes Technology Corporation + +/* + * nds32 Power Management Routines + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License. + * + * Abstract: + * + *This program is for nds32 power management routines. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +unsigned int resume_addr; +unsigned int *phy_addr_sp_tmp; + +static void nds32_suspend2ram(void) +{ + pgd_t *pgdv; + pud_t *pudv; + pmd_t *pmdv; + pte_t *ptev; + + pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) & + L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume); + + pudv = pud_offset(pgdv, (unsigned int)cpu_resume); + pmdv = pmd_offset(pudv, (unsigned int)cpu_resume); + ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume); + + resume_addr = ((*ptev) & TLB_DATA_mskPPN) + | ((unsigned int)cpu_resume & 0x0fff); + + suspend2ram(); +} + +static void nds32_suspend_cpu(void) +{ + while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask)) + __asm__ volatile ("standby no_wake_grant\n\t"); +} + +static int nds32_pm_valid(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + default: + return 0; + } +} + +static int nds32_pm_enter(suspend_state_t state) +{ + pr_debug("%s:state:%d\n", __func__, state); + switch (state) { + case PM_SUSPEND_STANDBY: + nds32_suspend_cpu(); + return 0; + case 
PM_SUSPEND_MEM: + nds32_suspend2ram(); + return 0; + default: + return -EINVAL; + } +} + +static const struct platform_suspend_ops nds32_pm_ops = { + .valid = nds32_pm_valid, + .enter = nds32_pm_enter, +}; + +static int __init nds32_pm_init(void) +{ + pr_debug("Enter %s\n", __func__); + suspend_set_ops(&nds32_pm_ops); + return 0; +} +late_initcall(
[PATCH 0/1] nds32: Power management
This patch set ports power management support to nds32. Nickhu (1): nds32: Power management for nds32 arch/nds32/Kconfig | 10 +++ arch/nds32/include/asm/suspend.h | 11 +++ arch/nds32/kernel/Makefile | 2 +- arch/nds32/kernel/pm.c | 91 ++ arch/nds32/kernel/sleep.S| 129 +++ drivers/irqchip/irq-ativic32.c | 29 +++ 6 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 arch/nds32/include/asm/suspend.h create mode 100644 arch/nds32/kernel/pm.c create mode 100644 arch/nds32/kernel/sleep.S -- 2.17.0
[PATCH v3 2/4] nds32: Perf porting
This commit ports perf to nds32. 1. Raw event: The raw events start with 'r'. Usage: perf stat -e rXYZ ./app X: the index of the performance counter. YZ: the index (converted to hexadecimal) of the event. Example: 'perf stat -e r101 ./app' means that counter 1 will count the instruction event. The indexes of counters and events can be found in the "Andes System Privilege Architecture Version 3 Manual". Or you can run 'perf list' to find the symbolic names of raw events. 2. Perf mmap2: Fix unexpected perf mmap2() page fault. When mmap2() is called by the perf application, you will encounter this condition: "failed to write." with return value -EFAULT. This is due to the page fault caused by "reading" a buffer from the mapped legal address region to write to the descriptor. The page_fault handler will get a VM_FAULT_SIGBUS return value, which should not happen here (because this is a read request). You can refer to kernel/events/core.c:perf_mmap_fault(...). If "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" is evaluated as true, you will get VM_FAULT_SIGBUS as the return value. However, this is not a write request. The flag which indicates why the page fault happened is wrong. Furthermore, NDS32 SPAv3 is not able to detect whether an access is a read or a write. It only knows whether it is an instruction fetch or a data access. Therefore, by removing the wrong flag assignment (actually, the hardware is not able to show the reason), we can fix this bug. 3. Perf multiple events map to the same counter. When multiple events map to the same counter, the counter counts inaccurately. This is because each counter only counts one event at a time. So when multiple events map to the same counter, they have to take turns in each context. There are two solutions: 1. Print an error message when multiple events map to the same counter. But printing the error message would make the program hang in a loop. The ltp (Linux Test Project) would fail when the program hangs in a loop. 2. 
Don't print the error message, the ltp would pass. But the user need to have the knowledge that don't count the events which map to the same counter, or the user will get the inaccurate results. We choose method 2 for the solution Signed-off-by: Nickhu --- arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 386 ++ arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1223 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 15 files changed, 2019 insertions(+), 6 deletions(-) create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7068f341133d..dd448d431f5a 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -31,6 +31,7 @@ config NDS32 select HAVE_DEBUG_KMEMLEAK select HAVE_MEMBLOCK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PERF_EVENTS select IRQ_DOMAIN select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts index bb39749a6673..16a9f54a805e 100644 --- a/arch/nds32/boot/dts/ae3xx.dts +++ b/arch/nds32/boot/dts/ae3xx.dts @@ -82,4 +82,9 @@ interrupts = <18>; }; }; + + pmu { + compatible = 
"andestech,nds32v3-pmu"; + interrupts= <13>; + }; };
[PATCH v3 2/4] nds32: Perf porting
This commit ports perf to nds32. 1. Raw event: The raw events start with 'r'. Usage: perf stat -e rXYZ ./app X: the index of the performance counter. YZ: the index (converted to hexadecimal) of the event. Example: 'perf stat -e r101 ./app' means that counter 1 will count the instruction event. The indexes of counters and events can be found in the "Andes System Privilege Architecture Version 3 Manual". Or you can run 'perf list' to find the symbolic names of raw events. 2. Perf mmap2: Fix unexpected perf mmap2() page fault. When mmap2() is called by the perf application, you will encounter this condition: "failed to write." with return value -EFAULT. This is due to the page fault caused by "reading" a buffer from the mapped legal address region to write to the descriptor. The page_fault handler will get a VM_FAULT_SIGBUS return value, which should not happen here (because this is a read request). You can refer to kernel/events/core.c:perf_mmap_fault(...). If "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" is evaluated as true, you will get VM_FAULT_SIGBUS as the return value. However, this is not a write request. The flag which indicates why the page fault happened is wrong. Furthermore, NDS32 SPAv3 is not able to detect whether an access is a read or a write. It only knows whether it is an instruction fetch or a data access. Therefore, by removing the wrong flag assignment (actually, the hardware is not able to show the reason), we can fix this bug. 3. Perf multiple events map to the same counter. When multiple events map to the same counter, the counter counts inaccurately. This is because each counter only counts one event at a time. So when multiple events map to the same counter, they have to take turns in each context. There are two solutions: 1. Print an error message when multiple events map to the same counter. But printing the error message would make the program hang in a loop. The ltp (Linux Test Project) would fail when the program hangs in a loop. 2. 
Don't print the error message, the ltp would pass. But the user need to have the knowledge that don't count the events which map to the same counter, or the user will get the inaccurate results. We choose method 2 for the solution Signed-off-by: Nickhu --- arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 386 ++ arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1223 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 15 files changed, 2019 insertions(+), 6 deletions(-) create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7068f341133d..dd448d431f5a 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -31,6 +31,7 @@ config NDS32 select HAVE_DEBUG_KMEMLEAK select HAVE_MEMBLOCK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PERF_EVENTS select IRQ_DOMAIN select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts index bb39749a6673..16a9f54a805e 100644 --- a/arch/nds32/boot/dts/ae3xx.dts +++ b/arch/nds32/boot/dts/ae3xx.dts @@ -82,4 +82,9 @@ interrupts = <18>; }; }; + + pmu { + compatible = 
"andestech,nds32v3-pmu"; + interrupts= <13>; + }; };
[PATCH v3 4/4] nds32: Add document for NDS32 PMU.
Add a document describing how to add the NDS32 PMU to the devicetree. Signed-off-by: Nickhu --- Documentation/devicetree/bindings/nds32/pmu.txt | 17 + 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt b/Documentation/devicetree/bindings/nds32/pmu.txt new file mode 100644 index ..1bd15785b4ae --- /dev/null +++ b/Documentation/devicetree/bindings/nds32/pmu.txt @@ -0,0 +1,17 @@ +* NDS32 Performance Monitor Units + +NDS32 core have a PMU for counting cpu and cache events like cache misses. +The NDS32 PMU representation in the device tree should be done as under: + +Required properties: + +- compatible : + "andestech,nds32v3-pmu" + +- interrupts : The interrupt number for NDS32 PMU is 13. + +Example: +pmu{ + compatible = "andestech,nds32v3-pmu"; + interrupts = <13>; +} -- 2.17.0
[PATCH v3 3/4] nds32: Add perf call-graph support.
The perf call-graph option can trace the callchain between functions. This commit adds perf callchain support for nds32. There are a kernel callchain and a user callchain. The kernel callchain can trace the functions in kernel space. There are two types of user callchain: one for when the 'optimize for size' config is set, and another for when it is not set. The difference between the two types is that the index of the frame pointer in the user stack is not the same. For example: With optimize for size: User Stack: - | lp | - | gp | - | fp | Without optimize for size: User Stack: 1. non-leaf function: - | lp | - | fp | 2. leaf function: - | fp | Signed-off-by: Nickhu --- arch/nds32/kernel/perf_event_cpu.c | 299 + 1 file changed, 299 insertions(+) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index a6e723d0fdbc..5e00ce54d0ff 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void) device_initcall(register_pmu_driver); +/* + * References: arch/nds32/kernel/traps.c:__dump() + * You will need to know the NDS ABI first. + */ +static int unwind_frame_kernel(struct stackframe *frame) +{ + int graph = 0; +#ifdef CONFIG_FRAME_POINTER + /* 0x3 means misalignment */ + if (!kstack_end((void *)frame->fp) && + !((unsigned long)frame->fp & 0x3) && + ((unsigned long)frame->fp >= TASK_SIZE)) { + /* +* The array index is based on the ABI, the below graph +* illustrate the reasons. +* Function call procedure: "smw" and "lmw" will always +* update SP and FP for you automatically. 
+* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) <-- FP(after smw) -1 +* +* |FP| -2 +* +* | | <-- SP(after smw) -3 +*/ + frame->lp = ((unsigned long *)frame->fp)[-1]; + frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; + /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (NULL, &graph, frame->lp, NULL); + + return 0; + } else { + return -EPERM; + } +#else + /* +* You can refer to arch/nds32/kernel/traps.c:__dump() +* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". +* And, the "sp" is not always correct. +* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) -1 +* +* | | <-- SP(after smw) -2 +* +*/ + if (!kstack_end((void *)frame->sp)) { + frame->lp = ((unsigned long *)frame->sp)[1]; + /* TODO: How to deal with the value in first +* "sp" is not correct? +*/ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (tsk, &graph, frame->lp, NULL); + + frame->sp = ((unsigned long *)frame->sp) + 1; + + return 0; + } else { + return -EPERM; + } +#endif +} + +static void notrace +walk_stackframe(struct stackframe *frame, + int (*fn_record)(struct stackframe *, void *), + void *data) +{ + while (1) { + int ret; + + if (fn_record(frame, data)) + break; + + ret = unwind_frame_kernel(frame); + if (ret < 0) + break; + } +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called
[PATCH v3 1/4] nds32: Fix bug in bitfield.h
There are two bitfield bugs for the performance counters in bitfield.h: PFM_CTL_offSEL1 21 --> 16, PFM_CTL_offSEL2 27 --> 22. This commit fixes them. Signed-off-by: Nickhu --- arch/nds32/include/asm/bitfield.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index 8e84fc385b94..19b2841219ad 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -692,8 +692,8 @@ #define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */ #define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */ #define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */ -#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */ -#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */ +#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */ +#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */ /* bit 28:31 reserved */ #define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 ) -- 2.17.0
[PATCH v3 4/4] nds32: Add document for NDS32 PMU.
Add a document describing how to add the NDS32 PMU to the devicetree. Signed-off-by: Nickhu --- Documentation/devicetree/bindings/nds32/pmu.txt | 17 + 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt b/Documentation/devicetree/bindings/nds32/pmu.txt new file mode 100644 index ..1bd15785b4ae --- /dev/null +++ b/Documentation/devicetree/bindings/nds32/pmu.txt @@ -0,0 +1,17 @@ +* NDS32 Performance Monitor Units + +NDS32 core have a PMU for counting cpu and cache events like cache misses. +The NDS32 PMU representation in the device tree should be done as under: + +Required properties: + +- compatible : + "andestech,nds32v3-pmu" + +- interrupts : The interrupt number for NDS32 PMU is 13. + +Example: +pmu{ + compatible = "andestech,nds32v3-pmu"; + interrupts = <13>; +} -- 2.17.0
[PATCH v3 3/4] nds32: Add perf call-graph support.
The perf call-graph option can trace the callchain between functions. This commit adds perf callchain support for nds32. There are a kernel callchain and a user callchain. The kernel callchain can trace the functions in kernel space. There are two types of user callchain: one for when the 'optimize for size' config is set, and another for when it is not set. The difference between the two types is that the index of the frame pointer in the user stack is not the same. For example: With optimize for size: User Stack: - | lp | - | gp | - | fp | Without optimize for size: User Stack: 1. non-leaf function: - | lp | - | fp | 2. leaf function: - | fp | Signed-off-by: Nickhu --- arch/nds32/kernel/perf_event_cpu.c | 299 + 1 file changed, 299 insertions(+) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index a6e723d0fdbc..5e00ce54d0ff 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void) device_initcall(register_pmu_driver); +/* + * References: arch/nds32/kernel/traps.c:__dump() + * You will need to know the NDS ABI first. + */ +static int unwind_frame_kernel(struct stackframe *frame) +{ + int graph = 0; +#ifdef CONFIG_FRAME_POINTER + /* 0x3 means misalignment */ + if (!kstack_end((void *)frame->fp) && + !((unsigned long)frame->fp & 0x3) && + ((unsigned long)frame->fp >= TASK_SIZE)) { + /* +* The array index is based on the ABI, the below graph +* illustrate the reasons. +* Function call procedure: "smw" and "lmw" will always +* update SP and FP for you automatically. 
+* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) <-- FP(after smw) -1 +* +* |FP| -2 +* +* | | <-- SP(after smw) -3 +*/ + frame->lp = ((unsigned long *)frame->fp)[-1]; + frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; + /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (NULL, &graph, frame->lp, NULL); + + return 0; + } else { + return -EPERM; + } +#else + /* +* You can refer to arch/nds32/kernel/traps.c:__dump() +* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". +* And, the "sp" is not always correct. +* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) -1 +* +* | | <-- SP(after smw) -2 +* +*/ + if (!kstack_end((void *)frame->sp)) { + frame->lp = ((unsigned long *)frame->sp)[1]; + /* TODO: How to deal with the value in first +* "sp" is not correct? +*/ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (tsk, &graph, frame->lp, NULL); + + frame->sp = ((unsigned long *)frame->sp) + 1; + + return 0; + } else { + return -EPERM; + } +#endif +} + +static void notrace +walk_stackframe(struct stackframe *frame, + int (*fn_record)(struct stackframe *, void *), + void *data) +{ + while (1) { + int ret; + + if (fn_record(frame, data)) + break; + + ret = unwind_frame_kernel(frame); + if (ret < 0) + break; + } +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called
[PATCH v3 1/4] nds32: Fix bug in bitfield.h
There are two bitfield bugs for the performance counters in bitfield.h: PFM_CTL_offSEL1 21 --> 16, PFM_CTL_offSEL2 27 --> 22. This commit fixes them. Signed-off-by: Nickhu --- arch/nds32/include/asm/bitfield.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index 8e84fc385b94..19b2841219ad 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -692,8 +692,8 @@ #define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */ #define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */ #define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */ -#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */ -#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */ +#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */ +#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */ /* bit 28:31 reserved */ #define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 ) -- 2.17.0
[PATCH v3 0/4] nds32: Perf Support
* Sorry everyone, I forgot to add the version number of the patch set I just sent. These four commits add perf support for nds32. There are three performance counters in nds32, and each of them can count different events. You can use 'perf list' to show the available events that can be used. Changes in V2: 1. Change the definition 'PFM_CTL_xxx' to array form. 2. Simplify the PMU driver. 3. Stop all counters when handling irq caused by performance counters overflow. 4. Rename the compatible string in devicetree. Changes in V3: Fix the typo in Documentation/devicetree/bindings/nds32/pmu.txt. Nickhu (4): nds32: Fix bug in bitfield.h nds32: Perf porting nds32: Add perf call-graph support. nds32: Add document for NDS32 PMU. .../devicetree/bindings/nds32/pmu.txt | 17 + arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/bitfield.h |4 +- arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 386 + arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1522 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 17 files changed, 2337 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 
tools/perf/pmu-events/arch/nds32/n13/atcpmu.json -- 2.17.0
[PATCH v3 0/4] nds32: Perf Support
* Sorry everyone, I forgot to add the version number of the patch set I just sent. These four commits add perf support for nds32. There are three performance counters in nds32, and each of them can count different events. You can use 'perf list' to show the available events that can be used. Changes in V2: 1. Change the definition 'PFM_CTL_xxx' to array form. 2. Simplify the PMU driver. 3. Stop all counters when handling irq caused by performance counters overflow. 4. Rename the compatible string in devicetree. Changes in V3: Fix the typo in Documentation/devicetree/bindings/nds32/pmu.txt. Nickhu (4): nds32: Fix bug in bitfield.h nds32: Perf porting nds32: Add perf call-graph support. nds32: Add document for NDS32 PMU. .../devicetree/bindings/nds32/pmu.txt | 17 + arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/bitfield.h |4 +- arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 386 + arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1522 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 17 files changed, 2337 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 
tools/perf/pmu-events/arch/nds32/n13/atcpmu.json -- 2.17.0
[PATCH 1/4] nds32: Fix bug in bitfield.h
There are two bitfield bugs for the performance counters in bitfield.h: PFM_CTL_offSEL1 21 --> 16 PFM_CTL_offSEL2 27 --> 22 This commit fixes them. Signed-off-by: Nickhu --- arch/nds32/include/asm/bitfield.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index 8e84fc385b94..19b2841219ad 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -692,8 +692,8 @@ #define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */ #define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */ #define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */ -#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */ -#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */ +#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */ +#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */ /* bit 28:31 reserved */ #define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 ) -- 2.17.0
[PATCH 1/4] nds32: Fix bug in bitfield.h
There are two bitfield bugs for the performance counters in bitfield.h: PFM_CTL_offSEL1 21 --> 16 PFM_CTL_offSEL2 27 --> 22 This commit fixes them. Signed-off-by: Nickhu --- arch/nds32/include/asm/bitfield.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index 8e84fc385b94..19b2841219ad 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -692,8 +692,8 @@ #define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */ #define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */ #define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */ -#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */ -#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */ +#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */ +#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */ /* bit 28:31 reserved */ #define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 ) -- 2.17.0
[PATCH 0/4] nds32: Perf Support
These four commits add perf support for nds32. There are three performance counters in nds32, and each of them can count different events. You can use 'perf list' to show the available events that can be used. Nickhu (4): nds32: Fix bug in bitfield.h nds32: Perf porting nds32: Add perf call-graph support. nds32: Add document for NDS32 PMU. .../devicetree/bindings/nds32/pmu.txt | 17 + arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/bitfield.h |4 +- arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 386 + arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1522 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 17 files changed, 2337 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json -- 2.17.0
[PATCH 2/4] nds32: Perf porting
This commit ports perf to nds32. 1. Raw event: The raw events start with 'r'. Usage: perf stat -e rXYZ ./app X: the index of the performance counter. YZ: the index (converted to hexadecimal) of the event. Example: 'perf stat -e r101 ./app' means that counter 1 will count the instruction event. The indices of the counters and events can be found in the "Andes System Privilege Architecture Version 3 Manual". Alternatively, you can run 'perf list' to find the symbolic names of the raw events. 2. Perf mmap2: Fix unexpected perf mmap2() page fault. When mmap2() is called by the perf application, you will encounter the error "failed to write." with return value -EFAULT. This is due to the page fault caused by "reading" the buffer from the mapped legal address region in order to write it to the descriptor. The page fault handler gets a VM_FAULT_SIGBUS return value, which should not happen here (because this is a read request). You can refer to kernel/events/core.c:perf_mmap_fault(...): if "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" evaluates to true, you will get VM_FAULT_SIGBUS as the return value. However, this is not a write request. The flag which indicates why the page fault happened is wrong. Furthermore, NDS32 SPAv3 is not able to detect whether the access is a read or a write. It only knows whether it is an instruction fetch or a data access. Therefore, by removing the wrong flag assignment (the hardware is actually not able to show the reason), we can fix this bug. 3. Perf: multiple events mapped to the same counter. When multiple events are mapped to the same counter, the counter counts inaccurately. This is because each counter can only count one event at a time. So when multiple events are mapped to the same counter, they have to take turns in each context. There are two solutions: 1. Print an error message when multiple events map to the same counter. But printing the error message would make the program hang in a loop. The LTP (Linux Test Project) tests would fail when the program hangs in a loop. 2. 
Don't print the error message, so that the LTP tests pass. But the user needs to know not to count events which map to the same counter, or the results will be inaccurate. We choose method 2 as the solution. Signed-off-by: Nickhu --- arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 386 ++ arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1223 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 15 files changed, 2019 insertions(+), 6 deletions(-) create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7068f341133d..dd448d431f5a 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -31,6 +31,7 @@ config NDS32 select HAVE_DEBUG_KMEMLEAK select HAVE_MEMBLOCK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PERF_EVENTS select IRQ_DOMAIN select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts index bb39749a6673..16a9f54a805e 100644 --- a/arch/nds32/boot/dts/ae3xx.dts +++ b/arch/nds32/boot/dts/ae3xx.dts @@ -82,4 +82,9 @@ interrupts = <18>; }; }; + + pmu { + compatible = 
"andestech,nds32v3-pmu"; + interrupts= <13>; + }; };
[PATCH 0/4] nds32: Perf Support
These four commits add perf support for nds32. There are three performance counters in nds32, and each of them can count different events. You can use 'perf list' to show the available events that can be used. Nickhu (4): nds32: Fix bug in bitfield.h nds32: Perf porting nds32: Add perf call-graph support. nds32: Add document for NDS32 PMU. .../devicetree/bindings/nds32/pmu.txt | 17 + arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/bitfield.h |4 +- arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 386 + arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1522 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 17 files changed, 2337 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json -- 2.17.0
[PATCH 2/4] nds32: Perf porting
This commit ports perf to nds32. 1. Raw event: The raw events start with 'r'. Usage: perf stat -e rXYZ ./app X: the index of the performance counter. YZ: the index (converted to hexadecimal) of the event. Example: 'perf stat -e r101 ./app' means that counter 1 will count the instruction event. The indices of the counters and events can be found in the "Andes System Privilege Architecture Version 3 Manual". Alternatively, you can run 'perf list' to find the symbolic names of the raw events. 2. Perf mmap2: Fix unexpected perf mmap2() page fault. When mmap2() is called by the perf application, you will encounter the error "failed to write." with return value -EFAULT. This is due to the page fault caused by "reading" the buffer from the mapped legal address region in order to write it to the descriptor. The page fault handler gets a VM_FAULT_SIGBUS return value, which should not happen here (because this is a read request). You can refer to kernel/events/core.c:perf_mmap_fault(...): if "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" evaluates to true, you will get VM_FAULT_SIGBUS as the return value. However, this is not a write request. The flag which indicates why the page fault happened is wrong. Furthermore, NDS32 SPAv3 is not able to detect whether the access is a read or a write. It only knows whether it is an instruction fetch or a data access. Therefore, by removing the wrong flag assignment (the hardware is actually not able to show the reason), we can fix this bug. 3. Perf: multiple events mapped to the same counter. When multiple events are mapped to the same counter, the counter counts inaccurately. This is because each counter can only count one event at a time. So when multiple events are mapped to the same counter, they have to take turns in each context. There are two solutions: 1. Print an error message when multiple events map to the same counter. But printing the error message would make the program hang in a loop. The LTP (Linux Test Project) tests would fail when the program hangs in a loop. 2. 
Don't print the error message, so that the LTP tests pass. But the user needs to know not to count events which map to the same counter, or the results will be inaccurate. We choose method 2 as the solution. Signed-off-by: Nickhu --- arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 386 ++ arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1223 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 15 files changed, 2019 insertions(+), 6 deletions(-) create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7068f341133d..dd448d431f5a 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -31,6 +31,7 @@ config NDS32 select HAVE_DEBUG_KMEMLEAK select HAVE_MEMBLOCK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PERF_EVENTS select IRQ_DOMAIN select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts index bb39749a6673..16a9f54a805e 100644 --- a/arch/nds32/boot/dts/ae3xx.dts +++ b/arch/nds32/boot/dts/ae3xx.dts @@ -82,4 +82,9 @@ interrupts = <18>; }; }; + + pmu { + compatible = 
"andestech,nds32v3-pmu"; + interrupts= <13>; + }; };
[PATCH 4/4] nds32: Add document for NDS32 PMU.
The document for how to add NDS32 PMU in devicetree. Signed-off-by: Nickhu --- Documentation/devicetree/bindings/nds32/pmu.txt | 17 + 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt b/Documentation/devicetree/bindings/nds32/pmu.txt new file mode 100644 index ..1bd15785b4ae --- /dev/null +++ b/Documentation/devicetree/bindings/nds32/pmu.txt @@ -0,0 +1,17 @@ +* NDS32 Performance Monitor Units + +NDS32 core have a PMU for counting cpu and cache events like cache misses. +The NDS32 PMU representation in the device tree should be done as under: + +Required properties: + +- compatible : + "andestech,nds32v3-pmu" + +- interrupts : The interrupt number for NDS32 PMU is 13. + +Example: +pmu{ + compatible = "andestech,nds32v3-pmu"; + interrupts = <13>; +} -- 2.17.0
[PATCH 3/4] nds32: Add perf call-graph support.
The perf call-graph option can trace the callchain between functions. This commit adds perf callchain support for nds32. There are a kernel callchain and a user callchain. The kernel callchain can trace functions in kernel space. There are two types of user callchain: one for when the 'optimize for size' config is set, and another for when it is not set. The difference between the two types is that the index of the frame pointer in the user stack is not the same. For example: With optimize for size: User Stack: - | lp | - | gp | - | fp | Without optimize for size: User Stack: 1. non-leaf function: - | lp | - | fp | 2. leaf function: - | fp | Signed-off-by: Nickhu --- arch/nds32/kernel/perf_event_cpu.c | 299 + 1 file changed, 299 insertions(+) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index a6e723d0fdbc..5e00ce54d0ff 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void) device_initcall(register_pmu_driver); +/* + * References: arch/nds32/kernel/traps.c:__dump() + * You will need to know the NDS ABI first. + */ +static int unwind_frame_kernel(struct stackframe *frame) +{ + int graph = 0; +#ifdef CONFIG_FRAME_POINTER + /* 0x3 means misalignment */ + if (!kstack_end((void *)frame->fp) && + !((unsigned long)frame->fp & 0x3) && + ((unsigned long)frame->fp >= TASK_SIZE)) { + /* +* The array index is based on the ABI, the below graph +* illustrate the reasons. +* Function call procedure: "smw" and "lmw" will always +* update SP and FP for you automatically. 
+* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) <-- FP(after smw) -1 +* +* |FP| -2 +* +* | | <-- SP(after smw) -3 +*/ + frame->lp = ((unsigned long *)frame->fp)[-1]; + frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; + /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (NULL, , frame->lp, NULL); + + return 0; + } else { + return -EPERM; + } +#else + /* +* You can refer to arch/nds32/kernel/traps.c:__dump() +* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". +* And, the "sp" is not always correct. +* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) -1 +* +* | | <-- SP(after smw) -2 +* +*/ + if (!kstack_end((void *)frame->sp)) { + frame->lp = ((unsigned long *)frame->sp)[1]; + /* TODO: How to deal with the value in first +* "sp" is not correct? +*/ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (tsk, , frame->lp, NULL); + + frame->sp = ((unsigned long *)frame->sp) + 1; + + return 0; + } else { + return -EPERM; + } +#endif +} + +static void notrace +walk_stackframe(struct stackframe *frame, + int (*fn_record)(struct stackframe *, void *), + void *data) +{ + while (1) { + int ret; + + if (fn_record(frame, data)) + break; + + ret = unwind_frame_kernel(frame); + if (ret < 0) + break; + } +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called
[PATCH 4/4] nds32: Add document for NDS32 PMU.
The document for how to add NDS32 PMU in devicetree. Signed-off-by: Nickhu --- Documentation/devicetree/bindings/nds32/pmu.txt | 17 + 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt b/Documentation/devicetree/bindings/nds32/pmu.txt new file mode 100644 index ..1bd15785b4ae --- /dev/null +++ b/Documentation/devicetree/bindings/nds32/pmu.txt @@ -0,0 +1,17 @@ +* NDS32 Performance Monitor Units + +NDS32 core have a PMU for counting cpu and cache events like cache misses. +The NDS32 PMU representation in the device tree should be done as under: + +Required properties: + +- compatible : + "andestech,nds32v3-pmu" + +- interrupts : The interrupt number for NDS32 PMU is 13. + +Example: +pmu{ + compatible = "andestech,nds32v3-pmu"; + interrupts = <13>; +} -- 2.17.0
[PATCH 3/4] nds32: Add perf call-graph support.
The perf call-graph option can trace the callchain between functions. This commit adds perf callchain support for nds32. There are a kernel callchain and a user callchain. The kernel callchain can trace functions in kernel space. There are two types of user callchain: one for when the 'optimize for size' config is set, and another for when it is not set. The difference between the two types is that the index of the frame pointer in the user stack is not the same. For example: With optimize for size: User Stack: - | lp | - | gp | - | fp | Without optimize for size: User Stack: 1. non-leaf function: - | lp | - | fp | 2. leaf function: - | fp | Signed-off-by: Nickhu --- arch/nds32/kernel/perf_event_cpu.c | 299 + 1 file changed, 299 insertions(+) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index a6e723d0fdbc..5e00ce54d0ff 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void) device_initcall(register_pmu_driver); +/* + * References: arch/nds32/kernel/traps.c:__dump() + * You will need to know the NDS ABI first. + */ +static int unwind_frame_kernel(struct stackframe *frame) +{ + int graph = 0; +#ifdef CONFIG_FRAME_POINTER + /* 0x3 means misalignment */ + if (!kstack_end((void *)frame->fp) && + !((unsigned long)frame->fp & 0x3) && + ((unsigned long)frame->fp >= TASK_SIZE)) { + /* +* The array index is based on the ABI, the below graph +* illustrate the reasons. +* Function call procedure: "smw" and "lmw" will always +* update SP and FP for you automatically. 
+* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) <-- FP(after smw) -1 +* +* |FP| -2 +* +* | | <-- SP(after smw) -3 +*/ + frame->lp = ((unsigned long *)frame->fp)[-1]; + frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; + /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (NULL, , frame->lp, NULL); + + return 0; + } else { + return -EPERM; + } +#else + /* +* You can refer to arch/nds32/kernel/traps.c:__dump() +* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". +* And, the "sp" is not always correct. +* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) -1 +* +* | | <-- SP(after smw) -2 +* +*/ + if (!kstack_end((void *)frame->sp)) { + frame->lp = ((unsigned long *)frame->sp)[1]; + /* TODO: How to deal with the value in first +* "sp" is not correct? +*/ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (tsk, , frame->lp, NULL); + + frame->sp = ((unsigned long *)frame->sp) + 1; + + return 0; + } else { + return -EPERM; + } +#endif +} + +static void notrace +walk_stackframe(struct stackframe *frame, + int (*fn_record)(struct stackframe *, void *), + void *data) +{ + while (1) { + int ret; + + if (fn_record(frame, data)) + break; + + ret = unwind_frame_kernel(frame); + if (ret < 0) + break; + } +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called
[PATCH 3/3] nds32: Add unaligned access in kernel space.
My colleague encountered a kernel panic caused by an unaligned access in kernel space. Here is the situation, with the structure 'TP_STRUCT__entry': TP_STRUCT__entry( __field(u32,tb_id ) __field(int,err ) __field(int,oif ) __field(int,iif ) __field(__u8, tos ) __field(__u8, scope ) __field(__u8, flags ) __field(u8, proto ) __array(__u8, src,4 ) __array(__u8, dst,4 ) __array(__u8, gw, 4 ) __array(__u8, saddr, 4 ) __field(u16,sport ) __field(u16,dport ) __dynamic_array(char, name, IFNAMSIZ ) ) When he tried to access an element of the structure, the kernel panicked. Although he rearranged the order of the structure to fix the problem, we cannot ignore the fact that unaligned access is still needed in kernel space. Handling it helps us avoid a kernel panic when a reasonable unaligned access happens. Users need to be aware that some unreasonable unaligned addresses may still cause bugs in the kernel. The config 'HAVE_EFFICIENT_UNALIGNED_ACCESS' must be used together with the hardware unaligned access config 'HW_SUPPORT_UNALIGNMENT_ACCESS'. In the software unaligned access handler, the code 'get_inst()' in arch/nds32/mm/alignment.c:522 would be generated as a load word instruction if 'HAVE_EFFICIENT_UNALIGNED_ACCESS' is set. This would cause the kernel to hang in a loop if the address of the load word instruction is unaligned. For example: 0xbc39e: lwi450 $r0, [$r1], if $r1 causes an unaligned access. | | unaligned access handler v arch/nds32/mm/alignment.c:522: get_inst():0xb0874b7e lwi450 $r2, [$r3], $r3 is the address '0xbc39e', it would cause kernel unaligned access. | | unaligned access handler v arch/nds32/mm/alignment.c:522: get_inst():0xb0874b7e lwi450 $r2, [$r3], $r3 is the address '0xb0874b7e', it would cause kernel unaligned access. The kernel hangs in the loop. 
Signed-off-by: Nickhu --- arch/nds32/kernel/traps.c | 4 +++- arch/nds32/mm/alignment.c | 6 -- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 1496aab48998..dcde7abc5515 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -331,6 +331,7 @@ void do_revinsn(struct pt_regs *regs) #ifdef CONFIG_ALIGNMENT_TRAP extern int unalign_access_mode; extern int do_unaligned_access(unsigned long addr, struct pt_regs *regs); +extern int va_kernel_present(unsigned long addr); #endif void do_dispatch_general(unsigned long entry, unsigned long addr, unsigned long itype, struct pt_regs *regs, @@ -341,7 +342,8 @@ void do_dispatch_general(unsigned long entry, unsigned long addr, if (type == ETYPE_ALIGNMENT_CHECK) { #ifdef CONFIG_ALIGNMENT_TRAP /* Alignment check */ - if (user_mode(regs) && unalign_access_mode) { + if ((user_mode(regs) && unalign_access_mode) || + va_kernel_present(addr)) { int ret; ret = do_unaligned_access(addr, regs); diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index 66a556befd05..2d7a08af6622 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -524,8 +524,10 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) DEBUG((unalign_access_debug > 0), 1, "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr, regs->ipc, inst); - - set_fs(USER_DS); + if ((user_mode(regs) && unalign_access_mode)) + set_fs(USER_DS); + else if (va_kernel_present(addr)) + set_fs(KERNEL_DS); if (inst & NDS32_16BIT_INSTRUCTION) ret = do_16((inst >> 16) & 0x, regs); -- 2.17.0
[PATCH 1/1] Perf: Compile failed when compile with libelf.
The build fails with the following error message: = util/symbol-elf.c:46:12: error: static declaration of 'elf_getphdrnum' follows non-static declaration static int elf_getphdrnum(Elf *elf, size_t *dst) ^~ In file included from util/symbol.h:20, from util/symbol-elf.c:9: /local/nickhu/build-system-3/toolchain/nds32le-linux-glibc-v3-upstream/ nds32le-linux/sysroot/usr/include/libelf.h:266:12: note: previous declaration of 'elf_getphdrnum' was here extern int elf_getphdrnum (Elf *__elf, size_t *__dst); ^~ util/symbol-elf.c:62:12: error: static declaration of 'elf_getshdrstrndx' follows non-static declaration static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe _unused) ^ In file included from util/symbol.h:20, from util/symbol-elf.c:9: /local/nickhu/build-system-3/toolchain/nds32le-linux-glibc-v3-upstream/ nds32le-linux/sysroot/usr/include/libelf.h:316:12: note: previous declaration of 'elf_getshdrstrndx' was here extern int elf_getshdrstrndx (Elf *__elf, size_t *__dst); = Fix it by removing the 'static' qualifier from the fallback definitions so that they agree with the non-static declarations in libelf.h. Signed-off-by: Nickhu --- tools/perf/util/symbol-elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 29770ea61768..3ccdfe603d67 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -43,7 +43,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, #endif #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT -static int elf_getphdrnum(Elf *elf, size_t *dst) +int elf_getphdrnum(Elf *elf, size_t *dst) { GElf_Ehdr gehdr; GElf_Ehdr *ehdr; @@ -59,7 +59,7 @@ static int elf_getphdrnum(Elf *elf, size_t *dst) #endif #ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT -static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused) +int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused) { pr_err("%s: update your libelf to > 0.140, this one lacks elf_getshdrstrndx().\n", __func__); return -1; -- 2.17.0
[PATCH 0/1] nds32: Fix gcc 8.0 compiler option incompatible.
Fix a gcc 8.0 compiler option incompatibility that occurs when the ftrace and frame pointer kernel config options are both selected. Nickhu (1): nds32: Fix gcc 8.0 compiler option incompatible. arch/nds32/mm/Makefile | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) -- 2.17.0
[PATCH 0/1] Perf: Compile failed when compile with libelf.
Fix the perf build failure when compiling with libelf. Nickhu (1): Perf: Compile failed when compile with libelf. tools/perf/util/symbol-elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- 2.17.0
[PATCH 1/1] Perf: Compile failed when compile with libelf.
The build fails with the following error message: = util/symbol-elf.c:46:12: error: static declaration of 'elf_getphdrnum' follows non-static declaration static int elf_getphdrnum(Elf *elf, size_t *dst) ^~ In file included from util/symbol.h:20, from util/symbol-elf.c:9: /local/nickhu/build-system-3/toolchain/nds32le-linux-glibc-v3-upstream/ nds32le-linux/sysroot/usr/include/libelf.h:266:12: note: previous declaration of 'elf_getphdrnum' was here extern int elf_getphdrnum (Elf *__elf, size_t *__dst); ^~ util/symbol-elf.c:62:12: error: static declaration of 'elf_getshdrstrndx' follows non-static declaration static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe _unused) ^ In file included from util/symbol.h:20, from util/symbol-elf.c:9: /local/nickhu/build-system-3/toolchain/nds32le-linux-glibc-v3-upstream/ nds32le-linux/sysroot/usr/include/libelf.h:316:12: note: previous declaration of 'elf_getshdrstrndx' was here extern int elf_getshdrstrndx (Elf *__elf, size_t *__dst); = Fix it by removing the 'static' qualifier from the fallback definitions so that they agree with the non-static declarations in libelf.h. Signed-off-by: Nickhu --- tools/perf/util/symbol-elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 29770ea61768..3ccdfe603d67 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -43,7 +43,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, #endif #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT -static int elf_getphdrnum(Elf *elf, size_t *dst) +int elf_getphdrnum(Elf *elf, size_t *dst) { GElf_Ehdr gehdr; GElf_Ehdr *ehdr; @@ -59,7 +59,7 @@ static int elf_getphdrnum(Elf *elf, size_t *dst) #endif #ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT -static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused) +int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused) { pr_err("%s: update your libelf to > 0.140, this one lacks elf_getshdrstrndx().\n", __func__); return -1; -- 2.17.0
[PATCH 0/1] nds32: Fix gcc 8.0 compiler option incompatible.
Fix a gcc 8.0 compiler option incompatibility that occurs when the ftrace and frame pointer kernel config options are both selected. Nickhu (1): nds32: Fix gcc 8.0 compiler option incompatible. arch/nds32/mm/Makefile | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) -- 2.17.0
[PATCH 0/1] Perf: Compile failed when compile with libelf.
Fix the perf build failure when compiling with libelf. Nickhu (1): Perf: Compile failed when compile with libelf. tools/perf/util/symbol-elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- 2.17.0
[PATCH 3/3] nds32: Add unaligned access in kernel space.
My colleague encountered a kernel panic caused by an unaligned access in kernel space. Here is the situation, with the structure 'TP_STRUCT__entry': TP_STRUCT__entry( __field(u32,tb_id ) __field(int,err ) __field(int,oif ) __field(int,iif ) __field(__u8, tos ) __field(__u8, scope ) __field(__u8, flags ) __field(u8, proto ) __array(__u8, src,4 ) __array(__u8, dst,4 ) __array(__u8, gw, 4 ) __array(__u8, saddr, 4 ) __field(u16,sport ) __field(u16,dport ) __dynamic_array(char, name, IFNAMSIZ ) ) When he tried to access an element of the structure, the kernel panicked. Although he rearranged the order of the structure to fix the problem, we cannot ignore the fact that unaligned access is still needed in kernel space. Handling it helps us avoid a kernel panic when a reasonable unaligned access happens. Users need to be aware that some unreasonable unaligned addresses may still cause bugs in the kernel. The config 'HAVE_EFFICIENT_UNALIGNED_ACCESS' must be used together with the hardware unaligned access config 'HW_SUPPORT_UNALIGNMENT_ACCESS'. In the software unaligned access handler, the code 'get_inst()' in arch/nds32/mm/alignment.c:522 would be generated as a load word instruction if 'HAVE_EFFICIENT_UNALIGNED_ACCESS' is set. This would cause the kernel to hang in a loop if the address of the load word instruction is unaligned. For example: 0xbc39e: lwi450 $r0, [$r1], if $r1 causes an unaligned access. | | unaligned access handler v arch/nds32/mm/alignment.c:522: get_inst():0xb0874b7e lwi450 $r2, [$r3], $r3 is the address '0xbc39e', it would cause kernel unaligned access. | | unaligned access handler v arch/nds32/mm/alignment.c:522: get_inst():0xb0874b7e lwi450 $r2, [$r3], $r3 is the address '0xb0874b7e', it would cause kernel unaligned access. The kernel hangs in the loop. 
Signed-off-by: Nickhu --- arch/nds32/kernel/traps.c | 4 +++- arch/nds32/mm/alignment.c | 6 -- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 1496aab48998..dcde7abc5515 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -331,6 +331,7 @@ void do_revinsn(struct pt_regs *regs) #ifdef CONFIG_ALIGNMENT_TRAP extern int unalign_access_mode; extern int do_unaligned_access(unsigned long addr, struct pt_regs *regs); +extern int va_kernel_present(unsigned long addr); #endif void do_dispatch_general(unsigned long entry, unsigned long addr, unsigned long itype, struct pt_regs *regs, @@ -341,7 +342,8 @@ void do_dispatch_general(unsigned long entry, unsigned long addr, if (type == ETYPE_ALIGNMENT_CHECK) { #ifdef CONFIG_ALIGNMENT_TRAP /* Alignment check */ - if (user_mode(regs) && unalign_access_mode) { + if ((user_mode(regs) && unalign_access_mode) || + va_kernel_present(addr)) { int ret; ret = do_unaligned_access(addr, regs); diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index 66a556befd05..2d7a08af6622 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -524,8 +524,10 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) DEBUG((unalign_access_debug > 0), 1, "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr, regs->ipc, inst); - - set_fs(USER_DS); + if ((user_mode(regs) && unalign_access_mode)) + set_fs(USER_DS); + else if (va_kernel_present(addr)) + set_fs(KERNEL_DS); if (inst & NDS32_16BIT_INSTRUCTION) ret = do_16((inst >> 16) & 0x, regs); -- 2.17.0
[PATCH 1/3] nds32: Fix instruction simulator bug for unaligned access handler.
When emulating the 16 bits instructions, the mapping of general purpose registers is not the same as 32 bits instructions. Example: 'LWI450 r16, [r15]' 16-bit instruction will be decoded as '1011010110001110', the target register field is decode as index=12. But the index of target register should be 16. So the mapping of register in unaligned access handler is wrong. Signed-off-by: Nickhu --- arch/nds32/mm/alignment.c | 37 + 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index e1aed9dc692d..66a556befd05 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -152,12 +152,16 @@ extern int va_writable(struct pt_regs *regs, unsigned long addr); int unalign_access_mode = 0, unalign_access_debug = 0; -static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx) +static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx, + int idx_mode) { /* this should be consistent with ptrace.h */ - if (idx >= 0 && idx <= 25) /* R0-R25 */ - return >uregs[0] + idx; - else if (idx >= 28 && idx <= 30)/* FP, GP, LP */ + if (idx >= 0 && idx <= 25) {/* R0-R25 */ + if (idx_mode == 4 && idx > 11) + return >uregs[0] + idx + 4; + else + return >uregs[0] + idx; + } else if (idx >= 28 && idx <= 30) /* FP, GP, LP */ return >fp + (idx - 28); else if (idx == 31) /* SP */ return >sp; @@ -270,10 +274,10 @@ static inline int do_16(unsigned long inst, struct pt_regs *regs) } if (addr_mode == 3) { - unaligned_addr = *idx_to_addr(regs, RA3(inst)); + unaligned_addr = *idx_to_addr(regs, RA3(inst), addr_mode); source_idx = RA3(inst); } else { - unaligned_addr = *idx_to_addr(regs, RA5(inst)); + unaligned_addr = *idx_to_addr(regs, RA5(inst), addr_mode); source_idx = RA5(inst); } @@ -293,16 +297,17 @@ static inline int do_16(unsigned long inst, struct pt_regs *regs) return -EACCES; get_data(unaligned_addr, _val, len); - *idx_to_addr(regs, target_idx) = target_val; + *idx_to_addr(regs, target_idx, 
idx_mode) = target_val; } else { if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len)) return -EACCES; - target_val = *idx_to_addr(regs, target_idx); + target_val = *idx_to_addr(regs, target_idx, idx_mode); set_data((void *)unaligned_addr, target_val, len); } if (!regular) - *idx_to_addr(regs, source_idx) = unaligned_addr + shift; + *idx_to_addr(regs, source_idx, idx_mode) = + unaligned_addr + shift; regs->ipc += 2; return 0; @@ -312,10 +317,10 @@ static inline int do_16(unsigned long inst, struct pt_regs *regs) static inline int do_32(unsigned long inst, struct pt_regs *regs) { - int imm, regular, load, len, sign_ext; + int imm, regular, load, len, sign_ext, idx_mode = 5; unsigned long unaligned_addr, target_val, shift; - unaligned_addr = *idx_to_addr(regs, RA(inst)); + unaligned_addr = *idx_to_addr(regs, RA(inst), idx_mode); switch ((inst >> 25) << 1) { @@ -472,7 +477,7 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) if (imm) shift = GET_IMMSVAL(IMM(inst)) * len; else - shift = *idx_to_addr(regs, RB(inst)) << SV(inst); + shift = *idx_to_addr(regs, RB(inst), idx_mode) << SV(inst); if (regular) unaligned_addr += shift; @@ -485,21 +490,21 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) get_data(unaligned_addr, _val, len); if (sign_ext) - *idx_to_addr(regs, RT(inst)) = + *idx_to_addr(regs, RT(inst), idx_mode) = sign_extend(target_val, len); else - *idx_to_addr(regs, RT(inst)) = target_val; + *idx_to_addr(regs, RT(inst), idx_mode) = target_val; } else { if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len)) return -EACCES; - target_val = *idx_to_addr(regs, RT(inst)); + target_val = *idx_to_addr(regs, RT(inst), idx_mode); set_data((void *)unaligned_addr, target_val, len); } if (!regular) - *idx_to_addr(regs
[PATCH 2/3] nds32: Add 'HAVE_EFFICIENT_UNALIGNED_ACCESS' config
As I understand it, this config optimizes code generation. When an unaligned access happens, the load word instruction can still be used if unaligned access is supported; otherwise the load byte instruction is used. So this config needs unaligned access support. 'HAVE_EFFICIENT_UNALIGNED_ACCESS' and 'HW_SUPPORT_UNALIGNMENT_ACCESS' are enabled by default in nds32. Signed-off-by: Nickhu --- arch/nds32/Kconfig.cpu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu index b8c8984d1456..b8eecd0cde6b 100644 --- a/arch/nds32/Kconfig.cpu +++ b/arch/nds32/Kconfig.cpu @@ -111,8 +111,9 @@ config ALIGNMENT_TRAP config HW_SUPPORT_UNALIGNMENT_ACCESS bool "Kernel support unaligned access handling by hw" + select HAVE_EFFICIENT_UNALIGNED_ACCESS depends on !ALIGNMENT_TRAP - default n + default y help Andes processors load/store world/half-word instructions can access unaligned memory locations without generating the Data Alignment -- 2.17.0
[PATCH 2/3] nds32: Add 'HAVE_EFFICIENT_UNALIGNED_ACCESS' config
As I understand it, this config optimizes code generation. When an unaligned access happens, the load word instruction can still be used if unaligned access is supported; otherwise the load byte instruction is used. So this config needs unaligned access support. 'HAVE_EFFICIENT_UNALIGNED_ACCESS' and 'HW_SUPPORT_UNALIGNMENT_ACCESS' are enabled by default in nds32. Signed-off-by: Nickhu --- arch/nds32/Kconfig.cpu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu index b8c8984d1456..b8eecd0cde6b 100644 --- a/arch/nds32/Kconfig.cpu +++ b/arch/nds32/Kconfig.cpu @@ -111,8 +111,9 @@ config ALIGNMENT_TRAP config HW_SUPPORT_UNALIGNMENT_ACCESS bool "Kernel support unaligned access handling by hw" + select HAVE_EFFICIENT_UNALIGNED_ACCESS depends on !ALIGNMENT_TRAP - default n + default y help Andes processors load/store world/half-word instructions can access unaligned memory locations without generating the Data Alignment -- 2.17.0
[PATCH 1/3] nds32: Fix instruction simulator bug for unaligned access handler.
When emulating the 16 bits instructions, the mapping of general purpose registers is not the same as 32 bits instructions. Example: 'LWI450 r16, [r15]' 16-bit instruction will be decoded as '1011010110001110', the target register field is decode as index=12. But the index of target register should be 16. So the mapping of register in unaligned access handler is wrong. Signed-off-by: Nickhu --- arch/nds32/mm/alignment.c | 37 + 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index e1aed9dc692d..66a556befd05 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -152,12 +152,16 @@ extern int va_writable(struct pt_regs *regs, unsigned long addr); int unalign_access_mode = 0, unalign_access_debug = 0; -static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx) +static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx, + int idx_mode) { /* this should be consistent with ptrace.h */ - if (idx >= 0 && idx <= 25) /* R0-R25 */ - return >uregs[0] + idx; - else if (idx >= 28 && idx <= 30)/* FP, GP, LP */ + if (idx >= 0 && idx <= 25) {/* R0-R25 */ + if (idx_mode == 4 && idx > 11) + return >uregs[0] + idx + 4; + else + return >uregs[0] + idx; + } else if (idx >= 28 && idx <= 30) /* FP, GP, LP */ return >fp + (idx - 28); else if (idx == 31) /* SP */ return >sp; @@ -270,10 +274,10 @@ static inline int do_16(unsigned long inst, struct pt_regs *regs) } if (addr_mode == 3) { - unaligned_addr = *idx_to_addr(regs, RA3(inst)); + unaligned_addr = *idx_to_addr(regs, RA3(inst), addr_mode); source_idx = RA3(inst); } else { - unaligned_addr = *idx_to_addr(regs, RA5(inst)); + unaligned_addr = *idx_to_addr(regs, RA5(inst), addr_mode); source_idx = RA5(inst); } @@ -293,16 +297,17 @@ static inline int do_16(unsigned long inst, struct pt_regs *regs) return -EACCES; get_data(unaligned_addr, _val, len); - *idx_to_addr(regs, target_idx) = target_val; + *idx_to_addr(regs, target_idx, 
idx_mode) = target_val; } else { if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len)) return -EACCES; - target_val = *idx_to_addr(regs, target_idx); + target_val = *idx_to_addr(regs, target_idx, idx_mode); set_data((void *)unaligned_addr, target_val, len); } if (!regular) - *idx_to_addr(regs, source_idx) = unaligned_addr + shift; + *idx_to_addr(regs, source_idx, idx_mode) = + unaligned_addr + shift; regs->ipc += 2; return 0; @@ -312,10 +317,10 @@ static inline int do_16(unsigned long inst, struct pt_regs *regs) static inline int do_32(unsigned long inst, struct pt_regs *regs) { - int imm, regular, load, len, sign_ext; + int imm, regular, load, len, sign_ext, idx_mode = 5; unsigned long unaligned_addr, target_val, shift; - unaligned_addr = *idx_to_addr(regs, RA(inst)); + unaligned_addr = *idx_to_addr(regs, RA(inst), idx_mode); switch ((inst >> 25) << 1) { @@ -472,7 +477,7 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) if (imm) shift = GET_IMMSVAL(IMM(inst)) * len; else - shift = *idx_to_addr(regs, RB(inst)) << SV(inst); + shift = *idx_to_addr(regs, RB(inst), idx_mode) << SV(inst); if (regular) unaligned_addr += shift; @@ -485,21 +490,21 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) get_data(unaligned_addr, _val, len); if (sign_ext) - *idx_to_addr(regs, RT(inst)) = + *idx_to_addr(regs, RT(inst), idx_mode) = sign_extend(target_val, len); else - *idx_to_addr(regs, RT(inst)) = target_val; + *idx_to_addr(regs, RT(inst), idx_mode) = target_val; } else { if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len)) return -EACCES; - target_val = *idx_to_addr(regs, RT(inst)); + target_val = *idx_to_addr(regs, RT(inst), idx_mode); set_data((void *)unaligned_addr, target_val, len); } if (!regular) - *idx_to_addr(regs
[PATCH 1/1] nds32: Fix gcc 8.0 compiler option incompatible.
When both the ftrace and frame pointer kernel config options are selected, the resulting compiler options are incompatible. Error message: nds32le-linux-gcc: error: -pg and -fomit-frame-pointer are incompatible Signed-off-by: Nickhu Signed-off-by: Zong Li --- arch/nds32/mm/Makefile | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/nds32/mm/Makefile b/arch/nds32/mm/Makefile index 6b6855852223..7c5c15ad854a 100644 --- a/arch/nds32/mm/Makefile +++ b/arch/nds32/mm/Makefile @@ -4,4 +4,8 @@ obj-y := extable.o tlb.o \ obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o -CFLAGS_proc-n13.o += -fomit-frame-pointer + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_proc.o = $(CC_FLAGS_FTRACE) +endif +CFLAGS_proc.o += -fomit-frame-pointer -- 2.17.0
[PATCH 1/1] nds32: Fix gcc 8.0 compiler option incompatible.
When both the ftrace and frame pointer kernel config options are selected, the resulting compiler options are incompatible. Error message: nds32le-linux-gcc: error: -pg and -fomit-frame-pointer are incompatible Signed-off-by: Nickhu Signed-off-by: Zong Li --- arch/nds32/mm/Makefile | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/nds32/mm/Makefile b/arch/nds32/mm/Makefile index 6b6855852223..7c5c15ad854a 100644 --- a/arch/nds32/mm/Makefile +++ b/arch/nds32/mm/Makefile @@ -4,4 +4,8 @@ obj-y := extable.o tlb.o \ obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o -CFLAGS_proc-n13.o += -fomit-frame-pointer + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_proc.o = $(CC_FLAGS_FTRACE) +endif +CFLAGS_proc.o += -fomit-frame-pointer -- 2.17.0
[PATCH 0/3] nds32: Unaligned access handler fix
These patches concern the unaligned access handler. We fix some bugs in the handler and add some kernel configs for it. Then we add software handling of kernel-space unaligned accesses to the handler. Nickhu (3): nds32: Fix instruction simulator bug for unaligned access handler. nds32: Add 'HAVE_EFFICIENT_UNALIGNED_ACCESS' config nds32: Add unaligned access in kernel space. arch/nds32/Kconfig.cpu| 3 ++- arch/nds32/kernel/traps.c | 4 +++- arch/nds32/mm/alignment.c | 43 +++ 3 files changed, 30 insertions(+), 20 deletions(-) -- 2.17.0
[PATCH 0/3] nds32: Unaligned access handler fix
These patches concern the unaligned access handler. We fix some bugs in the handler and add some kernel configs for it. Then we add software handling of kernel-space unaligned accesses to the handler. Nickhu (3): nds32: Fix instruction simulator bug for unaligned access handler. nds32: Add 'HAVE_EFFICIENT_UNALIGNED_ACCESS' config nds32: Add unaligned access in kernel space. arch/nds32/Kconfig.cpu| 3 ++- arch/nds32/kernel/traps.c | 4 +++- arch/nds32/mm/alignment.c | 43 +++ 3 files changed, 30 insertions(+), 20 deletions(-) -- 2.17.0
[PATCH 3/5] nds32: Add perf call-graph support.
The perf call-graph option can trace the callchain between functions. This commit adds perf callchain support for nds32. There are a kernel callchain and a user callchain. The kernel callchain can trace functions in kernel space. There are two types of user callchain: one for when the 'optimize for size' config is set, and another for when it is not set. The difference between the two types is that the index of the frame pointer in the user stack is not the same. For example: With optimize for size: User Stack: - | lp | - | gp | - | fp | Without optimize for size: User Stack: 1. non-leaf function: - | lp | - | fp | 2. leaf function: - | fp | Signed-off-by: Nickhu --- arch/nds32/kernel/perf_event_cpu.c | 299 + 1 file changed, 299 insertions(+) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index c39c6746a3e8..7bb4ebb87b5c 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1240,6 +1240,305 @@ static int __init register_pmu_driver(void) device_initcall(register_pmu_driver); +/* + * References: arch/nds32/kernel/traps.c:__dump() + * You will need to know the NDS ABI first. + */ +static int unwind_frame_kernel(struct stackframe *frame) +{ + int graph = 0; +#ifdef CONFIG_FRAME_POINTER + /* 0x3 means misalignment */ + if (!kstack_end((void *)frame->fp) && + !((unsigned long)frame->fp & 0x3) && + ((unsigned long)frame->fp >= TASK_SIZE)) { + /* +* The array index is based on the ABI, the below graph +* illustrate the reasons. +* Function call procedure: "smw" and "lmw" will always +* update SP and FP for you automatically. 
+* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) <-- FP(after smw) -1 +* +* |FP| -2 +* +* | | <-- SP(after smw) -3 +*/ + frame->lp = ((unsigned long *)frame->fp)[-1]; + frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; + /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (NULL, , frame->lp, NULL); + + return 0; + } else { + return -EPERM; + } +#else + /* +* You can refer to arch/nds32/kernel/traps.c:__dump() +* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". +* And, the "sp" is not always correct. +* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) -1 +* +* | | <-- SP(after smw) -2 +* +*/ + if (!kstack_end((void *)frame->sp)) { + frame->lp = ((unsigned long *)frame->sp)[1]; + /* TODO: How to deal with the value in first +* "sp" is not correct? +*/ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (tsk, , frame->lp, NULL); + + frame->sp = ((unsigned long *)frame->sp) + 1; + + return 0; + } else { + return -EPERM; + } +#endif +} + +static void notrace +walk_stackframe(struct stackframe *frame, + int (*fn_record)(struct stackframe *, void *), + void *data) +{ + while (1) { + int ret; + + if (fn_record(frame, data)) + break; + + ret = unwind_frame_kernel(frame); + if (ret < 0) + break; + } +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called
[PATCH 3/5] nds32: Add perf call-graph support.
The perf call-graph option can trace the callchain between functions. This commit adds perf callchain support for nds32. There are a kernel callchain and a user callchain. The kernel callchain can trace functions in kernel space. There are two types of user callchain: one for when the 'optimize for size' config is set, and another for when it is not set. The difference between the two types is that the index of the frame pointer in the user stack is not the same. For example: With optimize for size: User Stack: - | lp | - | gp | - | fp | Without optimize for size: User Stack: 1. non-leaf function: - | lp | - | fp | 2. leaf function: - | fp | Signed-off-by: Nickhu --- arch/nds32/kernel/perf_event_cpu.c | 299 + 1 file changed, 299 insertions(+) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index c39c6746a3e8..7bb4ebb87b5c 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1240,6 +1240,305 @@ static int __init register_pmu_driver(void) device_initcall(register_pmu_driver); +/* + * References: arch/nds32/kernel/traps.c:__dump() + * You will need to know the NDS ABI first. + */ +static int unwind_frame_kernel(struct stackframe *frame) +{ + int graph = 0; +#ifdef CONFIG_FRAME_POINTER + /* 0x3 means misalignment */ + if (!kstack_end((void *)frame->fp) && + !((unsigned long)frame->fp & 0x3) && + ((unsigned long)frame->fp >= TASK_SIZE)) { + /* +* The array index is based on the ABI, the below graph +* illustrate the reasons. +* Function call procedure: "smw" and "lmw" will always +* update SP and FP for you automatically. 
+* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) <-- FP(after smw) -1 +* +* |FP| -2 +* +* | | <-- SP(after smw) -3 +*/ + frame->lp = ((unsigned long *)frame->fp)[-1]; + frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; + /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (NULL, , frame->lp, NULL); + + return 0; + } else { + return -EPERM; + } +#else + /* +* You can refer to arch/nds32/kernel/traps.c:__dump() +* Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". +* And, the "sp" is not always correct. +* +* Stack Relative Address +* | | 0 +* +* |LP| <-- SP(before smw) -1 +* +* | | <-- SP(after smw) -2 +* +*/ + if (!kstack_end((void *)frame->sp)) { + frame->lp = ((unsigned long *)frame->sp)[1]; + /* TODO: How to deal with the value in first +* "sp" is not correct? +*/ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (tsk, , frame->lp, NULL); + + frame->sp = ((unsigned long *)frame->sp) + 1; + + return 0; + } else { + return -EPERM; + } +#endif +} + +static void notrace +walk_stackframe(struct stackframe *frame, + int (*fn_record)(struct stackframe *, void *), + void *data) +{ + while (1) { + int ret; + + if (fn_record(frame, data)) + break; + + ret = unwind_frame_kernel(frame); + if (ret < 0) + break; + } +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called
[PATCH 2/5] nds32: Fix bug in bitfield.h
There are two bitfield bugs for the performance counters in bitfield.h: PFM_CTL_offSEL1 21 --> 16 PFM_CTL_offSEL2 27 --> 22 This commit fixes them. Signed-off-by: Nickhu --- arch/nds32/include/asm/bitfield.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index 8e84fc385b94..19b2841219ad 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -692,8 +692,8 @@ #define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */ #define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */ #define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */ -#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */ -#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */ +#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */ +#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */ /* bit 28:31 reserved */ #define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 ) -- 2.17.0
[PATCH 4/5] nds32: Fix perf multiple events map to same counter.
When multiple events map to the same counter, the counter counts inaccurately. This is because each counter can only count one event at a time. So when multiple events map to the same counter, they have to take turns in each context. There are two solutions: 1. Print an error message when multiple events map to the same counter. But printing the error message would make the program hang in a loop. The LTP (Linux Test Project) tests would fail when the program hangs in the loop. 2. Don't print the error message, so the LTP tests pass. But users need to know not to count events which map to the same counter, or they will get inaccurate results. We choose method 2 as the solution. Signed-off-by: Nickhu --- arch/nds32/include/asm/pmu.h | 1 + arch/nds32/kernel/perf_event_cpu.c | 30 -- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h index 3fbbe97c2d42..e75ec34af5f6 100644 --- a/arch/nds32/include/asm/pmu.h +++ b/arch/nds32/include/asm/pmu.h @@ -55,6 +55,7 @@ enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS }; */ #define NDS32_IDX_CYCLE_COUNTER 0 #define NDS32_IDX_COUNTER0 1 +#define NDS32_IDX_COUNTER1 2 #define NDS32_IDX_COUNTER_LAST(cpu_pmu) \ (NDS32_IDX_CYCLE_COUNTER + (cpu_pmu)->num_events - 1) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index 7bb4ebb87b5c..e9a0d8bb2bc1 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -566,16 +566,26 @@ static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Try to get the counter for correpsonding event */ - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - - /* -* The counter is in use. -* The system will hang in the loop. 
-*/ - pr_err - ("Multiple events map to one counter, the behavior is undefined.\n"); - return -EPERM; + if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask)) + return NDS32_IDX_COUNTER0; + if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) + return NDS32_IDX_COUNTER1; + } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) + return NDS32_IDX_COUNTER1; + else if (!test_and_set_bit +(NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return NDS32_IDX_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; } static void nds32_pmu_start(struct nds32_pmu *cpu_pmu) -- 2.17.0
[PATCH 1/5] nds32: Perf porting
This commit ports perf to nds32. Raw event: The raw events start with 'r'. Usage: perf stat -e rXYZ ./app X: the index of the performance counter. YZ: the index (converted to hexadecimal) of the event. Example: 'perf stat -e r101 ./app' means that counter 1 will count the instruction event. The indices of counters and events can be found in the "Andes System Privilege Architecture Version 3 Manual". Alternatively, you can run 'perf list' to find the symbolic names of raw events. Perf mmap2: Fix unexpected perf mmap2() page fault When mmap2() is called by the perf application, you will encounter the condition "failed to write." with return value -EFAULT. This is due to the page fault caused by "reading" a buffer from the mapped legal address region in order to write it to the descriptor. The page_fault handler will get a VM_FAULT_SIGBUS return value, which should not happen here (because this is a read request). You can refer to kernel/events/core.c:perf_mmap_fault(...): if "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" evaluates to true, you will get VM_FAULT_SIGBUS as the return value. However, this is not a write request. The flags which indicate why the page fault happened are wrong. Furthermore, NDS32 SPAv3 is not able to detect whether an access is a read or a write. It only knows whether it is an instruction fetch or a data access. Therefore, by removing the wrong flag assignment (actually, the hardware is not able to show the reason), we can fix this bug. 
Signed-off-by: Nickhu --- arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 430 ++ arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1270 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 15 files changed, 2110 insertions(+), 6 deletions(-) create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7068f341133d..dd448d431f5a 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -31,6 +31,7 @@ config NDS32 select HAVE_DEBUG_KMEMLEAK select HAVE_MEMBLOCK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PERF_EVENTS select IRQ_DOMAIN select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts index bb39749a6673..7e92f436ce87 100644 --- a/arch/nds32/boot/dts/ae3xx.dts +++ b/arch/nds32/boot/dts/ae3xx.dts @@ -82,4 +82,9 @@ interrupts = <18>; }; }; + + pmu { + compatible = "andestech,atcpmu"; + interrupts= <13>; + }; }; diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index dbc4e5422550..f81b633d5379 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -36,6 
+36,7 @@ generic-y += kprobes.h generic-y += kvm_para.h generic-y += limits.h generic-y += local.h +generic-y += local64.h generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += parport.h diff --git a/arch/nds32/include/asm/perf_event.h b/arch/nds32/include/asm/perf_event.h new file mode 100644 index ..fcdff02acc14 --- /dev/null +++ b/arch/nds32/include/asm/perf_event.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2008-2018 Andes Technology Corporation */ + +#ifndef __ASM_PERF_EVENT_H +#define __ASM_PERF_EVENT_H + +/* + * This file is request by Perf, + * please refer to tools/perf/design.txt for more details + */ +struct pt_regs; +unsigned long perf_instruction_pointer(struct pt_regs *regs); +un
[PATCH 2/5] nds32: Fix bug in bitfield.h
There are two bitfield bugs for the performance counters in bitfield.h: PFM_CTL_offSEL1 21 --> 16 PFM_CTL_offSEL2 27 --> 22 This commit fixes them. Signed-off-by: Nickhu --- arch/nds32/include/asm/bitfield.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index 8e84fc385b94..19b2841219ad 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -692,8 +692,8 @@ #define PFM_CTL_offKU1 13 /* Enable user mode event counting for PFMC1 */ #define PFM_CTL_offKU2 14 /* Enable user mode event counting for PFMC2 */ #define PFM_CTL_offSEL0 15 /* The event selection for PFMC0 */ -#define PFM_CTL_offSEL1 21 /* The event selection for PFMC1 */ -#define PFM_CTL_offSEL2 27 /* The event selection for PFMC2 */ +#define PFM_CTL_offSEL1 16 /* The event selection for PFMC1 */ +#define PFM_CTL_offSEL2 22 /* The event selection for PFMC2 */ /* bit 28:31 reserved */ #define PFM_CTL_mskEN0 ( 0x01 << PFM_CTL_offEN0 ) -- 2.17.0
[PATCH 4/5] nds32: Fix perf multiple events map to same counter.
When multiple events map to the same counter, the counter counts inaccurately. This is because each counter can only count one event at a time. So when multiple events map to the same counter, they have to take turns in each context. There are two solutions: 1. Print an error message when multiple events map to the same counter. But printing the error message would make the program hang in a loop. The LTP (Linux Test Project) tests would fail when the program hangs in the loop. 2. Don't print the error message, so the LTP tests pass. But users need to know not to count events which map to the same counter, or they will get inaccurate results. We choose method 2 as the solution. Signed-off-by: Nickhu --- arch/nds32/include/asm/pmu.h | 1 + arch/nds32/kernel/perf_event_cpu.c | 30 -- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h index 3fbbe97c2d42..e75ec34af5f6 100644 --- a/arch/nds32/include/asm/pmu.h +++ b/arch/nds32/include/asm/pmu.h @@ -55,6 +55,7 @@ enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS }; */ #define NDS32_IDX_CYCLE_COUNTER 0 #define NDS32_IDX_COUNTER0 1 +#define NDS32_IDX_COUNTER1 2 #define NDS32_IDX_COUNTER_LAST(cpu_pmu) \ (NDS32_IDX_CYCLE_COUNTER + (cpu_pmu)->num_events - 1) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index 7bb4ebb87b5c..e9a0d8bb2bc1 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -566,16 +566,26 @@ static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Try to get the counter for correpsonding event */ - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - - /* -* The counter is in use. -* The system will hang in the loop. 
-*/ - pr_err - ("Multiple events map to one counter, the behavior is undefined.\n"); - return -EPERM; + if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask)) + return NDS32_IDX_COUNTER0; + if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) + return NDS32_IDX_COUNTER1; + } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) + return NDS32_IDX_COUNTER1; + else if (!test_and_set_bit +(NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return NDS32_IDX_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; } static void nds32_pmu_start(struct nds32_pmu *cpu_pmu) -- 2.17.0
[PATCH 1/5] nds32: Perf porting
This commit ports perf to nds32. Raw event: The raw events start with 'r'. Usage: perf stat -e rXYZ ./app X: the index of the performance counter. YZ: the index (converted to hexadecimal) of the event. Example: 'perf stat -e r101 ./app' means that counter 1 will count the instruction event. The indices of the counters and events can be found in the "Andes System Privilege Architecture Version 3 Manual". Or you can run 'perf list' to find the symbolic names of the raw events. Perf mmap2: Fix unexpected perf mmap2() page fault. When mmap2() is called by the perf application, you will encounter the following condition: "failed to write." with return value -EFAULT. This is due to a page fault caused by "reading" the buffer from the mapped legal address region in order to write it to the descriptor. The page fault handler gets a VM_FAULT_SIGBUS return value, which should not happen here (because this is a read request). You can refer to kernel/events/core.c:perf_mmap_fault(...): if "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" evaluates to true, you will get VM_FAULT_SIGBUS as the return value. However, this is not a write request. The flag which indicates why the page fault happened is wrong. Furthermore, NDS32 SPAv3 is not able to detect whether an access is a read or a write. It only knows whether it is an instruction fetch or a data access. Therefore, by removing the wrong flag assignment (the hardware is actually not able to show the reason), we can fix this bug. 
Signed-off-by: Nickhu --- arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 430 ++ arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1270 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 15 files changed, 2110 insertions(+), 6 deletions(-) create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7068f341133d..dd448d431f5a 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -31,6 +31,7 @@ config NDS32 select HAVE_DEBUG_KMEMLEAK select HAVE_MEMBLOCK select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PERF_EVENTS select IRQ_DOMAIN select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts index bb39749a6673..7e92f436ce87 100644 --- a/arch/nds32/boot/dts/ae3xx.dts +++ b/arch/nds32/boot/dts/ae3xx.dts @@ -82,4 +82,9 @@ interrupts = <18>; }; }; + + pmu { + compatible = "andestech,atcpmu"; + interrupts= <13>; + }; }; diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index dbc4e5422550..f81b633d5379 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -36,6 
+36,7 @@ generic-y += kprobes.h generic-y += kvm_para.h generic-y += limits.h generic-y += local.h +generic-y += local64.h generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += parport.h diff --git a/arch/nds32/include/asm/perf_event.h b/arch/nds32/include/asm/perf_event.h new file mode 100644 index ..fcdff02acc14 --- /dev/null +++ b/arch/nds32/include/asm/perf_event.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2008-2018 Andes Technology Corporation */ + +#ifndef __ASM_PERF_EVENT_H +#define __ASM_PERF_EVENT_H + +/* + * This file is request by Perf, + * please refer to tools/perf/design.txt for more details + */ +struct pt_regs; +unsigned long perf_instruction_pointer(struct pt_regs *regs); +un
[PATCH 5/5] nds32: Add document for NDS32 PMU.
Add the document describing how to add the NDS32 PMU to the devicetree. Signed-off-by: Nickhu --- Documentation/devicetree/bindings/nds32/pmu.txt | 17 + 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt b/Documentation/devicetree/bindings/nds32/pmu.txt new file mode 100644 index ..02762b850e59 --- /dev/null +++ b/Documentation/devicetree/bindings/nds32/pmu.txt @@ -0,0 +1,17 @@ +* NDS32 Performance Monitor Units + +NDS32 core have a PMU for counting cpu and cache events like cache misses. +The NDS32 PMU representation in the device tree should be done as under: + +Required properties: + +- compatilbe : + "andestech,atcpmu" + +- interrupts : The interrupt number for NDS32 PMU is 13. + +Example: +pmu{ + compatible = "andestech,atcpmu"; + interrupts = <13>; +} -- 2.17.0
[PATCH 0/5] nds32: Perf support
These commits add perf support for nds32. There are three performance counters in nds32, and each of them can count different events. You can use 'perf list' to show the available events that can be used. Nickhu (5): nds32: Perf porting nds32: Fix bug in bitfield.h nds32: Add perf call-graph support. nds32: Fix perf multiple events map to same counter. nds32: Add document for NDS32 PMU. .../devicetree/bindings/nds32/pmu.txt | 17 + arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/bitfield.h |4 +- arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 431 + arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1579 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 +++ 17 files changed, 2439 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json -- 2.17.0
[PATCH 5/5] nds32: Add document for NDS32 PMU.
Add the document describing how to add the NDS32 PMU to the devicetree. Signed-off-by: Nickhu --- Documentation/devicetree/bindings/nds32/pmu.txt | 17 + 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt diff --git a/Documentation/devicetree/bindings/nds32/pmu.txt b/Documentation/devicetree/bindings/nds32/pmu.txt new file mode 100644 index ..02762b850e59 --- /dev/null +++ b/Documentation/devicetree/bindings/nds32/pmu.txt @@ -0,0 +1,17 @@ +* NDS32 Performance Monitor Units + +NDS32 core have a PMU for counting cpu and cache events like cache misses. +The NDS32 PMU representation in the device tree should be done as under: + +Required properties: + +- compatilbe : + "andestech,atcpmu" + +- interrupts : The interrupt number for NDS32 PMU is 13. + +Example: +pmu{ + compatible = "andestech,atcpmu"; + interrupts = <13>; +} -- 2.17.0
[PATCH 0/5] nds32: Perf support
These commits add perf support for nds32. There are three performance counters in nds32, and each of them can count different events. You can use 'perf list' to show the available events that can be used. Nickhu (5): nds32: Perf porting nds32: Fix bug in bitfield.h nds32: Add perf call-graph support. nds32: Fix perf multiple events map to same counter. nds32: Add document for NDS32 PMU. .../devicetree/bindings/nds32/pmu.txt | 17 + arch/nds32/Kconfig|1 + arch/nds32/boot/dts/ae3xx.dts |5 + arch/nds32/include/asm/Kbuild |1 + arch/nds32/include/asm/bitfield.h |4 +- arch/nds32/include/asm/perf_event.h | 16 + arch/nds32/include/asm/pmu.h | 431 + arch/nds32/include/asm/stacktrace.h | 39 + arch/nds32/kernel/Makefile|3 +- arch/nds32/kernel/perf_event_cpu.c| 1579 + arch/nds32/mm/fault.c | 13 +- tools/include/asm/barrier.h |2 + tools/perf/arch/nds32/Build |1 + tools/perf/arch/nds32/util/Build |1 + tools/perf/arch/nds32/util/header.c | 29 + tools/perf/pmu-events/arch/nds32/mapfile.csv | 15 + .../pmu-events/arch/nds32/n13/atcpmu.json | 290 +++ 17 files changed, 2439 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/nds32/pmu.txt create mode 100644 arch/nds32/include/asm/perf_event.h create mode 100644 arch/nds32/include/asm/pmu.h create mode 100644 arch/nds32/include/asm/stacktrace.h create mode 100644 arch/nds32/kernel/perf_event_cpu.c create mode 100644 tools/perf/arch/nds32/Build create mode 100644 tools/perf/arch/nds32/util/Build create mode 100644 tools/perf/arch/nds32/util/header.c create mode 100644 tools/perf/pmu-events/arch/nds32/mapfile.csv create mode 100644 tools/perf/pmu-events/arch/nds32/n13/atcpmu.json -- 2.17.0
[PATCH 2/2] nds32: Fix the unaligned access handler
If the kernel config 'CONFIG_ALIGNMENT_TRAP' and the file '/proc/sys/nds32/unaligned_access/enable' are set, the kernel unaligned access handler does not handle the access correctly when the value of the immediate field is negative. This commit fixes the unaligned access handler in the kernel. Signed-off-by: Nickhu <nic...@andestech.com> --- arch/nds32/mm/alignment.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index e515f6f3d247..e1aed9dc692d 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -19,7 +19,7 @@ #define RA(inst) (((inst) >> 15) & 0x1FUL) #define RB(inst) (((inst) >> 10) & 0x1FUL) #define SV(inst) (((inst) >> 8) & 0x3UL) -#define IMM(inst) (((inst) >> 0) & 0x3FFFUL) +#define IMM(inst) (((inst) >> 0) & 0x7FFFUL) #define RA3(inst) (((inst) >> 3) & 0x7UL) #define RT3(inst) (((inst) >> 6) & 0x7UL) @@ -28,6 +28,9 @@ #define RA5(inst) (((inst) >> 0) & 0x1FUL) #define RT4(inst) (((inst) >> 5) & 0xFUL) +#define GET_IMMSVAL(imm_value) \ + (((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value) + #define __get8_data(val,addr,err) \ __asm__(\ "1: lbi.bi %1, [%2], #1\n" \ @@ -467,7 +470,7 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) } if (imm) - shift = IMM(inst) * len; + shift = GET_IMMSVAL(IMM(inst)) * len; else shift = *idx_to_addr(regs, RB(inst)) << SV(inst); -- 2.17.0
[PATCH 2/2] nds32: Fix the unaligned access handler
If the kernel config 'CONFIG_ALIGNMENT_TRAP' and the file '/proc/sys/nds32/unaligned_access/enable' are set, the kernel unaligned access handler does not handle the access correctly when the value of the immediate field is negative. This commit fixes the unaligned access handler in the kernel. Signed-off-by: Nickhu --- arch/nds32/mm/alignment.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index e515f6f3d247..e1aed9dc692d 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -19,7 +19,7 @@ #define RA(inst) (((inst) >> 15) & 0x1FUL) #define RB(inst) (((inst) >> 10) & 0x1FUL) #define SV(inst) (((inst) >> 8) & 0x3UL) -#define IMM(inst) (((inst) >> 0) & 0x3FFFUL) +#define IMM(inst) (((inst) >> 0) & 0x7FFFUL) #define RA3(inst) (((inst) >> 3) & 0x7UL) #define RT3(inst) (((inst) >> 6) & 0x7UL) @@ -28,6 +28,9 @@ #define RA5(inst) (((inst) >> 0) & 0x1FUL) #define RT4(inst) (((inst) >> 5) & 0xFUL) +#define GET_IMMSVAL(imm_value) \ + (((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value) + #define __get8_data(val,addr,err) \ __asm__(\ "1: lbi.bi %1, [%2], #1\n" \ @@ -467,7 +470,7 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) } if (imm) - shift = IMM(inst) * len; + shift = GET_IMMSVAL(IMM(inst)) * len; else shift = *idx_to_addr(regs, RB(inst)) << SV(inst); -- 2.17.0
[PATCH 0/2] nds32:Renaming file and fixing the unaligned access handler
The name of /proc/sys/nds32/unaligned_acess is misspelled, so we rename it to /proc/sys/nds32/unaligned_access. The unaligned access handler in nds32 goes wrong when the immediate field of a load/store instruction is negative. We fix it by recognizing whether the immediate field is positive or negative and then changing the value of the immediate field to an unsigned integer number. Nickhu (2): nds32: Renaming the file for unaligned access nds32: Fix the unaligned access handler arch/nds32/mm/alignment.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) -- 2.17.0
[PATCH 0/2] nds32:Renaming file and fixing the unaligned access handler
The name of /proc/sys/nds32/unaligned_acess is misspelled, so we rename it to /proc/sys/nds32/unaligned_access. The unaligned access handler in nds32 goes wrong when the immediate field of a load/store instruction is negative. We fix it by recognizing whether the immediate field is positive or negative and then changing the value of the immediate field to an unsigned integer number. Nickhu (2): nds32: Renaming the file for unaligned access nds32: Fix the unaligned access handler arch/nds32/mm/alignment.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) -- 2.17.0
[PATCH 1/2] nds32: Renaming the file for unaligned access
Change the name of the file '/proc/sys/nds32/unaligned_acess' to '/proc/sys/nds32/unaligned_access' Signed-off-by: Nickhu <nic...@andestech.com> --- arch/nds32/mm/alignment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index b96a01b10ca7..e515f6f3d247 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -552,7 +552,7 @@ static struct ctl_table alignment_tbl[3] = { static struct ctl_table nds32_sysctl_table[2] = { { -.procname = "unaligned_acess", +.procname = "unaligned_access", .mode = 0555, .child = alignment_tbl}, {} -- 2.17.0
[PATCH 1/2] nds32: Renaming the file for unaligned access
Change the name of the file '/proc/sys/nds32/unaligned_acess' to '/proc/sys/nds32/unaligned_access' Signed-off-by: Nickhu --- arch/nds32/mm/alignment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index b96a01b10ca7..e515f6f3d247 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -552,7 +552,7 @@ static struct ctl_table alignment_tbl[3] = { static struct ctl_table nds32_sysctl_table[2] = { { -.procname = "unaligned_acess", +.procname = "unaligned_access", .mode = 0555, .child = alignment_tbl}, {} -- 2.17.0