[PATCH 2/5] powerpc: Remove some old bootmem related comments
Now bootmem is gone from powerpc we can remove comments mentioning it. Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/kernel/prom.c | 5 + arch/powerpc/kernel/rtas.c | 4 ++-- arch/powerpc/kvm/book3s_hv_builtin.c | 2 +- arch/powerpc/mm/hugetlbpage.c| 4 ++-- arch/powerpc/mm/pgtable_64.c | 4 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 5957625..02e3e4c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -698,10 +698,7 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* -* Ensure that total memory size is page-aligned, because otherwise -* mark_bootmem() gets upset. -*/ + /* Ensure that total memory size is page-aligned. */ limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8b4c857..4af905e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1091,8 +1091,8 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) } /* - * Call early during boot, before mem init or bootmem, to retrieve the RTAS - * informations from the device-tree and allocate the RMO buffer for userland + * Call early during boot, before mem init, to retrieve the RTAS + * information from the device-tree and allocate the RMO buffer for userland * accesses. */ void __init rtas_initialize(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b9615ba..297dbaf 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(kvm_release_hpt); * kvm_cma_reserve() - reserve area for kvm hash pagetable * * This function reserves memory from early allocator. 
It should be - * called by arch specific code once the early allocator (memblock or bootmem) + * called by arch specific code once the memblock allocator * has been activated and all other subsystems have already allocated/reserved * memory. */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae9..5215d25 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -276,7 +276,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { @@ -399,7 +399,7 @@ void __init reserve_hugetlb_gpages(void) #else /* !PPC_FSL_BOOK3E */ /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdb19ab..aa91737 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -109,10 +109,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags) __pgprot(flags))); } else { #ifdef CONFIG_PPC_MMU_NOHASH - /* Warning ! This will blow up if bootmem is not initialized -* which our ppc64 code is keen to do that, we'll need to -* fix it and/or be more careful -*/ pgdp = pgd_offset_k(ea); #ifdef PUD_TABLE_SIZE if (pgd_none(*pgdp)) { -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/5] powerpc: Remove superfluous bootmem includes
Lots of places included bootmem.h even when not using bootmem. Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/kernel/crash_dump.c | 1 - arch/powerpc/kernel/irq.c | 1 - arch/powerpc/kernel/pci_64.c | 1 - arch/powerpc/kernel/rtas_pci.c | 1 - arch/powerpc/kernel/setup_32.c | 1 - arch/powerpc/kernel/vdso.c | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 1 - arch/powerpc/mm/init_32.c | 1 - arch/powerpc/mm/init_64.c | 1 - arch/powerpc/mm/pgtable_64.c | 1 - arch/powerpc/platforms/cell/celleb_scc_epci.c | 1 - arch/powerpc/platforms/cell/celleb_scc_pciex.c | 1 - arch/powerpc/platforms/maple/pci.c | 1 - arch/powerpc/platforms/powermac/pci.c | 1 - arch/powerpc/platforms/powernv/eeh-ioda.c | 1 - arch/powerpc/platforms/powernv/pci.c | 1 - arch/powerpc/sysdev/fsl_msi.c | 1 - arch/powerpc/sysdev/ipic.c | 1 - arch/powerpc/sysdev/mpic.c | 1 - arch/powerpc/sysdev/mpic_pasemi_msi.c | 1 - arch/powerpc/sysdev/mpic_u3msi.c | 1 - arch/powerpc/sysdev/ppc4xx_msi.c | 1 - arch/powerpc/sysdev/ppc4xx_pci.c | 1 - arch/powerpc/sysdev/qe_lib/qe.c| 1 - arch/powerpc/sysdev/qe_lib/qe_ic.c | 1 - arch/powerpc/sysdev/uic.c | 1 - 26 files changed, 26 deletions(-) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 7a13f37..0bfe370 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -12,7 +12,6 @@ #undef DEBUG #include linux/crash_dump.h -#include linux/bootmem.h #include linux/memblock.h #include asm/code-patching.h #include asm/kdump.h diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4c5891d..c47a74d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -50,7 +50,6 @@ #include linux/list.h #include linux/radix-tree.h #include linux/mutex.h -#include linux/bootmem.h #include linux/pci.h #include linux/debugfs.h #include linux/of.h diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 155013d..ba0f2d6 100644 --- a/arch/powerpc/kernel/pci_64.c +++ 
b/arch/powerpc/kernel/pci_64.c @@ -17,7 +17,6 @@ #include linux/pci.h #include linux/string.h #include linux/init.h -#include linux/bootmem.h #include linux/export.h #include linux/mm.h #include linux/list.h diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index c168337..fe39926 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -26,7 +26,6 @@ #include linux/pci.h #include linux/string.h #include linux/init.h -#include linux/bootmem.h #include asm/io.h #include asm/pgtable.h diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e2bc044..84dafdf 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -11,7 +11,6 @@ #include linux/delay.h #include linux/initrd.h #include linux/tty.h -#include linux/bootmem.h #include linux/seq_file.h #include linux/root_dev.h #include linux/cpu.h diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f174351..305eb0d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -20,7 +20,6 @@ #include linux/user.h #include linux/elf.h #include linux/security.h -#include linux/bootmem.h #include linux/memblock.h #include asm/pgtable.h diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 297dbaf..6baa186 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -12,7 +12,6 @@ #include linux/export.h #include linux/sched.h #include linux/spinlock.h -#include linux/bootmem.h #include linux/init.h #include linux/memblock.h #include linux/sizes.h diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 9d1bde2..6332368 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -26,7 +26,6 @@ #include linux/mm.h #include linux/stddef.h #include linux/init.h -#include linux/bootmem.h #include linux/highmem.h #include linux/initrd.h #include linux/pagemap.h diff --git 
a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 3481556..10471f9 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -34,7 +34,6 @@ #include linux/vmalloc.h #include linux/init.h #include linux/delay.h -#include linux/bootmem.h #include linux/highmem.h #include linux/idr.h #include linux/nodemask.h diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index aa91737..feed47c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@ #include linux/swap.h #include
[PATCH 5/5] powerpc: make __ffs return unsigned long
I'm seeing a build warning in mm/nobootmem.c after removing bootmem: mm/nobootmem.c: In function '__free_pages_memory': include/linux/kernel.h:713:17: warning: comparison of distinct pointer types lacks a cast [enabled by default] (void) (&_min1 == &_min2); \ ^ mm/nobootmem.c:90:11: note: in expansion of macro 'min' order = min(MAX_ORDER - 1UL, __ffs(start)); ^ The rest of the world seems to define __ffs as returning unsigned long, so let's do that. Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/include/asm/bitops.h | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index bd3bd57..c633f05 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -213,7 +213,7 @@ static __inline__ unsigned long ffz(unsigned long x) return __ilog2(x & -x); } -static __inline__ int __ffs(unsigned long x) +static __inline__ unsigned long __ffs(unsigned long x) { return __ilog2(x & -x); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index c507767..dae96fb 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -152,7 +152,7 @@ static int setup_one_atmu(struct ccsr_pci __iomem *pci, flags |= 0x1000; /* enable relaxed ordering */ for (i = 0; size > 0; i++) { - unsigned int bits = min(ilog2(size), + unsigned int bits = min_t(u32, ilog2(size), __ffs(pci_addr | phys_addr)); if (index + i >= 5) -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 4/5] powerpc: Move sparse_init() into initmem_init
We did part of sparse initialisation in setup_arch and part in initmem_init. Put them together. Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/mm/mem.c | 1 + arch/powerpc/mm/numa.c | 2 ++ 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9f8f472..8bc8f77 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -682,7 +682,6 @@ void __init setup_arch(char **cmdline_p) emergency_stack_init(); initmem_init(); - sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = dummy_con; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index aa067b7..abbc55d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -195,6 +195,7 @@ void __init initmem_init(void) /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); + sparse_init(); } /* mark pages that don't exist as nosave */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 4f9c18a..52559ff 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -964,6 +964,8 @@ void __init initmem_init(void) sparse_memory_present_with_active_regions(nid); } + sparse_init(); + setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [1/5] pseries: Define rtas hotplug event sections
On Mon, 2014-09-15 at 15:29 -0500, Nathan Fontenot wrote: diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index b390f55..a01879e 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -273,6 +273,7 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log) #define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I') #define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H') #define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D') +#define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P') /* Vendor specific Platform Event Log Format, Version 6, section header */ struct pseries_errorlog { @@ -296,6 +297,31 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) return be16_to_cpu(sect->length); } +/* RTAS pseries hotplug errorlog section */ +struct pseries_hp_errorlog { + uint8_t resource; + uint8_t action; + uint8_t id_type; + uint8_t reserved; These should be u8. + union { + __be32 drc_index; + __be32 drc_count; + char drc_name[1]; I don't see drc_name used? + } _drc_u; +}; cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [2/5] pseries: Export drc_[acquire|release]_drc() routines
On Mon, 2014-09-15 at 15:30 -0500, Nathan Fontenot wrote: diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 361add6..b94516b 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -59,6 +59,8 @@ extern void dlpar_free_cc_property(struct property *); extern struct device_node *dlpar_configure_connector(u32, struct device_node *); extern int dlpar_attach_node(struct device_node *); extern int dlpar_detach_node(struct device_node *); +extern int dlpar_acquire_drc(u32); +extern int dlpar_release_drc(u32); Please name the parameters. And don't bother with extern. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/3] powerpc: Remove -mno-sched-epilog workaround
We added -mno-sched-epilog in commit 7563dc645853 (powerpc: Work around gcc's -fno-omit-frame-pointer bug). We shouldn't apply -fno-omit-frame-pointer on powerpc any more (it's protected by CONFIG_FRAME_POINTER and CONFIG_SCHED_OMIT_FRAME_POINTER). It's also an undocumented gcc option, so lets remove it. Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/Makefile| 5 - arch/powerpc/kernel/Makefile | 12 ++-- arch/powerpc/platforms/powermac/Makefile | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 132d9c6..c6f64e2 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -198,11 +198,6 @@ ifeq ($(CONFIG_6xx),y) KBUILD_CFLAGS += -mcpu=powerpc endif -# Work around a gcc code-gen bug with -fno-omit-frame-pointer. -ifeq ($(CONFIG_FUNCTION_TRACER),y) -KBUILD_CFLAGS += -mno-sched-epilog -endif - cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_ALTIVEC) += -Wa,-maltivec cpu-as-$(CONFIG_E200) += -Wa,-me200 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 502cf69..e14bda6 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -17,14 +17,14 @@ endif ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_cputable.o = -pg +CFLAGS_REMOVE_prom_init.o = -pg +CFLAGS_REMOVE_btext.o = -pg +CFLAGS_REMOVE_prom.o = -pg # do not trace tracer code -CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_ftrace.o = -pg # timers used by tracing -CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_time.o = -pg endif obj-y := cputable.o ptrace.o syscalls.o \ diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index 52c6ce1..e238872 100644 --- 
a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -2,7 +2,7 @@ CFLAGS_bootx_init.o += -fPIC ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_bootx_init.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_bootx_init.o = -pg endif obj-y += pic.o setup.o time.o feature.o pci.o \ -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/3] powerpc/ftrace: Remove mod_return_to_handler
mod_return_to_handler is the same as return_to_handler, except it handles the change of the TOC (r2). Add this into return_to_handler and remove mod_return_to_handler. Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/kernel/entry_64.S | 24 +--- arch/powerpc/kernel/ftrace.c | 14 ++ arch/powerpc/kernel/process.c | 9 + 3 files changed, 4 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 5bbd1bc..955d509 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1235,28 +1235,6 @@ _GLOBAL(ftrace_graph_caller) _GLOBAL(return_to_handler) /* need to save return values */ - std r4, -24(r1) - std r3, -16(r1) - std r31, -8(r1) - mr r31, r1 - stdur1, -112(r1) - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlrr3 - - ld r1, 0(r1) - ld r4, -24(r1) - ld r3, -16(r1) - ld r31, -8(r1) - - /* Jump back to real return address */ - blr - -_GLOBAL(mod_return_to_handler) - /* need to save return values */ std r4, -32(r1) std r3, -24(r1) /* save TOC */ @@ -1266,7 +1244,7 @@ _GLOBAL(mod_return_to_handler) stdur1, -112(r1) /* -* We are in a module using the module's TOC. +* We might be called from a module. * Switch to our TOC to run inside the core kernel. */ ld r2, PACATOC(r13) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 390311c..abf7921 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -510,10 +510,6 @@ int ftrace_disable_ftrace_graph_caller(void) } #endif /* CONFIG_DYNAMIC_FTRACE */ -#ifdef CONFIG_PPC64 -extern void mod_return_to_handler(void); -#endif - /* * Hook the return address and push it in the stack of return addrs * in current thread info. 
@@ -523,7 +519,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) unsigned long old; int faulted; struct ftrace_graph_ent trace; - unsigned long return_hooker = (unsigned long)return_to_handler; + unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) return; @@ -531,13 +527,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) if (unlikely(atomic_read(&current->tracing_graph_pause))) return; -#ifdef CONFIG_PPC64 - /* non core kernel code needs to save and restore the TOC */ - if (REGION_ID(self_addr) != KERNEL_REGION_ID) - return_hooker = (unsigned long)mod_return_to_handler; -#endif - - return_hooker = ppc_function_entry((void *)return_hooker); + return_hooker = ppc_function_entry(return_to_handler); /* * Protect against fault, even if it shouldn't diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index aa1df89..080c0b9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1531,13 +1531,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) int curr_frame = current->curr_ret_stack; extern void return_to_handler(void); unsigned long rth = (unsigned long)return_to_handler; - unsigned long mrth = -1; -#ifdef CONFIG_PPC64 - extern void mod_return_to_handler(void); - rth = *(unsigned long *)rth; - mrth = (unsigned long)mod_return_to_handler; - mrth = *(unsigned long *)mrth; -#endif #endif sp = (unsigned long) stack; @@ -1562,7 +1555,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) if (!firstframe || ip != lr) { printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if ((ip == rth || ip == mrth) && curr_frame >= 0) { + if ((ip == rth) && curr_frame >= 0) { printk(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/3] powerpc/ftrace: simplify prepare_ftrace_return
Instead of passing in the stack address of the link register to be modified, just pass in the old value and return the new value and rely on ftrace_graph_caller to do the modification. This removes the exception handling around the stack update - it isn't needed and we weren't consistent about it. Later on we would do an unprotected modification: if (!ftrace_graph_entry(trace)) { *parent = old; Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/kernel/entry_32.S | 10 +-- arch/powerpc/kernel/entry_64.S | 11 ++-- arch/powerpc/kernel/ftrace.c | 59 ++ 3 files changed, 30 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 22b45a4..ad837d8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1424,12 +1424,18 @@ _GLOBAL(ftrace_graph_caller) lwz r4, 44(r1) subir4, r4, MCOUNT_INSN_SIZE - /* get the parent address */ - addir3, r1, 52 + /* Grab the LR out of the caller stack frame */ + lwz r3,52(r1) bl prepare_ftrace_return nop +/* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + stw r3,52(r1) + MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 955d509..9caab69 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1221,13 +1221,20 @@ _GLOBAL(ftrace_graph_caller) ld r4, 128(r1) subir4, r4, MCOUNT_INSN_SIZE - /* get the parent address */ + /* Grab the LR out of the caller stack frame */ ld r11, 112(r1) - addir3, r11, 16 + ld r3, 16(r11) bl prepare_ftrace_return nop + /* +* prepare_ftrace_return gives us the address we divert to. +* Change the LR in the callers stack frame to this. 
+*/ + ld r11, 112(r1) + std r3, 16(r11) + ld r0, 128(r1) mtlrr0 addir1, r1, 112 diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index abf7921..d795031 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -512,67 +512,34 @@ int ftrace_disable_ftrace_graph_caller(void) /* * Hook the return address and push it in the stack of return addrs - * in current thread info. + * in current thread info. Return the address we want to divert to. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - unsigned long old; - int faulted; struct ftrace_graph_ent trace; unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) - return; + goto out; if (unlikely(atomic_read(current-tracing_graph_pause))) - return; + goto out; return_hooker = ppc_function_entry(return_to_handler); - /* -* Protect against fault, even if it shouldn't -* happen. This tool is too much intrusive to -* ignore such a protection. 
-*/ - asm volatile( - 1: PPC_LL %[old], 0(%[parent])\n - 2: PPC_STL %[return_hooker], 0(%[parent])\n - li %[faulted], 0\n - 3:\n - - .section .fixup, \ax\\n - 4: li %[faulted], 1\n - b 3b\n - .previous\n - - .section __ex_table,\a\\n - PPC_LONG_ALIGN \n - PPC_LONG 1b,4b\n - PPC_LONG 2b,4b\n - .previous - - : [old] =r (old), [faulted] =r (faulted) - : [parent] r (parent), [return_hooker] r (return_hooker) - : memory - ); - - if (unlikely(faulted)) { - ftrace_graph_stop(); - WARN_ON(1); - return; - } - - trace.func = self_addr; + trace.func = ip; trace.depth = current-curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(trace)) { - *parent = old; - return; - } + if (!ftrace_graph_entry(trace)) + goto out; + + if (ftrace_push_return_trace(parent, ip, trace.depth, 0) == -EBUSY) + goto out; - if (ftrace_push_return_trace(old, self_addr, trace.depth, 0) == -EBUSY) - *parent = old; + parent = return_hooker; +out: + return parent; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- 1.9.1
Re: [3/5] pseries: Create device hotplug entry point
On Mon, 2014-09-15 at 15:31 -0500, Nathan Fontenot wrote: For pseries system the kernel will be notified of hotplug requests in the form of rtas hotplug events. Can you flesh that design out a bit for me, I don't entirely get how it's going to work. The kernel gets the rtas hotplug events (in rtasd.c) and spits them out to userspace, which then writes them back in ? This patch creates a common routine that can handle these requests in both the PowerVM anbd PowerKVM environments, handle_dlpar_errorlog(). This also ^ creates the initial memory hotplug request handling stub. For PowerVM this patch also creates a new /proc file that the drmgr command will use to write rtas hotplug events to. Why is this different between phyp and KVM? For future PowerKVM handling the rtas check-exception code can pass any rtas hotplug events received to handle_dlpar_errorlog(). Internally to the kernel you mean? diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index a2450b8..574ec73 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -16,7 +16,9 @@ #include linux/cpu.h #include linux/slab.h #include linux/of.h +#include linux/proc_fs.h #include offline_states.h +#include pseries.h #include asm/prom.h #include asm/machdep.h @@ -530,13 +532,72 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return count; } +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ That is really confusing, but I think it's just a diff artifact? +static int handle_dlpar_errorlog(struct rtas_error_log *error_log) +{ + struct pseries_errorlog *pseries_log; + struct pseries_hp_errorlog *hp_elog; + int rc = -EINVAL; + + pseries_log = get_pseries_errorlog(error_log, +PSERIES_ELOG_SECT_ID_HOTPLUG); + if (!pseries_log) + return rc; + + hp_elog = (struct pseries_hp_errorlog *)pseries_log-data; + if (!hp_elog) + return rc; I don't see how that can happen? 
struct pseries_errorlog { __be16 id; /* 0x00 2-byte ASCII section ID */ __be16 length; /* 0x02 Section length in bytes */ uint8_t version;/* 0x04 Section version */ uint8_t subtype;/* 0x05 Section subtype */ __be16 creator_component; /* 0x06 Creator component ID*/ uint8_t data[]; /* 0x08 Start of section data */ }; Should you be checking for length == 0 instead ? Also I think the code will probably end up cleaner if you do the endian conversions immediately when you read the hp_elog, rather than passing it around in BE and having to remember to convert at all the usages. + switch (hp_elog-resource) { + case PSERIES_HP_ELOG_RESOURCE_MEM: + rc = dlpar_memory(hp_elog); + break; Please add: default: pr_warn_ratelimited(Unknown resource ..) Or something. + } + + return rc; +} + +static ssize_t dlpar_write(struct file *file, const char __user *buf, +size_t count, loff_t *offset) +{ + char *event_buf; + int rc; + + event_buf = kmalloc(count + 1, GFP_KERNEL); Why + 1 ? It's not null-terminated AFAICS. + if (!event_buf) + return -ENOMEM; + + rc = copy_from_user(event_buf, buf, count); + if (rc) { + kfree(event_buf); + return rc; + } + + rc = handle_dlpar_errorlog((struct rtas_error_log *)event_buf); If you start with a struct rtas_error_log * you shouldn't need any casts. + kfree(event_buf); + return rc ? rc : count; +} + +static const struct file_operations dlpar_fops = { + .write = dlpar_write, + .llseek = noop_llseek, +}; + static int __init pseries_dlpar_init(void) { + struct proc_dir_entry *proc_ent; + + proc_ent = proc_create(powerpc/dlpar, S_IWUSR, NULL, dlpar_fops); + if (proc_ent) + proc_set_size(proc_ent, 0); else error message at least please Why are we putting it in /proc, can't it go in /sys/kernel like the mobility stuff? 
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 24abc5c..0e60e15 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -20,6 +22,9 @@ #include asm/machdep.h #include asm/prom.h #include asm/sparsemem.h +#include asm/rtas.h + +DEFINE_MUTEX(dlpar_mem_mutex); static ? diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index b94516b..28bd994 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -62,6 +63,15 @@ extern int dlpar_detach_node(struct
Re: [4/5] pseries: Implement memory hotplug add in the kernel
On Mon, 2014-09-15 at 15:32 -0500, Nathan Fontenot wrote: This patch adds the ability to do memory hotplug adding in the kernel. Currently the hotplug add/remove of memory is handled by the drmgr command. The drmgr command performs the add/remove by performing some work in user-space and making requests to the kernel to handle other pieces. By moving all of the work to the kernel we can do the add and remove faster, and provide a common place to do memory hotplug for both the PowerVM and PowerKVM environments. Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 170 +++ 1 file changed, 170 insertions(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 0e60e15..b254773 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -17,6 +17,7 @@ #include linux/vmalloc.h #include linux/memory.h #include linux/memory_hotplug.h +#include linux/slab.h #include asm/firmware.h #include asm/machdep.h @@ -24,6 +25,8 @@ #include asm/sparsemem.h #include asm/rtas.h +#include pseries.h + DEFINE_MUTEX(dlpar_mem_mutex); unsigned long pseries_memory_block_size(void) @@ -69,6 +72,53 @@ unsigned long pseries_memory_block_size(void) return memblock_size; } +static void dlpar_free_drconf_property(struct property *prop) +{ + kfree(prop-name); + kfree(prop-value); + kfree(prop); +} + +static struct property *dlpar_clone_drconf_property(struct device_node *dn) +{ + struct property *prop, *new_prop; + + prop = of_find_property(dn, ibm,dynamic-memory, NULL); + if (!prop) + return NULL; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return NULL; + + new_prop-name = kstrdup(prop-name, GFP_KERNEL); + new_prop-value = kmalloc(prop-length + 1, GFP_KERNEL); + if (!new_prop-name || !new_prop-value) { + dlpar_free_drconf_property(new_prop); + return NULL; + } + + memcpy(new_prop-value, 
prop-value, prop-length); + new_prop-length = prop-length; + *(((char *)new_prop-value) + new_prop-length) = 0; It's not a string, is it? + return new_prop; +} + +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +{ + unsigned long section_nr; + struct mem_section *mem_sect; + struct memory_block *mem_block; + u64 phys_addr = be64_to_cpu(lmb-base_addr); + + section_nr = pfn_to_section_nr(PFN_DOWN(phys_addr)); + mem_sect = __nr_to_section(section_nr); + + mem_block = find_memory_block(mem_sect); + return mem_block; +} + #ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memory(u64 start, u64 size) { @@ -155,13 +205,133 @@ static inline int pseries_remove_mem_node(struct device_node *np) } #endif /* CONFIG_MEMORY_HOTREMOVE */ +static int dlpar_add_one_lmb(struct of_drconf_cell *lmb) +{ + struct memory_block *mem_block; + u64 phys_addr; + unsigned long pages_per_block; + unsigned long block_sz; + int nid, sections_per_block; + int rc; + + phys_addr = be64_to_cpu(lmb-base_addr); of_drconf_cell needs endian annotations. + block_sz = memory_block_size_bytes(); + sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + pages_per_block = PAGES_PER_SECTION * sections_per_block; + + if (phys_addr ((pages_per_block PAGE_SHIFT) - 1)) + return -EINVAL; + + nid = memory_add_physaddr_to_nid(phys_addr); + rc = add_memory(nid, phys_addr, block_sz); + if (rc) + return rc; + + rc = memblock_add(phys_addr, block_sz); + if (rc) { + remove_memory(nid, phys_addr, block_sz); + return rc; + } + + mem_block = lmb_to_memblock(lmb); + if (!mem_block) { + remove_memory(nid, phys_addr, block_sz); + return -EINVAL; + } That could all use a lot of comments. ie. why do we have to add it twice? 
+ rc = device_online(mem_block-dev); + put_device(mem_block-dev); + if (rc) + remove_memory(nid, phys_addr, block_sz); + + return rc; +} + +static int dlpar_memory_add(struct pseries_hp_errorlog *hp_elog) +{ + struct of_drconf_cell *lmb; + struct device_node *dn; + struct property *prop; + uint32_t entries, *p; *p should be __be32. + int i, lmbs_to_add; + int lmbs_added = 0; + int rc = -EINVAL; Don't pre-initialise your rc variables. + if (hp_elog-id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) { + lmbs_to_add = be32_to_cpu(hp_elog-_drc_u.drc_count); + pr_info(Attempting to hot-add %d LMB(s)\n, lmbs_to_add); + } else { + lmbs_to_add = 1; +
Re: [5/5] pseries: Implement memory hotplug remove in the kernel
On Mon, 2014-09-15 at 15:33 -0500, Nathan Fontenot wrote: This patch adds the ability to do memory hotplug remove in the kernel. Currently the hotplug add/remove of memory is handled by the drmgr command. The drmgr command performs the add/remove by performing some work in user-space and making requests to the kernel to handle other pieces. By moving all of the work to the kernel we can do the add and remove faster, and provide a common place to do memory hotplug for both the PowerVM and PowerKVM environments. Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 140 +++ 1 file changed, 139 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index b254773..160c424 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -193,7 +193,137 @@ static int pseries_remove_mem_node(struct device_node *np) pseries_remove_memblock(base, lmb_size); return 0; } + +static int lmb_is_removable(struct of_drconf_cell *lmb) +{ Do we not already have something like this? + int i, scns_per_block; + int rc = 1; I can see this makes the = work below. But what if block_sz / MIN_MEMORY_BLOCK_SIZE = 0 ? + unsigned long pfn, block_sz; + u64 phys_addr; + + phys_addr = be64_to_cpu(lmb-base_addr); + block_sz = memory_block_size_bytes(); + scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + + for (i = 0; i scns_per_block; i++) { + pfn = PFN_DOWN(phys_addr); + if (!pfn_present(pfn)) + continue; + + rc = is_mem_section_removable(pfn, PAGES_PER_SECTION); + phys_addr += MIN_MEMORY_BLOCK_SIZE; + } + + return rc; +} +static int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) +{ ... +} Most of the same comments as for add. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/5] powerpc: Remove superfluous bootmem includes
Hello Anton, On 09/17/2014 12:58 AM, Anton Blanchard wrote: Lots of places included bootmem.h even when not using bootmem. Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/kernel/crash_dump.c | 1 - arch/powerpc/kernel/irq.c | 1 - arch/powerpc/kernel/pci_64.c | 1 - arch/powerpc/kernel/rtas_pci.c | 1 - arch/powerpc/kernel/setup_32.c | 1 - arch/powerpc/kernel/vdso.c | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 1 - arch/powerpc/mm/init_32.c | 1 - arch/powerpc/mm/init_64.c | 1 - arch/powerpc/mm/pgtable_64.c | 1 - arch/powerpc/platforms/cell/celleb_scc_epci.c | 1 - arch/powerpc/platforms/cell/celleb_scc_pciex.c | 1 - arch/powerpc/platforms/maple/pci.c | 1 - arch/powerpc/platforms/powermac/pci.c | 1 - arch/powerpc/platforms/powernv/eeh-ioda.c | 1 - arch/powerpc/platforms/powernv/pci.c | 1 - arch/powerpc/sysdev/fsl_msi.c | 1 - arch/powerpc/sysdev/ipic.c | 1 - arch/powerpc/sysdev/mpic.c | 1 - arch/powerpc/sysdev/mpic_pasemi_msi.c | 1 - arch/powerpc/sysdev/mpic_u3msi.c | 1 - arch/powerpc/sysdev/ppc4xx_msi.c | 1 - arch/powerpc/sysdev/ppc4xx_pci.c | 1 - arch/powerpc/sysdev/qe_lib/qe.c| 1 - arch/powerpc/sysdev/qe_lib/qe_ic.c | 1 - arch/powerpc/sysdev/uic.c | 1 - 26 files changed, 26 deletions(-) ... 
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index aa91737..feed47c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@ #include linux/swap.h #include linux/stddef.h #include linux/vmalloc.h -#include linux/bootmem.h #include linux/memblock.h #include linux/slab.h While building corenet64_smp I get this: ../arch/powerpc/mm/pgtable_64.c: In function 'early_alloc_pgtable': ../arch/powerpc/mm/pgtable_64.c:77:95: error: 'MAX_DMA_ADDRESS' undeclared (first use in this function) ../arch/powerpc/mm/pgtable_64.c:77:95: note: each undeclared identifier is reported only once for each function it appears in Adding '#include asm/dma.h' seems to fix it Other than that, for the entire set: Tested-by: Emil Medve emilian.me...@freescale.com Cheers, ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH][V2] Freescale Frame Manager Device Tree binding document
From: Igal Liberman igal.liber...@freescale.com The Frame Manager (FMan) combines the Ethernet network interfaces with packet distribution logic to provide intelligent distribution and queuing decisions for incoming traffic at line rate. This binding document describes Freescale's Frame Manager hardware attributes that are used by the Frame Manager driver for its basic initialization and configuration. Difference between [V1] and [V2]: Addressed all comments received from Scott in [V1] Signed-off-by: Igal Liberman igal.liber...@freescale.com --- .../devicetree/bindings/powerpc/fsl/fman.txt | 529 1 file changed, 529 insertions(+) create mode 100644 Documentation/devicetree/bindings/powerpc/fsl/fman.txt diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt new file mode 100644 index 000..da8e5f2 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt @@ -0,0 +1,529 @@ += +Freescale Frame Manager Device Bindings + +CONTENTS + - FMan Node + - FMan Port Node + - FMan MURAM Node + - FMan dTSEC/XGEC/mEMAC Node + - FMan IEEE 1588 Node + - Example + += +FMan Node + +DESCRIPTION + +Due to the fact that the FMan is an aggregation of sub-engines (ports, MACs, +etc.) the FMan node will have child nodes for each of them. + +PROPERTIES + +- compatible + Usage: required + Value type: stringlist + Definition: Must include fsl,fman + FMan version can be determined via FM_IP_REV_1 register in the + FMan block. The offset is 0xc4 from the beginning of the + Frame Processing Manager memory map (0xc3000 from the + beginning of the FMan node). + +- cell-index + Usage: required + Value type: u32 + Definition: Specifies the index of the FMan unit. + + The cell-index value may be used by the SoC, to identify the + FMan unit in the SoC memory map.
In the table below, + there's a description of the cell-index use in each SoC: + + - P1023: + register[bit] FMan unit cell-index + + DEVDISR[1] 1 0 + + - P2041, P3041, P4080 P5020, P5040: + register[bit] FMan unit cell-index + + DCFG_DEVDISR2[6]1 0 + DCFG_DEVDISR2[14] 2 1 + (Second FM available only in P4080 and P5040) + + - B4860, T1040, T2080, T4240: + register[bit] FMan unit cell-index + + DCFG_CCSR_DEVDISR2[24] 1 0 + DCFG_CCSR_DEVDISR2[25] 2 1 + (Second FM available only in T4240) + + DEVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in + the specific SoC Device Configuration/Pin Control Memory + Map. + +- reg + Usage: required + Value type: prop-encoded-array + Definition: A standard property. Specifies the offset of the + following configuration registers: + - BMI configuration registers. + - QMI configuration registers. + - DMA configuration registers. + - FPM configuration registers. + - FMan controller configuration registers. + +- ranges + Usage: required + Value type: prop-encoded-array + Definition: A standard property. + +- clocks + Usage: required + Value type: prop-encoded-array + Definition: phandle for fman clock. + +- clock-names + usage: optional + Value type: stringlist + Definition: A standard property + +- interrupts + Usage: required + Value type: prop-encoded-array + Definition: A pair of IRQs are specified in this property. + The first element is associated with the event interrupts and + the second element is associated with the error interrupts. + +- fsl,qman-channel-range + Usage: required + Value type: prop-encoded-array + Definition: Specifies the range of the available dedicated + channels in the FMan. The first cell specifies the beginning +
Re: [PATCH 3/5] powerpc: Remove superfluous bootmem includes
Hi Emil, While building corenet64_smp I get this: ../arch/powerpc/mm/pgtable_64.c: In function 'early_alloc_pgtable': ../arch/powerpc/mm/pgtable_64.c:77:95: error: 'MAX_DMA_ADDRESS' undeclared (first use in this function) ../arch/powerpc/mm/pgtable_64.c:77:95: note: each undeclared identifier is reported only once for each function it appears in Adding '#include asm/dma.h' seems to fix it Other than that, for the entire set: Tested-by: Emil Medve emilian.me...@freescale.com Thanks for testing! I'll respin the patch with that include added. Anton ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/5] powerpc: Remove bootmem allocator
At the moment we transition from the memblock allocator to the bootmem allocator. Getting rid of the bootmem allocator removes a bunch of complicated code (most of which I owe the dubious honour of being responsible for writing). Signed-off-by: Anton Blanchard an...@samba.org Tested-by: Emil Medve emilian.me...@freescale.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/setup.h | 3 +- arch/powerpc/kernel/setup_32.c | 5 +- arch/powerpc/kernel/setup_64.c | 3 +- arch/powerpc/mm/init_32.c| 9 -- arch/powerpc/mm/mem.c| 62 +-- arch/powerpc/mm/numa.c | 224 ++- arch/powerpc/mm/pgtable_32.c | 3 +- arch/powerpc/mm/pgtable_64.c | 6 +- 9 files changed, 43 insertions(+), 273 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 90fe77a..3eeeb9d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -148,6 +148,7 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW select HAVE_PERF_EVENTS_NMI if PPC64 + select NO_BOOTMEM config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 11ba86e..fbdf18c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -8,7 +8,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern int mem_init_done; /* set on boot once kmalloc can be called */ -extern int init_bootmem_done; /* set once bootmem is available */ extern unsigned long long memory_limit; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); @@ -24,7 +23,7 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x)((typeof(x)) add_reloc_offset((unsigned long)(x))) void check_for_initrd(void); -void do_init_bootmem(void); +void initmem_init(void); void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ea4fda6..e2bc044 100644 ---
a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -311,9 +311,8 @@ void __init setup_arch(char **cmdline_p) irqstack_early_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - if ( ppc_md.progress ) ppc_md.progress(setup_arch: bootmem, 0x3eab); + initmem_init(); + if ( ppc_md.progress ) ppc_md.progress(setup_arch: initmem, 0x3eab); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = dummy_con; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fa17c94..9f8f472 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -681,8 +681,7 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); + initmem_init(); sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index cff59f1..9d1bde2 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -195,15 +195,6 @@ void __init MMU_init(void) memblock_set_current_limit(lowmem_end_addr); } -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - if (init_bootmem_done) - return alloc_bootmem_pages(PAGE_SIZE); - else - return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); -} - #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... 
*/ void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e0f7a18..aa067b7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -60,7 +60,6 @@ #define CPU_FTR_NOEXECUTE 0 #endif -int init_bootmem_done; int mem_init_done; unsigned long long memory_limit; @@ -180,70 +179,22 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, } EXPORT_SYMBOL_GPL(walk_system_ram_range); -/* - * Initialize the bootmem system and give it all the memory we - * have available. If we are using highmem, we only put the - * lowmem into the bootmem system. - */ #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init do_init_bootmem(void) +void __init initmem_init(void) { - unsigned long start, bootmap_pages; - unsigned long total_pages; - struct memblock_region *reg; - int boot_mapsize; - max_low_pfn = max_pfn = memblock_end_of_DRAM() PAGE_SHIFT; - total_pages = (memblock_end_of_DRAM() - memstart_addr) PAGE_SHIFT; + min_low_pfn = MEMORY_START PAGE_SHIFT; #ifdef CONFIG_HIGHMEM - total_pages = total_lowmem PAGE_SHIFT; max_low_pfn =
[PATCH 2/5] powerpc: Remove some old bootmem related comments
Now bootmem is gone from powerpc we can remove comments mentioning it. Signed-off-by: Anton Blanchard an...@samba.org Tested-by: Emil Medve emilian.me...@freescale.com --- arch/powerpc/kernel/prom.c | 5 + arch/powerpc/kernel/rtas.c | 4 ++-- arch/powerpc/kvm/book3s_hv_builtin.c | 2 +- arch/powerpc/mm/hugetlbpage.c| 4 ++-- arch/powerpc/mm/pgtable_64.c | 4 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 5957625..02e3e4c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -698,10 +698,7 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* -* Ensure that total memory size is page-aligned, because otherwise -* mark_bootmem() gets upset. -*/ + /* Ensure that total memory size is page-aligned. */ limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8b4c857..4af905e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1091,8 +1091,8 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) } /* - * Call early during boot, before mem init or bootmem, to retrieve the RTAS - * informations from the device-tree and allocate the RMO buffer for userland + * Call early during boot, before mem init, to retrieve the RTAS + * information from the device-tree and allocate the RMO buffer for userland * accesses. */ void __init rtas_initialize(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b9615ba..297dbaf 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(kvm_release_hpt); * kvm_cma_reserve() - reserve area for kvm hash pagetable * * This function reserves memory from early allocator. 
It should be - * called by arch specific code once the early allocator (memblock or bootmem) + * called by arch specific code once the memblock allocator * has been activated and all other subsystems have already allocated/reserved * memory. */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae9..5215d25 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -276,7 +276,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { @@ -399,7 +399,7 @@ void __init reserve_hugetlb_gpages(void) #else /* !PPC_FSL_BOOK3E */ /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cdb19ab..aa91737 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -109,10 +109,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags) __pgprot(flags))); } else { #ifdef CONFIG_PPC_MMU_NOHASH - /* Warning ! This will blow up if bootmem is not initialized -* which our ppc64 code is keen to do that, we'll need to -* fix it and/or be more careful -*/ pgdp = pgd_offset_k(ea); #ifdef PUD_TABLE_SIZE if (pgd_none(*pgdp)) { -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/5] powerpc: Remove superfluous bootmem includes
Lots of places included bootmem.h even when not using bootmem. Signed-off-by: Anton Blanchard an...@samba.org Tested-by: Emil Medve emilian.me...@freescale.com --- arch/powerpc/kernel/crash_dump.c | 1 - arch/powerpc/kernel/irq.c | 1 - arch/powerpc/kernel/pci_64.c | 1 - arch/powerpc/kernel/rtas_pci.c | 1 - arch/powerpc/kernel/setup_32.c | 1 - arch/powerpc/kernel/vdso.c | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 1 - arch/powerpc/mm/init_32.c | 1 - arch/powerpc/mm/init_64.c | 1 - arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/platforms/cell/celleb_scc_epci.c | 1 - arch/powerpc/platforms/cell/celleb_scc_pciex.c | 1 - arch/powerpc/platforms/maple/pci.c | 1 - arch/powerpc/platforms/powermac/pci.c | 1 - arch/powerpc/platforms/powernv/eeh-ioda.c | 1 - arch/powerpc/platforms/powernv/pci.c | 1 - arch/powerpc/sysdev/fsl_msi.c | 1 - arch/powerpc/sysdev/ipic.c | 1 - arch/powerpc/sysdev/mpic.c | 1 - arch/powerpc/sysdev/mpic_pasemi_msi.c | 1 - arch/powerpc/sysdev/mpic_u3msi.c | 1 - arch/powerpc/sysdev/ppc4xx_msi.c | 1 - arch/powerpc/sysdev/ppc4xx_pci.c | 1 - arch/powerpc/sysdev/qe_lib/qe.c| 1 - arch/powerpc/sysdev/qe_lib/qe_ic.c | 1 - arch/powerpc/sysdev/uic.c | 1 - 26 files changed, 1 insertion(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 7a13f37..0bfe370 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -12,7 +12,6 @@ #undef DEBUG #include linux/crash_dump.h -#include linux/bootmem.h #include linux/memblock.h #include asm/code-patching.h #include asm/kdump.h diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4c5891d..c47a74d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -50,7 +50,6 @@ #include linux/list.h #include linux/radix-tree.h #include linux/mutex.h -#include linux/bootmem.h #include linux/pci.h #include linux/debugfs.h #include linux/of.h diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 
155013d..ba0f2d6 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -17,7 +17,6 @@ #include linux/pci.h #include linux/string.h #include linux/init.h -#include linux/bootmem.h #include linux/export.h #include linux/mm.h #include linux/list.h diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index c168337..fe39926 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -26,7 +26,6 @@ #include linux/pci.h #include linux/string.h #include linux/init.h -#include linux/bootmem.h #include asm/io.h #include asm/pgtable.h diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e2bc044..84dafdf 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -11,7 +11,6 @@ #include linux/delay.h #include linux/initrd.h #include linux/tty.h -#include linux/bootmem.h #include linux/seq_file.h #include linux/root_dev.h #include linux/cpu.h diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f174351..305eb0d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -20,7 +20,6 @@ #include linux/user.h #include linux/elf.h #include linux/security.h -#include linux/bootmem.h #include linux/memblock.h #include asm/pgtable.h diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 297dbaf..6baa186 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -12,7 +12,6 @@ #include linux/export.h #include linux/sched.h #include linux/spinlock.h -#include linux/bootmem.h #include linux/init.h #include linux/memblock.h #include linux/sizes.h diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 9d1bde2..6332368 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -26,7 +26,6 @@ #include linux/mm.h #include linux/stddef.h #include linux/init.h -#include linux/bootmem.h #include linux/highmem.h #include 
linux/initrd.h #include linux/pagemap.h diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 3481556..10471f9 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -34,7 +34,6 @@ #include linux/vmalloc.h #include linux/init.h #include linux/delay.h -#include linux/bootmem.h #include linux/highmem.h #include linux/idr.h #include linux/nodemask.h diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index aa91737..e0c7185 100644 --- a/arch/powerpc/mm/pgtable_64.c +++
[PATCH 4/5] powerpc: Move sparse_init() into initmem_init
We did part of sparse initialisation in setup_arch and part in initmem_init. Put them together. Signed-off-by: Anton Blanchard an...@samba.org Tested-by: Emil Medve emilian.me...@freescale.com --- arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/mm/mem.c | 1 + arch/powerpc/mm/numa.c | 2 ++ 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9f8f472..8bc8f77 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -682,7 +682,6 @@ void __init setup_arch(char **cmdline_p) emergency_stack_init(); initmem_init(); - sparse_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = dummy_con; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index aa067b7..abbc55d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -195,6 +195,7 @@ void __init initmem_init(void) /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); + sparse_init(); } /* mark pages that don't exist as nosave */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 4f9c18a..52559ff 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -964,6 +964,8 @@ void __init initmem_init(void) sparse_memory_present_with_active_regions(nid); } + sparse_init(); + setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 5/5] powerpc: make __ffs return unsigned long
I'm seeing a build warning in mm/nobootmem.c after removing bootmem: mm/nobootmem.c: In function '__free_pages_memory': include/linux/kernel.h:713:17: warning: comparison of distinct pointer types lacks a cast [enabled by default] (void) (_min1 == _min2); \ ^ mm/nobootmem.c:90:11: note: in expansion of macro 'min' order = min(MAX_ORDER - 1UL, __ffs(start)); ^ The rest of the world seems to define __ffs as returning unsigned long, so let's do that. Signed-off-by: Anton Blanchard an...@samba.org Tested-by: Emil Medve emilian.me...@freescale.com --- arch/powerpc/include/asm/bitops.h | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index bd3bd57..c633f05 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -213,7 +213,7 @@ static __inline__ unsigned long ffz(unsigned long x) return __ilog2(x -x); } -static __inline__ int __ffs(unsigned long x) +static __inline__ unsigned long __ffs(unsigned long x) { return __ilog2(x -x); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index c507767..dae96fb 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -152,7 +152,7 @@ static int setup_one_atmu(struct ccsr_pci __iomem *pci, flags |= 0x1000; /* enable relaxed ordering */ for (i = 0; size 0; i++) { - unsigned int bits = min(ilog2(size), + unsigned int bits = min_t(u32, ilog2(size), __ffs(pci_addr | phys_addr)); if (index + i = 5) -- 1.9.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 0/3] PCI/MSI: Remove arch_msi_check_device()
Dear Alexander Gordeev, On Sun, 7 Sep 2014 20:57:52 +0200, Alexander Gordeev wrote: Alexander Gordeev (3): patch 1 - PCI/MSI/PPC: Remove arch_msi_check_device() patch 2 - PCI/MSI/Armada-370-xp: Remove arch_msi_check_device() patch 3 - PCI/MSI: Remove arch_msi_check_device() For the entire series: Tested-by: Thomas Petazzoni thomas.petazz...@free-electrons.com I tested on the Armada XP GP platform, which is particularly affected by PATCH 2/3, as it is using the irq-armada-370-xp irqchip driver. I tested with an igb PCIe NIC that supports MSI and MSI-X, and I verified that MSI-X is still rejected (since we don't support it on Armada XP, at least for now), and that MSI is accepted and actually works. Thanks for doing this, and sorry for the delay in getting the patches tested! Best regards, Thomas -- Thomas Petazzoni, CTO, Free Electrons Embedded Linux, Kernel and Android engineering http://free-electrons.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH] dt:numa: adding numa node mapping for memory nodes.
On Sep 17, 2014, at 1:56 AM, Ganapatrao Kulkarni ganapatrao.kulka...@caviumnetworks.com wrote: From: Ganapatrao Kulkarni ganapatrao.kulka...@cavium.com This patch adds property nid to memory node to provide the memory range to numa node id mapping. Signed-off-by: Ganapatrao Kulkarni ganapatrao.kulka...@cavium.com — Adding the PPC guys as they’ve been doing NUMA on IBM Power Servers for years with OF/DT. So we should really try and follow what they’ve done. Documentation/devicetree/bindings/numa.txt | 58 ++ 1 file changed, 58 insertions(+) create mode 100644 Documentation/devicetree/bindings/numa.txt diff --git a/Documentation/devicetree/bindings/numa.txt b/Documentation/devicetree/bindings/numa.txt new file mode 100644 index 000..c4a94f2 --- /dev/null +++ b/Documentation/devicetree/bindings/numa.txt @@ -0,0 +1,58 @@ +== +numa id binding description +== + +== +1 - Introduction +== +The device node property nid(numa node id) can be added to memory +device node to map the range of memory addresses as defined in property reg. +The property nid maps the memory range to the numa node id, which is used to +find the local and remory pages on numa aware systems. + +== +2 - nid property +== +Numa node id, nid is required property of memory device node for +numa enabled platforms. 
+ +|--| +|Property Type | Usage | Value Type | Definition | +|--| +| nid | R|u32 | Numa Node id| +| | || for this memory | +|--| + + +4 - Example memory nodes with numa node id mapping + + +Example 1 (2 memory nodes, each mapped to a numa node.): + + memory@ { + device_type = memory; + reg = 0x0 0x 0x0 0x8000; + nid = 0x0; + }; + + memory@100 { + device_type = memory; + reg = 0x100 0x 0x0 0x8000; + nid = 0x1; + }; + +Example 2 (multiple memory ranges in each memory node and mapped to numa node): + + memory@ { + device_type = memory; + reg = 0x0 0x 0x0 0x8000, + 0x0 0x8000 0x0 0x8000; + nid = 0x0; + }; + + memory@100 { + device_type = memory; + reg = 0x100 0x 0x0 0x8000, + 0x100 0x8000 0x0 0x8000; + nid = 0x1; + }; -- 1.8.1.4 -- Employee of Qualcomm Innovation Center, Inc. Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 01/21] powerpc/8xx: Declare SPRG2 as a SCRATCH register
Since commit 469d62be9263b92f2c3329540cbb1c076111f4f3, SPRG2 is used as a scratch register just like SPRG0 and SPRG1. So declare it as such and fix the comment which is not valid anymore since that commit. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/include/asm/reg.h |3 ++- arch/powerpc/kernel/head_8xx.S | 10 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index cb9c174..b6a7d62 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -888,7 +888,7 @@ * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors - * - SPRG2 apparently unused but initialized + * - SPRG2 scratch for exception vectors * */ #ifdef CONFIG_PPC64 @@ -994,6 +994,7 @@ #ifdef CONFIG_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 +#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 #endif diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 55d12fb..1329c5a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -301,7 +301,7 @@ InstructionTLBMiss: stw r11, 4(r0) #else mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 + mtspr SPRN_SPRG_SCRATCH2, r11 #endif mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 @@ -363,7 +363,7 @@ InstructionTLBMiss: mfspr r10, SPRN_DAR mtcrr10 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else lwz r11, 0(r0) mtcrr11 @@ -386,7 +386,7 @@ InstructionTLBMiss: mtcrr10 li r11, 0x00f0 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else lwz r11, 0(r0) mtcrr11 @@ -409,7 +409,7 @@ DataStoreTLBMiss: stw r11, 4(r0) #else mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 + mtspr SPRN_SPRG_SCRATCH2, r11 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ @@ -487,7 +487,7 
@@ DataStoreTLBMiss: mfspr r10, SPRN_DAR mtcrr10 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else mtspr SPRN_DAR, r11 /* Tag DAR */ lwz r11, 0(r0) -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 03/21] powerpc/8xx: exception InstructionAccess does not exist on MPC8xx
Exception InstructionAccess does not exist on MPC8xx. No need to branch there from somewhere else. Handling can be done directly in InstructionTLBError Exception. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - arch/powerpc/mm/fault.c uses the vector number, so make sure it understands the new ones. arch/powerpc/kernel/head_8xx.S | 17 +++-- arch/powerpc/mm/fault.c| 1 + 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3af6db1..fbe5d10 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -234,15 +234,9 @@ DataAccess: EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. - * This is never generated by the MPC8xx. We jump to it for other - * translation errors. + * This is never generated by the MPC8xx. */ - . = 0x400 -InstructionAccess: - EXCEPTION_PROLOG - mr r4,r12 - mr r5,r9 - EXC_XFER_LITE(0x400, handle_page_fault) + EXCEPTION(0x400, InstructionAccess, unknown_exception, EXC_XFER_STD) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -382,7 +376,7 @@ InstructionTLBMiss: #endif mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 - b InstructionAccess + b InstructionTLBError . = 0x1200 DataStoreTLBMiss: @@ -477,7 +471,10 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - b InstructionAccess + EXCEPTION_PROLOG + mr r4,r12 + mr r5,r9 + EXC_XFER_LITE(0x1300, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. 
We can catch that diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 51ab9e7..4d63c96 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -526,6 +526,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) break; case 0x400: case 0x480: + case 0x1300: printk(KERN_ALERT Unable to handle kernel paging request for instruction fetch\n); break; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 02/21] powerpc/8xx: Use SCRATCH0 and SCRATCH1 also for TLB handlers
SCRATCH0 and SCRATCH1 are only used in Exceptions prologs where no other exception can happen. There is therefore no need to preserve them across TLB handlers, we can use them there as in other exceptions. One of the advantages is that they do not suffer CPU6 errata unlike M_TW register. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S | 104 -- 1 files changed, 36 insertions(+), 68 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1329c5a..3af6db1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -104,12 +104,15 @@ turn_on_mmu: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcrr10;\ + EXCEPTION_PROLOG_0; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 +#define EXCEPTION_PROLOG_0 \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mfcrr10 + #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ andi. r11,r11,MSR_PR; \ @@ -145,6 +148,14 @@ turn_on_mmu: SAVE_2GPRS(7, r11) /* + * Exception exit code. + */ +#define EXCEPTION_EPILOG_0 \ + mtcrr10;\ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + mfspr r11,SPRN_SPRG_SCRATCH1 + +/* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). 
* @@ -293,16 +304,8 @@ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcrr10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG_SCRATCH2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 addir11, r10, 0x1000 @@ -359,18 +362,11 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10/* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcrr10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - lwz r11, 0(r0) - mtcrr11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi 2: mfspr r11, SPRN_SRR1 @@ -381,19 +377,11 @@ InstructionTLBMiss: mtspr SPRN_SRR1, r11 /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcrr10 - li r11, 0x00f0 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - lwz r11, 0(r0) - mtcrr11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 b InstructionAccess . 
= 0x1200 @@ -401,16 +389,8 @@ DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcrr10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG_SCRATCH2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the @@ -483,19 +463,12 @@ DataStoreTLBMiss: mtspr SPRN_MD_RPN, r10/* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcrr10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - mtspr SPRN_DAR, r11 /* Tag DAR */ - lwz r11, 0(r0) - mtcrr11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi /* This is an instruction TLB error on the MPC8xx. This could be due @@ -519,23 +492,18 @@ DataTLBError: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcrr10 - stw r10, 0(r0) - stw r11, 4(r0) +
[PATCH v3 04/21] powerpc/8xx: Remove loading of r10 at end of FixupDAR
Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, r10 is not used anymore after FixupDAR. There is therefore no need to set it up with the value of DAR. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S |7 +++ 1 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fbe5d10..e59e39e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -495,7 +495,7 @@ DataTLBError: mfspr r10, SPRN_DAR cmpwi cr0, r10, 0x00f0 beq-FixupDAR/* must be a buggy dcbX, icbi insn. */ -DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ +DARFixed:/* Return from dcbx instruction bug workaround */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif @@ -524,7 +524,7 @@ DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. - * DAR is set to the calculated address and r10 also holds the EA on exit. + * DAR is set to the calculated address. */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE @@ -564,8 +564,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq+142f cmpwi cr0, r10, 1964 /* Is icbi? */ beq+142f -141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */ - b DARFixed/* Nope, go back to normal TLB processing */ +141: b DARFixed/* Nope, go back to normal TLB processing */ 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 00/21] powerpc/8xx: Optimise MMU TLB handling and add support of 16k pages
This patchset: 1) provides several MMU TLB handling optimisations on MPC8xx. 2) adds support of 16k pages on MPC8xx. All changes have been successfully tested on a custom board equipped with MPC885 Signed-off-by: Christophe Leroy christophe.le...@c-s.fr Tested-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - Patch number 10 removed, it was implementing a 16 bit alignment of the PGDIR. It is not worth potentially wasting up to 64k of memory just for removing one instruction (ori). - Preserve r11 while calculating the level 2 address, therefore no more need to save r11 into CR. Changes in v3: - Few fixes following review from Joachim Tjernlund - Removed the major hack which was saving registers in memory for CPU6 errata - Invalidating non present TLB entries earlier (in head_8xx instead of fault.c) arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/mmu-8xx.h | 2 + arch/powerpc/include/asm/pgtable-ppc32.h | 20 ++ arch/powerpc/include/asm/pte-8xx.h | 7 +- arch/powerpc/include/asm/reg.h | 3 +- arch/powerpc/kernel/head_8xx.S | 370 - arch/powerpc/mm/fault.c | 9 +- 7 files changed, 203 insertions(+), 210 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 05/21] powerpc/8xx: Fix comment about DIRTY update
Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, dirty handling is not handled here anymore. So we fix the comment. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S |8 ++-- 1 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e59e39e..171c6ef 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -477,12 +477,8 @@ InstructionTLBError: EXC_XFER_LITE(0x1300, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to - * many reasons, including a dirty update to a pte. We can catch that - * one here, but anything else is an error. First, we track down the - * Linux pte. If it is valid, write access is allowed, but the - * page dirty bit is not set, we will set it and reload the TLB. For - * any other case, we bail out to a higher level function that can - * handle it. + * many reasons, including a dirty update to a pte. We bail out to + * a higher level function that can handle it. */ . = 0x1400 DataTLBError: -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 09/21] powerpc/8xx: Optimize verification in FixupDAR
By XORing the upper part of the instruction code, we get a value that can directly be verified with the second test and we can remove the first test. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S |6 ++ 1 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e5a250c..5037420 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -542,10 +542,8 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, * no need to include them here */ - srwir10, r11, 26/* check if major OP code is 31 */ - cmpwi cr0, r10, 31 - bne-141f - rlwinm r10, r11, 0, 21, 30 + xoris r10, r11, 0x7c00/* check if major OP code is 31 */ + rlwinm r10, r10, 0, 21, 5 cmpwi cr0, r10, 2028 /* Is dcbz? */ beq+142f cmpwi cr0, r10, 940 /* Is dcbi? */ -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 07/21] powerpc/8xx: DataAccess exception not generated by MPC8xx
DataAccess exception is never generated by MPC8xx so do the job directly where it is used to avoid an unnecessary branching. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - arch/powerpc/mm/fault.c uses the vector number, so make sure it understands the new ones. arch/powerpc/kernel/head_8xx.S | 23 ++- arch/powerpc/mm/fault.c| 1 + 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 845abf8..5f04d5f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -219,19 +219,9 @@ MachineCheck: EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. - * This is never generated by the MPC8xx. We jump to it for other - * translation errors. + * This is never generated by the MPC8xx. */ - . = 0x300 -DataAccess: - EXCEPTION_PROLOG - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 - mfspr r4,SPRN_DAR - li r10,0x00f0 - mtspr SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */ - EXC_XFER_LITE(0x300, handle_page_fault) + EXCEPTION(0x300, DataAccess, unknown_exception, EXC_XFER_STD) /* Instruction access exception. * This is never generated by the MPC8xx. @@ -489,7 +479,14 @@ DataTLBError: beq-FixupDAR/* must be a buggy dcbX, icbi insn. 
*/ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_EPILOG_0 - b DataAccess + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + li r10,0x00f0 + mtspr SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */ + EXC_XFER_LITE(0x1400, handle_page_fault) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4d63c96..adc084b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -521,6 +521,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) switch (regs-trap) { case 0x300: case 0x380: + case 0x1400: printk(KERN_ALERT Unable to handle kernel paging request for data at address 0x%08lx\n, regs-dar); break; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 12/21] powerpc/8xx: Don't use MD_TWC for walk
MD_TWC can only be used properly with 4k pages. So lets calculate level 2 table index by ourselves. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - No need to save r11 in cr, we can do without modifying r11 in DataStoreTLBMiss Changes in v3: - None arch/powerpc/kernel/head_8xx.S | 28 1 files changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ad15070..0f571f5 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -297,8 +297,6 @@ InstructionTLBMiss: addir11, r10, -0x1000 tlbie r11 #endif - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10/* Have to use MD_EPN for walk, MI_EPN can't */ /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -326,10 +324,9 @@ InstructionTLBMiss: ori r11,r11,1 /* Set valid bit */ DO_8xx_CPU6(0x2b80, r3) mtspr SPRN_MI_TWC, r11/* Set segment attributes */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11/* Load pte table base address */ - mfspr r11, SPRN_MD_TWC/* and get the pte address */ - lwz r10, 0(r11) /* Get the pte */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11, 22, 20, 29/* Extract level 2 index */ + lwzxr10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT @@ -395,12 +392,13 @@ DataStoreTLBMiss: /* We have a pte table, so load fetch the pte from the table. */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11/* Load pte table base address */ - mfspr r10, SPRN_MD_TWC/* and get the pte address */ + mfspr r10, SPRN_MD_EPN/* Get address of fault */ + /* Extract level 2 index */ + rlwinm r10, r10, 22, 20, 29 + rlwimi r10, r11, 0, 0, 19 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ + ori r11, r11, 1 /* Set valid bit in physical L2 page */ /* Insert the Guarded flag into the TWC from the Linux PTE. 
* It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put @@ -524,18 +522,16 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000/* Address = 0x8000 */ - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10 mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq-3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ lwzxr11, r10, r11 /* Get the level 1 entry */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11/* Load pte table base address */ - mfspr r11, SPRN_MD_TWC/* and get the pte address */ - lwz r11, 0(r11) /* Get the pte */ + rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11, 22, 20, 29/* Extract level 2 index */ + lwzxr11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) /* restore r3 from memory */ #endif -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 13/21] powerpc/8xx: Use PAGE size related consts
For PAGE size related operations, use PAGE size consts in order to be able to use different page size in the future. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S | 30 ++ 1 files changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0f571f5..dcaee9f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -292,9 +292,9 @@ InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 - addir11, r10, 0x1000 + addir11, r10, PAGE_SIZE tlbie r11 - addir11, r10, -0x1000 + addir11, r10, -PAGE_SIZE tlbie r11 #endif @@ -313,7 +313,8 @@ InstructionTLBMiss: ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -325,7 +326,8 @@ InstructionTLBMiss: DO_8xx_CPU6(0x2b80, r3) mtspr SPRN_MI_TWC, r11/* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - rlwinm r11, r11, 22, 20, 29/* Extract level 2 index */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP @@ -385,7 +387,8 @@ DataStoreTLBMiss: lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ rlwinm. 
r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -394,8 +397,8 @@ DataStoreTLBMiss: */ mfspr r10, SPRN_MD_EPN/* Get address of fault */ /* Extract level 2 index */ - rlwinm r10, r10, 22, 20, 29 - rlwimi r10, r11, 0, 0, 19 /* Add level 2 base */ + rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ ori r11, r11, 1 /* Set valid bit in physical L2 page */ @@ -526,18 +529,20 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq-3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l -3: rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + /* Extract level 1 index */ +3: rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) 1), (PAGE_SHIFT - 2) 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - rlwinm r11, r11, 22, 20, 29/* Extract level 2 index */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) /* restore r3 from memory */ #endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 - rlwimi r11, r10, 0, 20, 31 + rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, @@ -913,12 +918,13 @@ set_dec_cpu6: .globl sdata sdata: .globl empty_zero_page + .align PAGE_SHIFT empty_zero_page: - .space 4096 + .space PAGE_SIZE .globl swapper_pg_dir swapper_pg_dir: - .space 4096 + .space PGD_TABLE_SIZE /* Room for two PTE table poiners, usually the kernel and current user * pointer to their respective root page table (pgdir). 
-- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 10/21] powerpc/8xx: Duplicate two insns instead of branching
Branching takes two cycles on MPC8xx. Let's duplicate the two instructions and avoid the branching. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S |6 -- 1 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5037420..4a49ff3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -638,9 +638,11 @@ modified_instr: /* special handling for r10,r11 since these are modified already */ 153: mfspr r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */ - b 155f + add r10, r10, r11 /* add it */ + mfctr r11 /* restore r11 */ + b 151b 154: mfspr r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */ -155: add r10, r10, r11 /* add it */ + add r10, r10, r11 /* add it */ mfctr r11 /* restore r11 */ b 151b #endif -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 14/21] powerpc/8xx: Const for TLB RPN forced value
Value 0x00f0 is used to force bits in TLB level 2 entry. This value is linked to the page size and will vary when we change the page size. Lets define a const for it in order to have it at only one place. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S | 19 +-- 1 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index dcaee9f..8966262 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -40,6 +40,13 @@ #else #define DO_8xx_CPU6(val, reg) #endif + +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#define RPN_PATTERN0x00f0 + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -211,7 +218,7 @@ MachineCheck: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -237,7 +244,7 @@ Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -341,7 +348,7 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - li r11, 0x00f0 + li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10/* Update TLB entry */ @@ -445,7 +452,7 @@ DataStoreTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, 0x00f0 +2: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10/* Update TLB entry */ @@ -479,7 +486,7 @@ DataTLBError: EXCEPTION_PROLOG_0 mfspr r11, SPRN_DAR - cmpwi cr0, r11, 0x00f0 + cmpwi cr0, r11, RPN_PATTERN beq-FixupDAR/* must be a buggy dcbX, icbi insn. 
*/ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 @@ -488,7 +495,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ stw r10,_DSISR(r11) mr r5,r10 mfspr r4,SPRN_DAR - li r10,0x00f0 + li r10,RPN_PATTERN mtspr SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */ EXC_XFER_LITE(0x1400, handle_page_fault) -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 08/21] powerpc/8xx: No need to restore registers and save them again.
In DTLBError handler there is no need to restore r10, r11 and cr registers after fixing DAR as they are saved again to the same place just after. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5f04d5f..e5a250c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -478,8 +478,8 @@ DataTLBError: cmpwi cr0, r11, 0x00f0 beq-FixupDAR/* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ - EXCEPTION_EPILOG_0 - EXCEPTION_PROLOG + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) mr r5,r10 -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 11/21] powerpc/8xx: Use M_TW instead of M_TWB
Use M_TW instead of M_TWB for storing Level 1 table address as M_TWB requires 4k aligned tables, which is only the case with 4k pages. Consequently, we have to calculate the level 1 table index by ourselves. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S | 48 ++--- 1 files changed, 26 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 4a49ff3..ad15070 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -276,8 +276,8 @@ SystemCall: . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction - * TLB. It is modelled after the example in the Motorola manual. The task - * switch loads the M_TWB register with the pointer to the first level table. + * TLB. The task switch loads the M_TW register with the pointer to the first + * level table. * If we discover there is no second level table (value is zero) or if there * is an invalid pte, we load that into the TLB, which causes another fault * into the TLB Error interrupt where we can handle such problems. @@ -299,7 +299,6 @@ InstructionTLBMiss: #endif DO_8xx_CPU6(0x3780, r3) mtspr SPRN_MD_EPN, r10/* Have to use MD_EPN for walk, MI_EPN can't */ - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -307,14 +306,17 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - andi. r11, r10, 0x0800/* Address = 0x8000 */ + andis. 
r11, r10, 0x8000/* Address = 0x8000 */ +#endif + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ +#ifdef CONFIG_MODULES beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + lwzxr11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -375,18 +377,19 @@ DataStoreTLBMiss: #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andi. r11, r10, 0x0800 + andis. r11, r10, 0x8000 + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + lwzxr11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -523,12 +526,12 @@ FixupDAR:/* Entry point for dcbx workaround. */ andis. 
r11, r10, 0x8000/* Address = 0x8000 */ DO_8xx_CPU6(0x3780, r3) mtspr SPRN_MD_EPN, r10 - mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */ + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq-3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l - rlwimi r11, r10, 32-20, 0xffc /* r11 = r11~0xffc|(r1020)0xffc */ -3: lwz r11, 0(r11) /* Get the level 1 entry */ +3: rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + lwzxr11, r10, r11 /* Get the level 1 entry */ DO_8xx_CPU6(0x3b80, r3) mtspr SPRN_MD_TWC, r11/* Load pte table base address */ mfspr r11, SPRN_MD_TWC/* and get the pte address */ @@ -537,6 +540,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ lwz r3, 8(r0) /* restore r3 from memory */ #endif /* concat physical page address(r11) and page offset(r10) */ + mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 20, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ @@ -692,11 +696,11 @@ start_here: #ifdef CONFIG_8xx_CPU6 lis r4,
[PATCH v3 06/21] powerpc/8xx: No need to save r10 and r3 when not calling FixupDAR
r10 and r3 are only used inside FixupDAR function. So lets save them inside that function only. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S | 27 +-- 1 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 171c6ef..845abf8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -482,20 +482,12 @@ InstructionTLBError: */ . = 0x1400 DataTLBError: -#ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) -#endif EXCEPTION_PROLOG_0 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_DAR - cmpwi cr0, r10, 0x00f0 + mfspr r11, SPRN_DAR + cmpwi cr0, r11, 0x00f0 beq-FixupDAR/* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) -#endif - mfspr r10,SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 b DataAccess @@ -525,6 +517,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000/* Address = 0x8000 */ @@ -540,6 +536,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_MD_TWC, r11/* Load pte table base address */ mfspr r11, SPRN_MD_TWC/* and get the pte address */ lwz r11, 0(r11) /* Get the pte */ +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) /* restore r3 from memory */ +#endif /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 20, 31 lwz r11,0(r11) @@ -560,15 +559,13 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq+142f cmpwi cr0, r10, 1964 /* Is icbi? 
*/ beq+142f -141: b DARFixed/* Nope, go back to normal TLB processing */ +141: mfspr r10,SPRN_SPRG_SCRATCH2 + b DARFixed/* Nope, go back to normal TLB processing */ 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 142: /* continue, it was a dcbx, dcbi instruction. */ -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ -#endif #ifndef NO_SELF_MODIFYING_CODE andis. r10,r11,0x1f/* test if reg RA is r0 */ li r10,modified_instr@l @@ -587,6 +584,7 @@ modified_instr: bne+143f subfr10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */ 143: mtdar r10 /* store faulting EA in DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed/* Go back to normal TLB handling */ #else mfctr r10 @@ -640,6 +638,7 @@ modified_instr: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ mtdar r10 /* save fault EA to DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed/* Go back to normal TLB handling */ /* special handling for r10,r11 since these are modified already */ -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 15/21] powerpc/8xx: Implement 16k pages
This patch activates the handling of 16k pages on the MPC8xx. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/Kconfig |2 +- arch/powerpc/include/asm/mmu-8xx.h |2 ++ arch/powerpc/kernel/head_8xx.S |4 3 files changed, 7 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5f44d3b..dc5f64e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -518,7 +518,7 @@ config PPC_4K_PAGES bool 4k page size config PPC_16K_PAGES - bool 16k page size if 44x + bool 16k page size if 44x || PPC_8xx config PPC_64K_PAGES bool 64k page size if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64 diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 3d11d3c..986b9e1 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -56,6 +56,7 @@ * additional information from the MI_EPN, and MI_TWC registers. */ #define SPRN_MI_RPN790 +#define MI_SPS16K 0x0008 /* Small page size (0 = 4k, 1 = 16k) */ /* Define an RPN value for mapping kernel memory to large virtual * pages for boot initialization. This has real page number of 0, @@ -129,6 +130,7 @@ * additional information from the MD_EPN, and MD_TWC registers. */ #define SPRN_MD_RPN798 +#define MD_SPS16K 0x0008 /* Small page size (0 = 4k, 1 = 16k) */ /* This is a temporary storage register that could be used to save * a processor working register during a tablewalk. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 8966262..4dd6be0 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -45,7 +45,11 @@ * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. 
*/ +#ifdef CONFIG_PPC_16K_PAGES +#define RPN_PATTERN(0x00f0 | MD_SPS16K) +#else #define RPN_PATTERN0x00f0 +#endif __HEAD _ENTRY(_stext); -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 17/21] powerpc/8xx: set PTE bit 22 off TLBmiss
No need to re-set this bit at each TLB miss. Let's set it in the PTE. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - Removed PPC405 related macro from PPC8xx specific code - PTE_NONE_MASK doesn't need PAGE_ACCESSED in Linux 2.6 arch/powerpc/include/asm/pgtable-ppc32.h | 20 arch/powerpc/include/asm/pte-8xx.h | 7 +-- arch/powerpc/kernel/head_8xx.S | 10 ++ 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 47edde8..35a9b44 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -172,6 +172,25 @@ static inline unsigned long pte_update(pte_t *p, #ifdef PTE_ATOMIC_UPDATES unsigned long old, tmp; +#ifdef CONFIG_PPC_8xx + unsigned long tmp2; + + __asm__ __volatile__(\ +1: lwarx %0,0,%4\n\ + andc%1,%0,%5\n\ + or %1,%1,%6\n\ + /* 0x200 == Extended encoding, bit 22 */ \ + /* Bit 22 has to be 1 if neither _PAGE_USER nor _PAGE_RW are set */ \ + rlwimi %1,%1,32-2,0x200\n /* get _PAGE_USER */ \ + rlwinm %3,%1,32-1,0x200\n /* get _PAGE_RW */ \ + or %1,%3,%1\n\ + xori%1,%1,0x200\n + stwcx. 
%1,0,%4\n\ + bne-1b + : =r (old), =r (tmp), =m (*p), =r (tmp2) + : r (p), r (clr), r (set), m (*p) + : cc ); +#else /* CONFIG_PPC_8xx */ __asm__ __volatile__(\ 1: lwarx %0,0,%3\n\ andc%1,%0,%4\n\ @@ -182,6 +201,7 @@ static inline unsigned long pte_update(pte_t *p, : =r (old), =r (tmp), =m (*p) : r (p), r (clr), r (set), m (*p) : cc ); +#endif /* CONFIG_PPC_8xx */ #else /* PTE_ATOMIC_UPDATES */ unsigned long old = pte_val(*p); *p = __pte((old ~clr) | set); diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index d44826e..daa4616 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -48,19 +48,22 @@ */ #define _PAGE_RW 0x0400 /* lsb PP bits, inverted in HW */ #define _PAGE_USER 0x0800 /* msb PP bits */ +/* set when neither _PAGE_USER nor _PAGE_RW are set */ +#define _PAGE_KNLRO0x0200 #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c -#define _PTE_NONE_MASK _PAGE_ACCESSED +#define _PTE_NONE_MASK _PAGE_KNLRO /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO(_PAGE_SHARED) +#define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_KNLRO) #define _PAGE_KERNEL_RW(_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a7af26e..48d3de8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -445,14 +445,8 @@ DataStoreTLBMiss: and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif - /* Honour kernel RO, User NA */ - /* 0x200 == Extended encoding, bit 22 */ - rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ - /* r11 = (r10 _PAGE_RW) 1 */ - rlwinm r11, r10, 32-1, 0x200 - or r10, r11, r10 - /* invert RW and 0x200 bits */ - xorir10, r10, _PAGE_RW | 0x200 
+ /* invert RW */ + xorir10, r10, _PAGE_RW /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 22 and 28 must be clear. -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 19/21] powerpc/8xx: Don't restore regs to save them again.
There is no need to restore r10, r11 and cr registers at the end of the ITLBmiss handler as they are saved again to the same place in the ITLBError handler we are jumping to. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S |8 +--- 1 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index bb7c816..e21f0b2 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -381,8 +381,7 @@ InstructionTLBMiss: lwz r3, 8(r0) #endif mfspr r10, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 - b InstructionTLBError + b InstructionTLBError1 . = 0x1200 DataStoreTLBMiss: @@ -471,7 +470,10 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - EXCEPTION_PROLOG + EXCEPTION_PROLOG_0 +InstructionTLBError1: + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 mr r4,r12 mr r5,r9 EXC_XFER_LITE(0x1300, handle_page_fault) -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 18/21] powerpc/8xx: _PMD_PRESENT already set in level 1 entries
When a PMD entry is valid, _PMD_PRESENT is set. Therefore, forcing that bit during TLB loading is useless. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S |2 -- 1 files changed, 0 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 48d3de8..bb7c816 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -340,7 +340,6 @@ InstructionTLBMiss: /* We have a pte table, so load the MI_TWC with the attributes * for this segment. */ - ori r11,r11,1 /* Set valid bit */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ @@ -417,7 +416,6 @@ DataStoreTLBMiss: rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ /* Insert the Guarded flag into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 21/21] powerpc/8xx: Invalidate non present TLB as early as possible
The 8xx sometimes needs to load invalid/non-present TLB entries in its DTLB asm handler. These must be invalidated separately as the Linux mm doesn't do so. Commit 5efab4a02c89c252fb4cce097aafde5f8208dbfe was invalidating them in arch/powerpc/mm/fault.c. This patch does the invalidation earlier in order to free the TLB as soon as possible. This also has the advantage of removing some 8xx specific code from fault.c Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v3: - New linux/arch/powerpc/kernel/head_8xx.S | 15 ++- linux/arch/powerpc/mm/fault.c| 7 --- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/linux/arch/powerpc/kernel/head_8xx.S b/linux/arch/powerpc/kernel/head_8xx.S index 3e8e341..3b96862 100644 --- a/linux/arch/powerpc/kernel/head_8xx.S +++ b/linux/arch/powerpc/kernel/head_8xx.S @@ -473,7 +473,10 @@ InstructionTLBError1: EXCEPTION_PROLOG_2 mr r4,r12 mr r5,r9 - EXC_XFER_LITE(0x1300, handle_page_fault) + andis. r10,r5,0x4000 + beq+1f + tlbie r4 +1: EXC_XFER_LITE(0x1300, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -489,11 +492,13 @@ DataTLBError: DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) mfspr r4,SPRN_DAR - li r10,RPN_PATTERN + andis. 
r10,r5,0x4000 + beq+1f + tlbie r4 +1: li r10,RPN_PATTERN mtspr SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */ EXC_XFER_LITE(0x1400, handle_page_fault) diff --git a/linux/arch/powerpc/mm/fault.c b/linux/arch/powerpc/mm/fault.c index adc084b..6f4f731 100644 --- a/linux/arch/powerpc/mm/fault.c +++ b/linux/arch/powerpc/mm/fault.c @@ -43,7 +43,6 @@ #include asm/tlbflush.h #include asm/siginfo.h #include asm/debug.h -#include mm/mmu_decl.h #include icswx.h @@ -368,12 +367,6 @@ good_area: goto bad_area; #endif /* CONFIG_6xx */ #if defined(CONFIG_8xx) - /* 8xx sometimes need to load a invalid/non-present TLBs. -* These must be invalidated separately as linux mm don't. -*/ - if (error_code 0x4000) /* no translation? */ - _tlbil_va(address, 0, 0, 0); - /* The MPC8xx seems to always set 0x8000, which is * undefined. Of those that can be set, this is the only * one which seems bad. -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 16/21] powerpc/8xx: Better readability of ERRATA CPU6 handling
This patch hides the SPR address needed for CPU6 ERRATA handling inside the macro. Then we don't have to worry about this address directly in the code. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v2: - None Changes in v3: - None arch/powerpc/kernel/head_8xx.S | 29 - 1 files changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 4dd6be0..a7af26e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,12 +33,19 @@ /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 -#define DO_8xx_CPU6(val, reg) \ - li reg, val; \ - stw reg, 12(r0);\ - lwz reg, 12(r0); +#define SPRN_MI_TWC_ADDR 0x2b80 +#define SPRN_MI_RPN_ADDR 0x2d80 +#define SPRN_MD_TWC_ADDR 0x3b80 +#define SPRN_MD_RPN_ADDR 0x3d80 + +#define MTSPR_CPU6(spr, reg, treg) \ + li treg, spr##_ADDR; \ + stw treg, 12(r0); \ + lwz treg, 12(r0); \ + mtspr spr, reg #else -#define DO_8xx_CPU6(val, reg) +#define MTSPR_CPU6(spr, reg, treg) \ + mtspr spr, reg #endif /* @@ -334,8 +341,7 @@ InstructionTLBMiss: * for this segment. 
*/ ori r11,r11,1 /* Set valid bit */ - DO_8xx_CPU6(0x2b80, r3) - mtspr SPRN_MI_TWC, r11/* Set segment attributes */ + MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -354,8 +360,7 @@ InstructionTLBMiss: */ li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ - DO_8xx_CPU6(0x2d80, r3) - mtspr SPRN_MI_RPN, r10/* Update TLB entry */ + MTSPR_CPU6(SPRN_MI_RPN, r10, r3)/* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 @@ -424,8 +429,7 @@ DataStoreTLBMiss: * It is bit 25 in the Linux PTE and bit 30 in the TWC */ rlwimi r11, r10, 32-5, 30, 30 - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 + MTSPR_CPU6(SPRN_MD_TWC, r11, r3) /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: @@ -458,8 +462,7 @@ DataStoreTLBMiss: */ 2: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ - DO_8xx_CPU6(0x3d80, r3) - mtspr SPRN_MD_RPN, r10/* Update TLB entry */ + MTSPR_CPU6(SPRN_MD_RPN, r10, r3)/* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 20/21] powerpc/8xx: Use DAR to save r3 for CPU6 ERRATA
As we are not using anymore DAR to save registers, it is now available for saving the r3 register used for CPU6 ERRATA handling. Therefore we can remove the major hack which was to use memory location 0 to save r3. Signed-off-by: Christophe Leroy christophe.le...@c-s.fr --- Changes in v3: - New linux/arch/powerpc/kernel/head_8xx.S | 33 +++-- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/linux/arch/powerpc/kernel/head_8xx.S b/linux/arch/powerpc/kernel/head_8xx.S index e21f0b2..3e8e341 100644 --- a/linux/arch/powerpc/kernel/head_8xx.S +++ b/linux/arch/powerpc/kernel/head_8xx.S @@ -83,13 +83,6 @@ _ENTRY(_start); * 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to * the internal processor registers before MMU_init is called. * - * The TLB code currently contains a major hack. Since I use the condition - * code register, I have to save and restore it. I am out of registers, so - * I just store it in memory location 0 (the TLB handlers are not reentrant). - * To avoid making any decisions, I need to use the segment valid bit - * in the first level table, but that would require many changes to the - * Linux page directory/table functions that I don't want to do right now. - * * -- Dan */ .globl __start @@ -304,7 +297,7 @@ SystemCall: */ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 @@ -349,7 +342,10 @@ InstructionTLBMiss: #ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT + li r11, RPN_PATTERN bne-cr0, 2f +#else + li r11, RPN_PATTERN #endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 21 and 28 must be clear. @@ -357,28 +353,29 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. 
*/ - li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ MTSPR_CPU6(SPRN_MI_RPN, r10, r3)/* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 rfi 2: - mfspr r11, SPRN_SRR1 + mfspr r10, SPRN_SRR1 /* clear all error bits as TLB Miss * sets a few unconditionally */ - rlwinm r11, r11, 0, 0x - mtspr SPRN_SRR1, r11 + rlwinm r10, r10, 0, 0x + mtspr SPRN_SRR1, r10 /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 b InstructionTLBError1 @@ -386,7 +383,7 @@ InstructionTLBMiss: . = 0x1200 DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 @@ -457,7 +454,7 @@ DataStoreTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 @@ -527,7 +524,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ @@ -546,7 +543,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ + mfspr r3, SPRN_DAR #endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 00/21] powerpc/8xx: Optimise MMU TLB handling and add support of 16k pages
On Wed, 2014-09-17 at 18:36 +0200, Christophe Leroy wrote: This patchset: 1) provides several MMU TLB handling optimisation on MPC8xx. 2) adds support of 16k pages on MPC8xx. All changes have been successfully tested on a custom board equipped with MPC885 Signed-off-by: Christophe Leroy christophe.le...@c-s.fr Tested-by: Christophe Leroy christophe.le...@c-s.fr I've already applied patches 1, 2, 4, 5, 6, 9, and 10 from the previous patchset -- have they changed? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [1/5] pseries: Define rtas hotplug event sections
On 09/17/2014 02:06 AM, Michael Ellerman wrote: On Mon, 2014-09-15 at 15:29 -0500, Nathan Fontenot wrote: diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index b390f55..a01879e 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -273,6 +273,7 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log) #define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' 8) | 'I') #define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' 8) | 'H') #define PSERIES_ELOG_SECT_ID_USER_DEF (('U' 8) | 'D') +#define PSERIES_ELOG_SECT_ID_HOTPLUG(('H' 8) | 'P') /* Vendor specific Platform Event Log Format, Version 6, section header */ struct pseries_errorlog { @@ -296,6 +297,31 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) return be16_to_cpu(sect-length); } +/* RTAS pseries hotplug errorlog section */ +struct pseries_hp_errorlog { +uint8_t resource; +uint8_t action; +uint8_t id_type; +uint8_t reserved; These should be u8. ok. +union { +__be32 drc_index; +__be32 drc_count; +chardrc_name[1]; I don't see drc_name used? I don't use drc_name in this patch set but the drc_name piece is part of the rtas hotplug section definition and I wanted to provide a complete definition of the section. -Nathan +} _drc_u; +}; cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [2/5] pseries: Export drc_[acquire|release]_drc() routines
On 09/17/2014 02:07 AM, Michael Ellerman wrote: On Mon, 2014-09-15 at 15:30 -0500, Nathan Fontenot wrote: diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 361add6..b94516b 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -59,6 +59,8 @@ extern void dlpar_free_cc_property(struct property *); extern struct device_node *dlpar_configure_connector(u32, struct device_node *); extern int dlpar_attach_node(struct device_node *); extern int dlpar_detach_node(struct device_node *); +extern int dlpar_acquire_drc(u32); +extern int dlpar_release_drc(u32); Please name the parameters. Will do. And don't bother with extern. I was following the convention used in the file, dropping the extern is fine with me though. -Nathan ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v4 04/11] drivers: base: support cpu cache information interface to userspace via sysfs
Hi Greg, On 03/09/14 18:00, Sudeep Holla wrote: From: Sudeep Holla sudeep.ho...@arm.com This patch adds initial support for providing processor cache information to userspace through sysfs interface. This is based on already existing implementations(x86, ia64, s390 and powerpc) and hence the interface is intended to be fully compatible. The main purpose of this generic support is to avoid further code duplication to support new architectures and also to unify all the existing different implementations. This implementation maintains the hierarchy of cache objects which reflects the system's cache topology. Cache devices are instantiated as needed as CPUs come online. The cache information is replicated per-cpu even if they are shared. A per-cpu array of cache information maintained is used mainly for sysfs-related book keeping. It also implements the shared_cpu_map attribute, which is essential for enabling both kernel and user-space to discover the system's overall cache topology. This patch also add the missing ABI documentation for the cacheinfo sysfs interface already, which is well defined and widely used. Can you review the first 4 patches in this series please ? Regards, Sudeep ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V4] ASoC: fsl_ssi: refine ipg clock usage in this module
On Tue, Sep 16, 2014 at 10:13:16AM +0800, Shengjiu Wang wrote: Check if ipg clock is in clock-names property, then we can move the ipg clock enable and disable operation to startup and shutdown, that is only enable ipg clock when ssi is working and keep clock is disabled when ssi is in idle. But when the checking is failed, remain the clock control as before. Tested-by: Markus Pargmann m...@pengutronix.de Signed-off-by: Shengjiu Wang shengjiu.w...@freescale.com Acked-by: Nicolin Chen nicoleots...@gmail.com --- v4 change log: fix the code indent issue. sound/soc/fsl/fsl_ssi.c | 53 --- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 2fc3e66..16a1361 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -169,6 +169,7 @@ struct fsl_ssi_private { u8 i2s_mode; bool use_dma; bool use_dual_fifo; + bool has_ipg_clk_name; unsigned int fifo_depth; struct fsl_ssi_rxtx_reg_val rxtx_reg_val; @@ -530,6 +531,11 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = substream-private_data; struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(rtd-cpu_dai); + int ret; + + ret = clk_prepare_enable(ssi_private-clk); + if (ret) + return ret; /* When using dual fifo mode, it is safer to ensure an even period * size. 
If appearing to an odd number while DMA always starts its @@ -544,6 +550,21 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, } /** + * fsl_ssi_shutdown: shutdown the SSI + * + */ +static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream-private_data; + struct fsl_ssi_private *ssi_private = + snd_soc_dai_get_drvdata(rtd-cpu_dai); + + clk_disable_unprepare(ssi_private-clk); + +} + +/** * fsl_ssi_set_bclk - configure Digital Audio Interface bit clock * * Note: This function can be only called when using SSI as DAI master @@ -1043,6 +1064,7 @@ static int fsl_ssi_dai_probe(struct snd_soc_dai *dai) static const struct snd_soc_dai_ops fsl_ssi_dai_ops = { .startup= fsl_ssi_startup, + .shutdown = fsl_ssi_shutdown, .hw_params = fsl_ssi_hw_params, .hw_free= fsl_ssi_hw_free, .set_fmt= fsl_ssi_set_dai_fmt, @@ -1168,17 +1190,22 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, u32 dmas[4]; int ret; - ssi_private-clk = devm_clk_get(pdev-dev, NULL); + if (ssi_private-has_ipg_clk_name) + ssi_private-clk = devm_clk_get(pdev-dev, ipg); + else + ssi_private-clk = devm_clk_get(pdev-dev, NULL); if (IS_ERR(ssi_private-clk)) { ret = PTR_ERR(ssi_private-clk); dev_err(pdev-dev, could not get clock: %d\n, ret); return ret; } - ret = clk_prepare_enable(ssi_private-clk); - if (ret) { - dev_err(pdev-dev, clk_prepare_enable failed: %d\n, ret); - return ret; + if (!ssi_private-has_ipg_clk_name) { + ret = clk_prepare_enable(ssi_private-clk); + if (ret) { + dev_err(pdev-dev, clk_prepare_enable failed: %d\n, ret); + return ret; + } } /* For those SLAVE implementations, we ingore non-baudclk cases @@ -1236,8 +1263,9 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, return 0; error_pcm: - clk_disable_unprepare(ssi_private-clk); + if (!ssi_private-has_ipg_clk_name) + clk_disable_unprepare(ssi_private-clk); return ret; } @@ -1246,7 +1274,8 @@ static void fsl_ssi_imx_clean(struct 
platform_device *pdev, { if (!ssi_private-use_dma) imx_pcm_fiq_exit(pdev); - clk_disable_unprepare(ssi_private-clk); + if (!ssi_private-has_ipg_clk_name) + clk_disable_unprepare(ssi_private-clk); } static int fsl_ssi_probe(struct platform_device *pdev) @@ -1321,8 +1350,16 @@ static int fsl_ssi_probe(struct platform_device *pdev) return -ENOMEM; } - ssi_private-regs = devm_regmap_init_mmio(pdev-dev, iomem, + ret = of_property_match_string(np, clock-names, ipg); + if (ret 0) { + ssi_private-has_ipg_clk_name = false; + ssi_private-regs = devm_regmap_init_mmio(pdev-dev, iomem, fsl_ssi_regconfig); + } else { + ssi_private-has_ipg_clk_name = true; + ssi_private-regs = devm_regmap_init_mmio_clk(pdev-dev, + ipg, iomem, fsl_ssi_regconfig); + } if
Re: [PATCH V4] ASoC: fsl_ssi: refine ipg clock usage in this module
On Tue, Sep 16, 2014 at 10:13:16AM +0800, Shengjiu Wang wrote: Check if ipg clock is in clock-names property, then we can move the ipg clock enable and disable operation to startup and shutdown, that is only enable ipg clock when ssi is working and keep clock is disabled when ssi is in idle. But when the checking is failed, remain the clock control as before. Applied, thanks. signature.asc Description: Digital signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 0/4] powerpc/perf: Miscellaneous fixes
Miscellaneous fixes for perf and 24x7 counters in powerpc. Patches 1,3,4 were submitted earlier as a part of the parametrized events for 24x7 counters. But they are not directly related to the parametrized events. Patch 2 simplifies catalog_read() and fixes a bug in it that prevents the first page of the catalog file from being read. Cody P Schafer (3): powerpc/perf/hv-24x7: use kmem_cache instead of aligned stack allocations perf Documentation: sysfs events/ interfaces perf Documentation: remove duplicated docs for powerpc cpu specific events Sukadev Bhattiprolu (1): Simplify catalog_read() .../testing/sysfs-bus-event_source-devices-events | 611 ++-- arch/powerpc/perf/hv-24x7.c| 144 ++--- 2 files changed, 96 insertions(+), 659 deletions(-) -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/4] powerpc/perf/hv-24x7: use kmem_cache instead of aligned stack allocations
From: Cody P Schafer c...@linux.vnet.ibm.com Ian pointed out the use of __aligned(4096) caused rather large stack consumption in single_24x7_request(), so use the kmem_cache hv_page_cache (which we've already got set up for other allocations) insead of allocating locally. CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Reported-by: Ian Munsie imun...@au1.ibm.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7.c | 52 ++- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 70d4f74..2f2215c 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -294,7 +294,7 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, u16 lpar, u64 *res, bool success_expected) { - unsigned long ret; + unsigned long ret = -ENOMEM; /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -304,7 +304,27 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, struct reqb { struct hv_24x7_request_buffer buf; struct hv_24x7_request req; - } __packed __aligned(4096) request_buffer = { + } __packed * request_buffer; + struct resb { + struct hv_24x7_data_result_buffer buf; + struct hv_24x7_result res; + struct hv_24x7_result_element elem; + __be64 result; + } __packed * result_buffer; + + BUILD_BUG_ON(sizeof(*request_buffer) 4096); + BUILD_BUG_ON(sizeof(*result_buffer) 4096); + + request_buffer = kmem_cache_alloc(hv_page_cache, GFP_USER); + + if (!request_buffer) + goto out_reqb; + + result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); + if (!result_buffer) + goto out_resb; + + *request_buffer = (struct reqb) { .buf = { .interface_version = HV_24X7_IF_VERSION_CURRENT, .num_requests = 1, @@ -320,28 +340,30 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, } }; - struct resb { - struct 
hv_24x7_data_result_buffer buf; - struct hv_24x7_result res; - struct hv_24x7_result_element elem; - __be64 result; - } __packed __aligned(4096) result_buffer = {}; - ret = plpar_hcall_norets(H_GET_24X7_DATA, - virt_to_phys(request_buffer), sizeof(request_buffer), - virt_to_phys(result_buffer), sizeof(result_buffer)); + virt_to_phys(request_buffer), sizeof(*request_buffer), + virt_to_phys(result_buffer), sizeof(*result_buffer)); if (ret) { if (success_expected) pr_err_ratelimited(hcall failed: %d %#x %#x %d = 0x%lx (%ld) detail=0x%x failing ix=%x\n, domain, offset, ix, lpar, ret, ret, - result_buffer.buf.detailed_rc, - result_buffer.buf.failing_request_ix); - return ret; + result_buffer-buf.detailed_rc, + result_buffer-buf.failing_request_ix); + goto out_hcall; } - *res = be64_to_cpu(result_buffer.result); + *res = be64_to_cpu(result_buffer-result); + kfree(result_buffer); + kfree(request_buffer); + return ret; + +out_hcall: + kfree(result_buffer); +out_resb: + kfree(request_buffer); +out_reqb: return ret; } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/4] Simplify catalog_read()
catalog_read() implements the read interface for the sysfs file /sys/bus/event_source/devices/hv_24x7/interface/catalog It essentially takes a buffer, an offset and count as parameters to the read() call. It makes a hypervisor call to read a specific page from the catalog and copy the required bytes into the given buffer. Each call to catalog_read() returns at most one 4K page. Given these requirements, we should be able to simplify the catalog_read(). Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7.c | 92 +-- 1 file changed, 10 insertions(+), 82 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 2f2215c..9427ef7 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -75,86 +75,6 @@ static struct attribute_group format_group = { static struct kmem_cache *hv_page_cache; -/* - * read_offset_data - copy data from one buffer to another while treating the - *source buffer as a small view on the total avaliable - *source data. - * - * @dest: buffer to copy into - * @dest_len: length of @dest in bytes - * @requested_offset: the offset within the source data we want. Must be 0 - * @src: buffer to copy data from - * @src_len: length of @src in bytes - * @source_offset: the offset in the sorce data that (src,src_len) refers to. - * Must be 0 - * - * returns the number of bytes copied. - * - * The following ascii art shows the various buffer possitioning we need to - * handle, assigns some arbitrary varibles to points on the buffer, and then - * shows how we fiddle with those values to get things we care about (copy - * start in src and copy len) - * - * s = @src buffer - * d = @dest buffer - * '.' areas in d are written to. 
- * - * u - * x wv z - * d |.| - * s |--| - * - * u - * x w z v - * d |--| - * s |--| - * - * x wu,z,v - * d || - * s |--| - * - * x,wu,v,z - * d |..| - * s |--| - * - * xu - * wvz - * d || - * s |--| - * - * x z w v - * d|--| - * s |--| - * - * x = source_offset - * w = requested_offset - * z = source_offset + src_len - * v = requested_offset + dest_len - * - * w_offset_in_s = w - x = requested_offset - source_offset - * z_offset_in_s = z - x = src_len - * v_offset_in_s = v - x = request_offset + dest_len - src_len - */ -static ssize_t read_offset_data(void *dest, size_t dest_len, - loff_t requested_offset, void *src, - size_t src_len, loff_t source_offset) -{ - size_t w_offset_in_s = requested_offset - source_offset; - size_t z_offset_in_s = src_len; - size_t v_offset_in_s = requested_offset + dest_len - src_len; - size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s); - size_t copy_len = u_offset_in_s - w_offset_in_s; - - if (requested_offset 0 || source_offset 0) - return -EINVAL; - - if (z_offset_in_s = w_offset_in_s) - return 0; - - memcpy(dest, src + w_offset_in_s, copy_len); - return copy_len; -} - static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, unsigned long version, unsigned long index) @@ -185,6 +105,8 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, ssize_t ret = 0; size_t catalog_len = 0, catalog_page_len = 0, page_count = 0; loff_t page_offset = 0; + loff_t offset_in_page; + size_t copy_len; uint64_t catalog_version_num = 0; void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); struct hv_24x7_catalog_page_0 *page_0 = page; @@ -203,6 +125,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, page_offset = offset / 4096; page_count = count / 4096; + offset_in_page = count % 4096; if (page_offset = catalog_page_len) goto e_free; @@ -216,8 +139,13 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj, } } - ret = read_offset_data(buf, count, offset, - page, 4096, 
page_offset * 4096); + copy_len = 4096 - offset_in_page; + if (copy_len count) + copy_len = count; + + memcpy(buf, page+offset_in_page, copy_len); + ret = copy_len; + e_free: if (hret) pr_err(h_get_24x7_catalog_page(ver=%lld, page=%lld) failed: -- 1.7.9.5
[PATCH 4/4] perf Documentation: remove duplicated docs for powerpc cpu specific events
From: Cody P Schafer c...@linux.vnet.ibm.com Listing specific events doesn't actually help us at all here because: - these events actually vary between different ppc processors, they aren't garunteed to be present. - the documentation of the (generic) file contents is now superceded by the docs for arbitrary event file contents. CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- .../testing/sysfs-bus-event_source-devices-events | 573 1 file changed, 573 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events index a5226f0..20979f8 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events @@ -27,579 +27,6 @@ Description:Generic performance monitoring events basename. -What: /sys/devices/cpu/events/PM_1PLUS_PPC_CMPL - /sys/devices/cpu/events/PM_BRU_FIN - /sys/devices/cpu/events/PM_BR_MPRED - /sys/devices/cpu/events/PM_CMPLU_STALL - /sys/devices/cpu/events/PM_CMPLU_STALL_BRU - /sys/devices/cpu/events/PM_CMPLU_STALL_DCACHE_MISS - /sys/devices/cpu/events/PM_CMPLU_STALL_DFU - /sys/devices/cpu/events/PM_CMPLU_STALL_DIV - /sys/devices/cpu/events/PM_CMPLU_STALL_ERAT_MISS - /sys/devices/cpu/events/PM_CMPLU_STALL_FXU - /sys/devices/cpu/events/PM_CMPLU_STALL_IFU - /sys/devices/cpu/events/PM_CMPLU_STALL_LSU - /sys/devices/cpu/events/PM_CMPLU_STALL_REJECT - /sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR - /sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR_LONG - /sys/devices/cpu/events/PM_CMPLU_STALL_STORE - /sys/devices/cpu/events/PM_CMPLU_STALL_THRD - /sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR - /sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR_LONG - /sys/devices/cpu/events/PM_CYC - /sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED - 
/sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED_IC_MISS - /sys/devices/cpu/events/PM_GCT_NOSLOT_CYC - /sys/devices/cpu/events/PM_GCT_NOSLOT_IC_MISS - /sys/devices/cpu/events/PM_GRP_CMPL - /sys/devices/cpu/events/PM_INST_CMPL - /sys/devices/cpu/events/PM_LD_MISS_L1 - /sys/devices/cpu/events/PM_LD_REF_L1 - /sys/devices/cpu/events/PM_RUN_CYC - /sys/devices/cpu/events/PM_RUN_INST_CMPL - /sys/devices/cpu/events/PM_IC_DEMAND_L2_BR_ALL - /sys/devices/cpu/events/PM_GCT_UTIL_7_TO_10_SLOTS - /sys/devices/cpu/events/PM_PMC2_SAVED - /sys/devices/cpu/events/PM_VSU0_16FLOP - /sys/devices/cpu/events/PM_MRK_LSU_DERAT_MISS - /sys/devices/cpu/events/PM_MRK_ST_CMPL - /sys/devices/cpu/events/PM_NEST_PAIR3_ADD - /sys/devices/cpu/events/PM_L2_ST_DISP - /sys/devices/cpu/events/PM_L2_CASTOUT_MOD - /sys/devices/cpu/events/PM_ISEG - /sys/devices/cpu/events/PM_MRK_INST_TIMEO - /sys/devices/cpu/events/PM_L2_RCST_DISP_FAIL_ADDR - /sys/devices/cpu/events/PM_LSU1_DC_PREF_STREAM_CONFIRM - /sys/devices/cpu/events/PM_IERAT_WR_64K - /sys/devices/cpu/events/PM_MRK_DTLB_MISS_16M - /sys/devices/cpu/events/PM_IERAT_MISS - /sys/devices/cpu/events/PM_MRK_PTEG_FROM_LMEM - /sys/devices/cpu/events/PM_FLOP - /sys/devices/cpu/events/PM_THRD_PRIO_4_5_CYC - /sys/devices/cpu/events/PM_BR_PRED_TA - /sys/devices/cpu/events/PM_EXT_INT - /sys/devices/cpu/events/PM_VSU_FSQRT_FDIV - /sys/devices/cpu/events/PM_MRK_LD_MISS_EXPOSED_CYC - /sys/devices/cpu/events/PM_LSU1_LDF - /sys/devices/cpu/events/PM_IC_WRITE_ALL - /sys/devices/cpu/events/PM_LSU0_SRQ_STFWD - /sys/devices/cpu/events/PM_PTEG_FROM_RL2L3_MOD - /sys/devices/cpu/events/PM_MRK_DATA_FROM_L31_SHR - /sys/devices/cpu/events/PM_DATA_FROM_L21_MOD - /sys/devices/cpu/events/PM_VSU1_SCAL_DOUBLE_ISSUED - /sys/devices/cpu/events/PM_VSU0_8FLOP - /sys/devices/cpu/events/PM_POWER_EVENT1 - /sys/devices/cpu/events/PM_DISP_CLB_HELD_BAL - /sys/devices/cpu/events/PM_VSU1_2FLOP - /sys/devices/cpu/events/PM_LWSYNC_HELD - /sys/devices/cpu/events/PM_PTEG_FROM_DL2L3_SHR - 
/sys/devices/cpu/events/PM_INST_FROM_L21_MOD -
[PATCH 3/4] perf Documentation: sysfs events/ interfaces
From: Cody P Schafer c...@linux.vnet.ibm.com Add documentation for the event, event.scale, and event.unit files in sysfs. event.scale and event.unit were undocumented. event was previously documented only for specific powerpc pmu events. CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- .../testing/sysfs-bus-event_source-devices-events | 60 1 file changed, 60 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events index 7b40a3c..a5226f0 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events @@ -599,3 +599,63 @@ Description: POWER-systems specific performance monitoring events Further, multiple terms like 'event=0x' can be specified and separated with comma. All available terms are defined in the /sys/bus/event_source/devices/dev/format file. + +What: /sys/bus/event_source/devices/pmu/events/event +Date: 2014/02/24 +Contact: Linux kernel mailing list linux-ker...@vger.kernel.org +Description: Per-pmu performance monitoring events specific to the running system + + Each file (except for some of those with a '.' in them, '.unit' + and '.scale') in the 'events' directory describes a single + performance monitoring event supported by the pmu. The name + of the file is the name of the event. + + File contents: + + term[=value][,term[=value]]... + + Where term is one of the terms listed under + /sys/bus/event_source/devices/pmu/format/ and value is + a number is base-16 format with a '0x' prefix (lowercase only). + If a term is specified alone (without an assigned value), it + is implied that 0x1 is assigned to that term. 
+ + Examples (each of these lines would be in a seperate file): + + event=0x2abc + event=0x423,inv,cmask=0x3 + domain=0x1,offset=0x8,starting_index=0x + + Each of the assignments indicates a value to be assigned to a + particular set of bits (as defined by the format file + corresponding to the term) in the perf_event structure passed + to the perf_open syscall. + +What: /sys/bus/event_source/devices/pmu/events/event.unit +Date: 2014/02/24 +Contact: Linux kernel mailing list linux-ker...@vger.kernel.org +Description: Perf event units + + A string specifying the English plural numerical unit that event + (once multiplied by event.scale) represents. + + Example: + + Joules + +What: /sys/bus/event_source/devices/pmu/events/event.scale +Date: 2014/02/24 +Contact: Linux kernel mailing list linux-ker...@vger.kernel.org +Description: Perf event scaling factors + + A string representing a floating point value expressed in + scientific notation to be multiplied by the event count + recieved from the kernel to match the unit specified in the + event.unit file. + + Example: + + 2.3283064365386962890625e-10 + + This is provided to avoid performing floating point arithmetic + in the kernel. -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 00/10] Add support for parameterized events from sysfs
What this patchset does: - the first patch (override sysfs in tools/perf via SYSFS_PATH) was sent out previously, but needed a resend anyhow. Having it is useful for testing the later changes to tools/perf. - the second patch is a bugfix to the powerpc hv-24x7 code which was previously sent out, which is a good idea to have when testing these patches on POWER8 hardware. - document perf sysfs and the changes to add parameterized events - semi-notably: removes the growing list of specific POWER cpu events and begins documenting them generically, much like the docs for /sys/modules/MODULENAME do for modules. - tools/perf changes to support parameterized events - export some parameterized events from the powerpc pmus hv_24x7 and hv_gpci Description of event parameters from the documentation patch: Event parameters are a basic way for partial events to be specified in sysfs with per-event names given to the fields that need to be filled in when using a particular event. It is intended for supporting cases where the single 'cpu' parameter is insufficient. For example, POWER 8 has events for physical sockets/cores/cpus that are accessible from within virtual machines. To keep using the single 'cpu' parameter we'd need to perform a mapping between Linux's cpus and the physical machine's cpus (in this case Linux is running under a hypervisor). This isn't possible because bindings between our cpus and physical cpus may not be fixed, and we probably won't have a cpu on each physical cpu. Description of the sysfs contents when events are parameterized (copied from an included patch): Examples: domain=0x1,offset=0x8,starting_index=phys_cpu In the case of the last example, a value replacing phys_cpu would need to be provided by the user selecting the particular event. This is referred to as event parameterization. All non-numerical values indicate an event parameter.
Notes on how perf-list displays parameterized events (and how to use them, again culled from an included patch): PARAMETERIZED EVENTS Some pmu events listed by 'perf-list' will be displayed with '?' in them. For example: hv_gpci/dtbp_ptitc,phys_processor_idx=?/ This means that when provided as an event, a value for phys_processor_idx must also be supplied. For example: perf stat -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... Changelog[v3] - [Jiri Olsa] Changed how the event parameters are specified. If the event file specifies 'param=val', make the usage 'param=123' rather than 'val=123'. (patch 1,2/10) - Shortened event names using PHYS and VCPU (patch 4/10) - Print help message if invalid parameter is specified or required parameter is missing. - Moved 3 patches that are unrelated to parametrized events into a separate patchset. - Reordered patches so code changes come first. Changelog[v2] - [Joe Perches, David Laight] Use beNN_to_cpu() instead of guessing the size from type. - Use kmem_cache_free() to free page allocated with kmem_cache_alloc(). 
- Rebase to recent kernel *** BLURB HERE *** Cody P Schafer (10): tools/perf: support parsing parameterized events tools/perf: extend format_alias() to include event parameters perf: provide sysfs_show for struct perf_pmu_events_attr powerpc/perf/hv-24x7: parse catalog and populate sysfs with events perf: add PMU_EVENT_ATTR_STRING() helper powerpc/perf/{hv-gpci,hv-common}: generate requests with counters annotated powerpc/perf/hv-gpci: add the remaining gpci requests perf Documentation: add event parameters tools/perf: Document parameterized and symbolic events powerpc/perf/hv-24x7: Document sysfs event description entries .../testing/sysfs-bus-event_source-devices-events |6 + .../testing/sysfs-bus-event_source-devices-hv_24x7 | 22 + arch/powerpc/perf/hv-24x7-catalog.h| 25 + arch/powerpc/perf/hv-24x7-domains.h| 28 + arch/powerpc/perf/hv-24x7.c| 787 +++- arch/powerpc/perf/hv-24x7.h| 12 +- arch/powerpc/perf/hv-common.c | 10 +- arch/powerpc/perf/hv-gpci-requests.h | 262 +++ arch/powerpc/perf/hv-gpci.c|8 + arch/powerpc/perf/hv-gpci.h| 37 +- arch/powerpc/perf/req-gen/_begin.h | 13 + arch/powerpc/perf/req-gen/_clear.h |5 + arch/powerpc/perf/req-gen/_end.h |4 + arch/powerpc/perf/req-gen/_request-begin.h | 15 + arch/powerpc/perf/req-gen/_request-end.h |8 + arch/powerpc/perf/req-gen/perf.h | 155 include/linux/perf_event.h
[PATCH v3 02/10] tools/perf: extend format_alias() to include event parameters
From: Cody P Schafer c...@linux.vnet.ibm.com This causes `perf list pmu` to show parameters for parameterized events like follows: pmu/event_name,param1=?,param2=?/ [Kernel PMU event] An example: hv_gpci/dispatch_timebase_by_processor_processor_time_in_timebase_cycles,phys_processor_idx=?/ [Kernel PMU event] Changelog[v6] [Jir Olsa] If the parameter for an event in sysfs is 'param=val', have perf-list show the event as 'param=?' rather than 'val=?'. CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- tools/perf/util/pmu.c | 26 +- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8c7c4a1..0756917 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -748,10 +748,33 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) set_bit(b, bits); } +static int sub_non_neg(int a, int b) +{ + if (b a) + return 0; + return a - b; +} + static char *format_alias(char *buf, int len, struct perf_pmu *pmu, struct perf_pmu_alias *alias) { - snprintf(buf, len, %s/%s/, pmu-name, alias-name); + struct parse_events_term *term; + int used = snprintf(buf, len, %s/%s, pmu-name, alias-name); + + list_for_each_entry(term, alias-terms, list) + if (term-type_val == PARSE_EVENTS__TERM_TYPE_STR) + used += snprintf(buf + used, sub_non_neg(len, used), + ,%s=?, term-config); + + if (sub_non_neg(len, used) 0) { + buf[used] = '/'; + used++; + } + if (sub_non_neg(len, used) 0) { + buf[used] = '\0'; + used++; + } else + buf[len - 1] = '\0'; return buf; } @@ -802,6 +825,7 @@ void print_pmu_events(const char *event_glob, bool name_only) if (is_cpu !name_only) aliases[j] = format_alias_or(buf, sizeof(buf), pmu, alias); + aliases[j] = strdup(aliases[j]); j++; } -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 01/10] tools/perf: support parsing parameterized events
From: Cody P Schafer c...@linux.vnet.ibm.com Enable event specification like: pmu/event_name,param1=0x1,param2=0x4/ Assuming that /sys/bus/event_source/devices/pmu/events/event_name Contains something like param2=foo,bar=1,param1=baz Changelog[v6]: [Jiri Olsa] If the sysfs event file specifies 'param=val', make the usage 'hv_24x7/event,param=123/' rather than 'hv_24x7/event,val=123/'. [Sukadev Bhattiprolu] If verbose option is set, print help message if a required parameter is missing or invalid parameter is specified. CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- tools/perf/util/parse-events.h |1 + tools/perf/util/pmu.c | 64 +--- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index df094b4..9d7d2d5 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -59,6 +59,7 @@ struct parse_events_term { int type_val; int type_term; struct list_head list; + bool used; }; struct parse_events_evlist { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 9bf5827..8c7c4a1 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -504,30 +504,67 @@ static __u64 pmu_format_value(unsigned long *format, __u64 value) } /* + * Term is a string term, and might be a param-term. Try to look up it's value + * in the remaining terms. + * - We have a term like base-or-format-term=param-term, + * - We need to find the value supplied for param-term (with param-term named + * in a config string) later on in the term list. 
+ */ +static int pmu_resolve_param_term(struct parse_events_term *term, + struct list_head *head_terms, + __u64 *value) +{ + struct parse_events_term *t; + + list_for_each_entry(t, head_terms, list) { + if (t-type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (!strcmp(t-config, term-config)) { + t-used = true; + *value = t-val.num; + return 0; + } + } + } + + if (verbose) + printf(Required parameter '%s' not specified\n, term-config); + + return -1; +} + +/* * Setup one of config[12] attr members based on the * user input data - term parameter. */ static int pmu_config_term(struct list_head *formats, struct perf_event_attr *attr, - struct parse_events_term *term) + struct parse_events_term *term, + struct list_head *head_terms) { struct perf_pmu_format *format; __u64 *vp; + __u64 val; + + /* +* If this is a parameter we've already used for parameterized-eval, +* skip it in normal eval. +*/ + if (term-used) + return 0; /* -* Support only for hardcoded and numnerial terms. * Hardcoded terms should be already in, so nothing * to be done for them. */ if (parse_events__is_hardcoded_term(term)) return 0; - if (term-type_val != PARSE_EVENTS__TERM_TYPE_NUM) - return -EINVAL; - format = pmu_find_format(formats, term-config); - if (!format) + if (!format) { + if (verbose) + printf(Invalid event/parameter '%s'\n, term-config); return -EINVAL; + } switch (format-value) { case PERF_PMU_FORMAT_VALUE_CONFIG: @@ -544,11 +581,16 @@ static int pmu_config_term(struct list_head *formats, } /* -* XXX If we ever decide to go with string values for -* non-hardcoded terms, here's the place to translate -* them into value. +* Either directly use a numeric term, or try to translate string terms +* using event parameters. 
*/ - *vp |= pmu_format_value(format-bits, term-val.num); + if (term-type_val == PARSE_EVENTS__TERM_TYPE_NUM) + val = term-val.num; + else + if (pmu_resolve_param_term(term, head_terms, val)) + return -EINVAL; + + *vp |= pmu_format_value(format-bits, val); return 0; } @@ -559,7 +601,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct parse_events_term *term; list_for_each_entry(term, head_terms, list) - if (pmu_config_term(formats, attr, term)) + if (pmu_config_term(formats, attr, term, head_terms)) return -EINVAL; return 0; --
[PATCH v3 03/10] perf: provide sysfs_show for struct perf_pmu_events_attr
From: Cody P Schafer c...@linux.vnet.ibm.com (struct perf_pmu_events_attr) is defined in include/linux/perf_event.h, but the only show for it is in x86 and contains x86 specific stuff. Make a generic one for those of us who are just using the event_str. CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- include/linux/perf_event.h |3 +++ kernel/events/core.c |8 2 files changed, 11 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 707617a..cef4a56 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -882,6 +882,9 @@ struct perf_pmu_events_attr { const char *event_str; }; +ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); + #define PMU_EVENT_ATTR(_name, _var, _id, _show) \ static struct perf_pmu_events_attr _var = {\ .attr = __ATTR(_name, 0444, _show, NULL), \ diff --git a/kernel/events/core.c b/kernel/events/core.c index f9c1ed0..cf1a423 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8100,6 +8100,14 @@ void __init perf_event_init(void) != 1024); } +ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, %s\n, pmu_attr-event_str); +} + static int __init perf_event_sysfs_init(void) { struct pmu *pmu; -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 04/10] powerpc/perf/hv-24x7: parse catalog and populate sysfs with events
From: Cody P Schafer c...@linux.vnet.ibm.com Retrieves and parses the 24x7 catalog on POWER systems that supply it (right now, only POWER 8). Events are exposed via sysfs in the standard fashion, and are all parameterized. Catalog is (at the moment) only parsed on boot. It needs re-parsing when a some hypervisor events occur. At that point we'll also need to prevent old events from continuing to function (counter that is passed in via spare space in the config values?). Changelog[v6] [Sukadev Bhattiprolu] Use PHYS and VCPU in place of PHYSICAL and VIRTUAL_PROCESSOR to shorten the names of the domains and hence, events; Changelog[v2] [Joe Perches, David Laight] Use beNN_to_cpu() instead of guessing the size from type. Use kmem_cache_free() to free page allocated with kmem_cache_alloc(). CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-24x7-catalog.h | 25 ++ arch/powerpc/perf/hv-24x7-domains.h | 28 ++ arch/powerpc/perf/hv-24x7.c | 787 ++- arch/powerpc/perf/hv-24x7.h | 12 +- 4 files changed, 838 insertions(+), 14 deletions(-) create mode 100644 arch/powerpc/perf/hv-24x7-domains.h diff --git a/arch/powerpc/perf/hv-24x7-catalog.h b/arch/powerpc/perf/hv-24x7-catalog.h index 21b19dd..69e2e1f 100644 --- a/arch/powerpc/perf/hv-24x7-catalog.h +++ b/arch/powerpc/perf/hv-24x7-catalog.h @@ -30,4 +30,29 @@ struct hv_24x7_catalog_page_0 { __u8 reserved6[2]; } __packed; +struct hv_24x7_event_data { + __be16 length; /* in bytes, must be a multiple of 16 */ + __u8 reserved1[2]; + __u8 domain; /* Chip = 1, Core = 2 */ + __u8 reserved2[1]; + __be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */ + __be16 event_group_record_len; /* in bytes */ + + /* in bytes, offset from event_group_record */ + __be16 event_counter_offs; + + /* verified_state, unverified_state, caveat_state, broken_state, ... 
*/ + __be32 flags; + + __be16 primary_group_ix; + __be16 group_count; + __be16 event_name_len; + __u8 remainder[]; + /* __u8 event_name[event_name_len - 2]; */ + /* __be16 event_description_len; */ + /* __u8 event_desc[event_description_len - 2]; */ + /* __be16 detailed_desc_len; */ + /* __u8 detailed_desc[detailed_desc_len - 2]; */ +} __packed; + #endif diff --git a/arch/powerpc/perf/hv-24x7-domains.h b/arch/powerpc/perf/hv-24x7-domains.h new file mode 100644 index 000..49c1efd --- /dev/null +++ b/arch/powerpc/perf/hv-24x7-domains.h @@ -0,0 +1,28 @@ + +/* + * DOMAIN(name, num, index_kind, is_physical) + * + * @name: An all caps token, suitable for use in generating an enum + * member and appending to an event name in sysfs. + * + * @num: The number corresponding to the domain as given in + * documentation. We assume the catalog domain and the hcall + * domain have the same numbering (so far they do), but this + * may need to be changed in the future. + * + * @index_kind: A stringifiable token describing the meaning of the index + * within the given domain. Must fit the parsing rules of the + * perf sysfs api. + * + * @is_physical: True if the domain is physical, false otherwise (if virtual). + * + * Note: The terms PHYS_CHIP, PHYS_CORE, VCPU correspond to physical chip, + * physical core and virtual processor in 24x7 Counters specifications. 
+ */ + +DOMAIN(PHYS_CHIP, 0x01, chip, true) +DOMAIN(PHYS_CORE, 0x02, core, true) +DOMAIN(VCPU_HOME_CORE, 0x03, vcpu, false) +DOMAIN(VCPU_HOME_CHIP, 0x04, vcpu, false) +DOMAIN(VCPU_HOME_NODE, 0x05, vcpu, false) +DOMAIN(VCPU_REMOTE_NODE, 0x06, vcpu, false) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 9427ef7..f13e2ea 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -13,16 +13,80 @@ #define pr_fmt(fmt) hv-24x7: fmt #include linux/perf_event.h +#include linux/rbtree.h #include linux/module.h #include linux/slab.h +#include linux/vmalloc.h + #include asm/firmware.h #include asm/hvcall.h #include asm/io.h +#include linux/byteorder/generic.h #include hv-24x7.h #include hv-24x7-catalog.h #include hv-common.h +static const char *domain_to_index_string(unsigned domain) +{ + switch (domain) { +#define DOMAIN(n, v, x, c) \ + case HV_PERF_DOMAIN_##n:\ + return #x; +#include hv-24x7-domains.h +#undef DOMAIN + default: + WARN(1, unknown domain %d\n, domain); + return UNKNOWN_DOMAIN_INDEX_STRING; + } +} + +static const char *event_domain_suffix(unsigned domain) +{ + switch (domain) { +#define DOMAIN(n, v, x, c) \ + case
[PATCH v3 05/10] perf: add PMU_EVENT_ATTR_STRING() helper
From: Cody P Schafer c...@linux.vnet.ibm.com Helper for constructing static struct perf_pmu_events_attr s. CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- include/linux/perf_event.h |7 +++ 1 file changed, 7 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cef4a56..32190c5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -891,6 +891,13 @@ static struct perf_pmu_events_attr _var = { \ .id = _id, \ }; +#define PMU_EVENT_ATTR_STRING(_name, _var, _value) \ +static struct perf_pmu_events_attr _var = {\ + .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .event_str = _value,\ +}; + + #define PMU_FORMAT_ATTR(_name, _format) \ static ssize_t \ _name##_show(struct device *dev, \ -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 07/10] powerpc/perf/hv-gpci: add the remaining gpci requests
From: Cody P Schafer c...@linux.vnet.ibm.com Add the remaining gpci requests that contain counters suitable for use by perf. Omit those that don't contain any counters (but note their omission). CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-gpci-requests.h | 183 ++ 1 file changed, 183 insertions(+) diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index 0dfc4d9..a908b08 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -65,6 +65,33 @@ REQUEST(__count(0, 8, processor_time_in_timebase_cycles) ) #include I(REQUEST_END) +#define REQUEST_NAME entitled_capped_uncapped_donated_idle_timebase_by_partition +#define REQUEST_NUM 0x20 +#define REQUEST_IDX_KIND sibling_part_id +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 8, partition_id) + __count(0x8,8, entitled_cycles) + __count(0x10, 8, consumed_capped_cycles) + __count(0x18, 8, consumed_uncapped_cycles) + __count(0x20, 8, cycles_donated) + __count(0x28, 8, purr_idle_cycles) +) +#include I(REQUEST_END) + +/* + * Not available for counter_info_version = 0x8, use + * run_instruction_cycles_by_partition(0x100) instead. 
+ */ +#define REQUEST_NAME run_instructions_run_cycles_by_partition +#define REQUEST_NUM 0x30 +#define REQUEST_IDX_KIND sibling_part_id +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 8, partition_id) + __count(0x8,8, instructions_completed) + __count(0x10, 8, cycles) +) +#include I(REQUEST_END) + #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 #define REQUEST_IDX_KIND M1 @@ -75,5 +102,161 @@ REQUEST(__field(0, 1, perf_collect_privileged) ) #include I(REQUEST_END) +#define REQUEST_NAME processor_bus_utilization_abc_links +#define REQUEST_NUM 0x50 +#define REQUEST_IDX_KIND hw_chip_id +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4,0xC,reserved1) + __count(0x10, 8, total_link_cycles) + __count(0x18, 8, idle_cycles_for_a_link) + __count(0x20, 8, idle_cycles_for_b_link) + __count(0x28, 8, idle_cycles_for_c_link) + __array(0x30, 0x20, reserved2) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_wxyz_links +#define REQUEST_NUM 0x60 +#define REQUEST_IDX_KIND hw_chip_id +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4,0xC,reserved1) + __count(0x10, 8, total_link_cycles) + __count(0x18, 8, idle_cycles_for_w_link) + __count(0x20, 8, idle_cycles_for_x_link) + __count(0x28, 8, idle_cycles_for_y_link) + __count(0x30, 8, idle_cycles_for_z_link) + __array(0x38, 0x28, reserved2) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_gx_links +#define REQUEST_NUM 0x70 +#define REQUEST_IDX_KIND hw_chip_id +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4,0xC,reserved1) + __count(0x10, 8, gx0_in_address_cycles) + __count(0x18, 8, gx0_in_data_cycles) + __count(0x20, 8, gx0_in_retries) + __count(0x28, 8, gx0_in_bus_cycles) + __count(0x30, 8, gx0_in_cycles_total) + __count(0x38, 8, gx0_out_address_cycles) + __count(0x40, 8, gx0_out_data_cycles) + __count(0x48, 8, gx0_out_retries) + __count(0x50, 8, gx0_out_bus_cycles) + 
__count(0x58, 8, gx0_out_cycles_total) + __count(0x60, 8, gx1_in_address_cycles) + __count(0x68, 8, gx1_in_data_cycles) + __count(0x70, 8, gx1_in_retries) + __count(0x78, 8, gx1_in_bus_cycles) + __count(0x80, 8, gx1_in_cycles_total) + __count(0x88, 8, gx1_out_address_cycles) + __count(0x90, 8, gx1_out_data_cycles) + __count(0x98, 8, gx1_out_retries) + __count(0xA0, 8, gx1_out_bus_cycles) + __count(0xA8, 8, gx1_out_cycles_total) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_mc_links +#define REQUEST_NUM 0x80 +#define REQUEST_IDX_KIND hw_chip_id +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4,0xC,reserved1) + __count(0x10, 8, mc0_frames) + __count(0x18, 8, mc0_reads) + __count(0x20, 8, mc0_write) + __count(0x28, 8, mc0_total_cycles) + __count(0x30, 8, mc1_frames) + __count(0x38, 8, mc1_reads) + __count(0x40, 8, mc1_writes) + __count(0x48, 8,
[PATCH v3 06/10] powerpc/perf/{hv-gpci, hv-common}: generate requests with counters annotated
From: Cody P Schafer c...@linux.vnet.ibm.com This adds (in req-gen/) a framework for defining gpci counter requests. It uses macro magic similar to ftrace. Also convert the existing hv-gpci request structures and enum values to use the new framework (and adjust old users of the structs and enum values to cope with changes in naming). In exchange for this macro disaster, we get autogenerated event listing for GPCI in sysfs, build time field offset checking, and zero duplication of information about GPCI requests. CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- arch/powerpc/perf/hv-common.c | 10 +- arch/powerpc/perf/hv-gpci-requests.h | 79 ++ arch/powerpc/perf/hv-gpci.c|8 ++ arch/powerpc/perf/hv-gpci.h| 37 +++ arch/powerpc/perf/req-gen/_begin.h | 13 +++ arch/powerpc/perf/req-gen/_clear.h |5 + arch/powerpc/perf/req-gen/_end.h |4 + arch/powerpc/perf/req-gen/_request-begin.h | 15 +++ arch/powerpc/perf/req-gen/_request-end.h |8 ++ arch/powerpc/perf/req-gen/perf.h | 155 10 files changed, 304 insertions(+), 30 deletions(-) create mode 100644 arch/powerpc/perf/hv-gpci-requests.h create mode 100644 arch/powerpc/perf/req-gen/_begin.h create mode 100644 arch/powerpc/perf/req-gen/_clear.h create mode 100644 arch/powerpc/perf/req-gen/_end.h create mode 100644 arch/powerpc/perf/req-gen/_request-begin.h create mode 100644 arch/powerpc/perf/req-gen/_request-end.h create mode 100644 arch/powerpc/perf/req-gen/perf.h diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c index 47e02b3..7dce8f10 100644 --- a/arch/powerpc/perf/hv-common.c +++ b/arch/powerpc/perf/hv-common.c @@ -9,13 +9,13 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps) unsigned long r; struct p { struct hv_get_perf_counter_info_params params; - struct cv_system_performance_capabilities caps; + struct hv_gpci_system_performance_capabilities caps; } __packed 
__aligned(sizeof(uint64_t)); struct p arg = { .params = { .counter_request = cpu_to_be32( - CIR_SYSTEM_PERFORMANCE_CAPABILITIES), + HV_GPCI_system_performance_capabilities), .starting_index = cpu_to_be32(-1), .counter_info_version_in = 0, } @@ -31,9 +31,9 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps) caps-version = arg.params.counter_info_version_out; caps-collect_privileged = !!arg.caps.perf_collect_privileged; - caps-ga = !!(arg.caps.capability_mask CV_CM_GA); - caps-expanded = !!(arg.caps.capability_mask CV_CM_EXPANDED); - caps-lab = !!(arg.caps.capability_mask CV_CM_LAB); + caps-ga = !!(arg.caps.capability_mask HV_GPCI_CM_GA); + caps-expanded = !!(arg.caps.capability_mask HV_GPCI_CM_EXPANDED); + caps-lab = !!(arg.caps.capability_mask HV_GPCI_CM_LAB); return r; } diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h new file mode 100644 index 000..0dfc4d9 --- /dev/null +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -0,0 +1,79 @@ + +#include req-gen/_begin.h + +/* + * Based on the document getPerfCountInfo v1.07 + */ + +/* this needs to be -1 encoded in hex suitable for parsing by tools/perf. */ +#define M1 0x + +/* + * #define REQUEST_NAME counter_request_name + * #define REQUEST_NUM r_num + * #define REQUEST_IDX_KIND starting_index_kind + * #include I(REQUEST_BEGIN) + * REQUEST( + * __field(...) + * __field(...) + * __array(...) + * __count(...) + * ) + * #include I(REQUEST_END) + * + * - starting_index_kind is one of: + * M1: must be -1 + * chip_id: hardware chip id or -1 for current hw chip + * phys_processor_idx: + * + * __count(offset, bytes, name): + * a counter that should be exposed via perf + * __field(offset, bytes, name) + * a normal field + * __array(offset, bytes, name) + * an array of bytes + * + * + * @bytes for __count, and __field _must_ be a numeral token + * in decimal, not an expression and not in hex. 
+ * + * + * TODO: + * - expose secondary index (if any counter ever uses it, only 0xA0 + * appears to use it right now, and it doesn't have any counters) + * - embed versioning info + * - include counter descriptions + */ +#define REQUEST_NAME dispatch_timebase_by_processor +#define REQUEST_NUM 0x10 +#define REQUEST_IDX_KIND phys_processor_idx +#include I(REQUEST_BEGIN) +REQUEST(__count(0, 8, processor_time_in_timebase_cycles) + __field(0x8,4, hw_processor_id) + __field(0xC,
[PATCH v3 08/10] perf Documentation: add event parameters
From: Cody P Schafer c...@linux.vnet.ibm.com Event parameters are a basic way for partial events to be specified in sysfs with per-event names given to the fields that need to be filled in when using a particular event. It is intended for supporting cases where the single 'cpu' parameter is insufficient. For example, POWER 8 has events for physical sockets/cores/cpus that are accessible from within virtual machines. To keep using the single 'cpu' parameter we'd need to perform a mapping between Linux's cpus and the physical machine's cpus (in this case Linux is running under a hypervisor). This isn't possible because bindings between our cpus and physical cpus may not be fixed, and we probably won't have a cpu on each physical cpu. CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- .../testing/sysfs-bus-event_source-devices-events |6 ++ 1 file changed, 6 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events index 20979f8..c1f9850 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events @@ -52,12 +52,18 @@ Description:Per-pmu performance monitoring events specific to the running syste event=0x2abc event=0x423,inv,cmask=0x3 domain=0x1,offset=0x8,starting_index=0x + domain=0x1,offset=0x8,starting_index=phys_cpu Each of the assignments indicates a value to be assigned to a particular set of bits (as defined by the format file corresponding to the term) in the perf_event structure passed to the perf_open syscall. + In the case of the last example, a value replacing phys_cpu + would need to be provided by the user selecting the particular + event. This is referred to as event parameterization. All + non-numerical values indicate an event parameter. 
+ What: /sys/bus/event_source/devices/pmu/events/event.unit Date: 2014/02/24 Contact: Linux kernel mailing list linux-ker...@vger.kernel.org -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 09/10] tools/perf: Document parameterized and symbolic events
From: Cody P Schafer c...@linux.vnet.ibm.com Changelog[v6]: - [Sukadev Bhattiprolu]: Update documentation of perf-list and perf-record; Added documentation for perf-stat. CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com Signed-off-by: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com --- tools/perf/Documentation/perf-list.txt | 13 + tools/perf/Documentation/perf-record.txt | 12 tools/perf/Documentation/perf-stat.txt | 20 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 6fce6a6..c405da24 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -89,6 +89,19 @@ raw encoding of 0x1A8 can be used: You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. +PARAMETERIZED EVENTS + + +Some pmu events listed by 'perf-list' will be displayed with '?' in them. For +example: + + hv_gpci/dtbp_ptitc,starting_index=?/ + +This means that when provided as an event, a value for 'starting_index' must +also be supplied. For example: + + perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,starting_index=0x2/' ... + OPTIONS --- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index d460049..a6a2f9c 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -33,6 +33,18 @@ OPTIONS - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a hexadecimal event descriptor. + - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where + 'param1', 'param2', etc are defined as formats for the PMU in + /sys/bus/event_sources/devices/pmu/format/*. + + - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' + + where M, N, K are numbers (in decimal, hex, octal format). 
Acceptable + values for each of 'config', 'config1' and 'config2' are defined by + corresponding entries in /sys/bus/event_sources/devices/pmu/format/* + param1 and param2 are defined as formats for the PMU in: + /sys/bus/event_sources/devices/pmu/format/* + - a hardware breakpoint event in the form of '\mem:addr[:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 29ee857..04e150d 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -25,10 +25,22 @@ OPTIONS -e:: --event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a -hexadecimal event descriptor. + Select the PMU event. Selection can be: + + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a symbolically formed event like 'pmu/param1=0x3,param2/' where + param1 and param2 are defined as formats for the PMU in + /sys/bus/event_sources/devices/pmu/format/* + + - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' + where M, N, K are numbers (in decimal, hex, octal format). + Acceptable values for each of 'config', 'config1' and 'config2' + parameters are defined by corresponding entries in + /sys/bus/event_sources/devices/pmu/format/* -i:: --no-inherit:: -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 10/10] powerpc/perf/hv-24x7: Document sysfs event description entries
From: Cody P Schafer c...@linux.vnet.ibm.com CC: Sukadev Bhattiprolu suka...@linux.vnet.ibm.com CC: Haren Myneni hb...@us.ibm.com CC: Cody P Schafer d...@codyps.com Signed-off-by: Cody P Schafer c...@linux.vnet.ibm.com --- .../testing/sysfs-bus-event_source-devices-hv_24x7 | 22 1 file changed, 22 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 index e78ee79..5b501d7 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 @@ -21,3 +21,25 @@ Contact: Cody P Schafer c...@linux.vnet.ibm.com Description: Exposes the version field of the 24x7 catalog. This is also extractable from the provided binary catalog sysfs entry. + +What: /sys/bus/event_source/devices/hv_24x7/event_descs/event-name +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + Provides the description of a particular event as provided by + the firmware. If firmware does not provide a description, no + file will be created. + + Note that the event-name lacks the domain suffix appended for + events in the events/ dir. + +What: /sys/bus/event_source/devices/hv_24x7/event_long_descs/event-name +Date: February 2014 +Contact: Cody P Schafer c...@linux.vnet.ibm.com +Description: + Provides the long description of a particular event as + provided by the firmware. If firmware does not provide a + description, no file will be created. + + Note that the event-name lacks the domain suffix appended for + events in the events/ dir. -- 1.7.9.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v4 04/11] drivers: base: support cpu cache information interface to userspace via sysfs
On Wed, Sep 17, 2014 at 06:25:10PM +0100, Sudeep Holla wrote: Hi Greg, On 03/09/14 18:00, Sudeep Holla wrote: From: Sudeep Holla sudeep.ho...@arm.com This patch adds initial support for providing processor cache information to userspace through sysfs interface. This is based on already existing implementations(x86, ia64, s390 and powerpc) and hence the interface is intended to be fully compatible. The main purpose of this generic support is to avoid further code duplication to support new architectures and also to unify all the existing different implementations. This implementation maintains the hierarchy of cache objects which reflects the system's cache topology. Cache devices are instantiated as needed as CPUs come online. The cache information is replicated per-cpu even if they are shared. A per-cpu array of cache information maintained is used mainly for sysfs-related book keeping. It also implements the shared_cpu_map attribute, which is essential for enabling both kernel and user-space to discover the system's overall cache topology. This patch also add the missing ABI documentation for the cacheinfo sysfs interface already, which is well defined and widely used. Can you review the first 4 patches in this series please ? It's in my todo queue, which is really long at the moment due to me going to conferences (at one right now...) Will be working on this soon, thanks for your patience. greg k-h ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [3/5] pseries: Create device hotplug entry point
On 09/17/2014 02:07 AM, Michael Ellerman wrote: On Mon, 2014-09-15 at 15:31 -0500, Nathan Fontenot wrote: For pseries system the kernel will be notified of hotplug requests in the form of rtas hotplug events. Can you flesh that design out a bit for me, I don't entirely get how it's going to work. The kernel gets the rtas hotplug events (in rtasd.c) and spits them out to userspace, which then writes them back in ? This patch creates a common routine that can handle these requests in both the PowerVM anbd PowerKVM environments, handle_dlpar_errorlog(). This also ^ creates the initial memory hotplug request handling stub. For PowerVM this patch also creates a new /proc file that the drmgr command will use to write rtas hotplug events to. Why is this different between phyp and KVM? For future PowerKVM handling the rtas check-exception code can pass any rtas hotplug events received to handle_dlpar_errorlog(). Internally to the kernel you mean? Perhaps a better explanation of how things work today and where I see them going is needed. I was trying to avoid a long explanation and I don't think my shortened explanation worked. I'll include this in v2 of the patchset too. The current hotplug (or dlpar) of devices (the process is generally the same for memory, cpu, and pci) on PowerVM systems is initiated from the HMC, which communicates the request to the partitions through the RSCT framework. The RSCT framework then invokes the drmgr command. The drmgr command performs the hotplug operation by doing some pieces, such as most of the rtas calls and device tree parsing, in userspace and make requests to the kernel to online/offline the device, update the device tree and add/remove the device. For PowerKVM the approach is to follow what is currently being done for pci hotplug. A hotplug request is initiated from the host. QEMU then sends an EPOW interrupt to the guest which causes the guest to make the rtas,check-exception call. 
In QEMU, the rtas,check-exception call returns a rtas hotplug event to the guest. I was using this same framework to also enable memory (and next cpu) hotplug. You are correct that the current pci hotplug path for PowerKVM involves the kernel receiving the rtas event, passing it to rtas_errd in userspace, and having rtas_errd invoke drmgr. The drmgr command then handles the request as described above for PowerVM systems. There is no need for this circuitous route, we should just handle the entire hotplug of devices in the kernel. What I am hoping to do is to enable this by moving the code to handle hotplug from drmgr into the kernel and provide a single path for handling hotplug for PowerVM and PowerKVM. To make this work for PowerKVM we will update the kernel rtas code to recognize rtas hotplug events returned from rtas,check-exception calls and call handle_dlpar_errorlog(). The hotplug rtas event is never sent out to userspace. For PowerVM systems, I created the /proc/powerpc/dlpar file that a rtas hotplug event can be written to and passed to handle_dlpar_errorlog(). There is no chance of updating how we receive hotplug requests on PowerVM systems. Hopefully that explains the design better. diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index a2450b8..574ec73 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -16,7 +16,9 @@ #include linux/cpu.h #include linux/slab.h #include linux/of.h +#include linux/proc_fs.h #include offline_states.h +#include pseries.h #include asm/prom.h #include asm/machdep.h @@ -530,13 +532,72 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return count; } +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ That is really confusing, but I think it's just a diff artifact? Yes, diff artifact. 
+static int handle_dlpar_errorlog(struct rtas_error_log *error_log) +{ +struct pseries_errorlog *pseries_log; +struct pseries_hp_errorlog *hp_elog; +int rc = -EINVAL; + +pseries_log = get_pseries_errorlog(error_log, + PSERIES_ELOG_SECT_ID_HOTPLUG); +if (!pseries_log) +return rc; + +hp_elog = (struct pseries_hp_errorlog *)pseries_log-data; +if (!hp_elog) +return rc; I don't see how that can happen? struct pseries_errorlog { __be16 id; /* 0x00 2-byte ASCII section ID */ __be16 length; /* 0x02 Section length in bytes */ uint8_t version;/* 0x04 Section version */ uint8_t subtype;/* 0x05 Section subtype */ __be16 creator_component; /* 0x06 Creator component ID*/ uint8_t data[]; /* 0x08 Start of section data */ }; Should you be checking for length == 0 instead ? You are correct. Also I think the code will
Re: [4/5] pseries: Implement memory hotplug add in the kernel
On 09/17/2014 02:07 AM, Michael Ellerman wrote: On Mon, 2014-09-15 at 15:32 -0500, Nathan Fontenot wrote: This patch adds the ability to do memory hotplug adding in the kernel. Currently the hotplug add/remove of memory is handled by the drmgr command. The drmgr command performs the add/remove by performing some work in user-space and making requests to the kernel to handle other pieces. By moving all of the work to the kernel we can do the add and remove faster, and provide a common place to do memory hotplug for both the PowerVM and PowerKVM environments. Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 170 +++ 1 file changed, 170 insertions(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 0e60e15..b254773 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -17,6 +17,7 @@ #include linux/vmalloc.h #include linux/memory.h #include linux/memory_hotplug.h +#include linux/slab.h #include asm/firmware.h #include asm/machdep.h @@ -24,6 +25,8 @@ #include asm/sparsemem.h #include asm/rtas.h +#include pseries.h + DEFINE_MUTEX(dlpar_mem_mutex); unsigned long pseries_memory_block_size(void) @@ -69,6 +72,53 @@ unsigned long pseries_memory_block_size(void) return memblock_size; } +static void dlpar_free_drconf_property(struct property *prop) +{ +kfree(prop-name); +kfree(prop-value); +kfree(prop); +} + +static struct property *dlpar_clone_drconf_property(struct device_node *dn) +{ +struct property *prop, *new_prop; + +prop = of_find_property(dn, ibm,dynamic-memory, NULL); +if (!prop) +return NULL; + +new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); +if (!new_prop) +return NULL; + +new_prop-name = kstrdup(prop-name, GFP_KERNEL); +new_prop-value = kmalloc(prop-length + 1, GFP_KERNEL); +if (!new_prop-name || !new_prop-value) { +dlpar_free_drconf_property(new_prop); +return NULL; +} + 
+memcpy(new_prop-value, prop-value, prop-length); +new_prop-length = prop-length; +*(((char *)new_prop-value) + new_prop-length) = 0; It's not a string, is it? No, property-value is a void*. I'll drop that line of code. +return new_prop; +} + +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +{ +unsigned long section_nr; +struct mem_section *mem_sect; +struct memory_block *mem_block; +u64 phys_addr = be64_to_cpu(lmb-base_addr); + +section_nr = pfn_to_section_nr(PFN_DOWN(phys_addr)); +mem_sect = __nr_to_section(section_nr); + +mem_block = find_memory_block(mem_sect); +return mem_block; +} + #ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memory(u64 start, u64 size) { @@ -155,13 +205,133 @@ static inline int pseries_remove_mem_node(struct device_node *np) } #endif /* CONFIG_MEMORY_HOTREMOVE */ +static int dlpar_add_one_lmb(struct of_drconf_cell *lmb) +{ +struct memory_block *mem_block; +u64 phys_addr; +unsigned long pages_per_block; +unsigned long block_sz; +int nid, sections_per_block; +int rc; + +phys_addr = be64_to_cpu(lmb-base_addr); of_drconf_cell needs endian annotations. Yes it does. I can include a patch to update the struct. +block_sz = memory_block_size_bytes(); +sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; +pages_per_block = PAGES_PER_SECTION * sections_per_block; + +if (phys_addr ((pages_per_block PAGE_SHIFT) - 1)) +return -EINVAL; + +nid = memory_add_physaddr_to_nid(phys_addr); +rc = add_memory(nid, phys_addr, block_sz); +if (rc) +return rc; + +rc = memblock_add(phys_addr, block_sz); +if (rc) { +remove_memory(nid, phys_addr, block_sz); +return rc; +} + +mem_block = lmb_to_memblock(lmb); +if (!mem_block) { +remove_memory(nid, phys_addr, block_sz); +return -EINVAL; +} That could all use a lot of comments. ie. why do we have to add it twice? We don't actually add it twice, though I can see how one could think that based on the names of the routines called. I'll add comments to clarify this in v2 of the patch. 
memory_add_physaddr_to_nid(), this doesn't add anything despite its naming. The routine finds the node id for the specified physical address. add_memory(), this actually adds the memory. memblock_add(), this informs the memory block information tracking about the newly added memory. Why this is not done as part of add_memory I don't know. +rc = device_online(mem_block-dev); +put_device(mem_block-dev); +if (rc) +remove_memory(nid, phys_addr, block_sz); + +
Re: [RFC PATCH] dt:numa: adding numa node mapping for memory nodes.
On Wed, Sep 17, 2014 at 04:37:30PM +0100, Kumar Gala wrote: On Sep 17, 2014, at 1:56 AM, Ganapatrao Kulkarni ganapatrao.kulka...@caviumnetworks.com wrote: From: Ganapatrao Kulkarni ganapatrao.kulka...@cavium.com This patch adds property nid to memory node to provide the memory range to numa node id mapping. Signed-off-by: Ganapatrao Kulkarni ganapatrao.kulka...@cavium.com — Adding the PPC guys as they’ve been doing NUMA on IBM Power Servers for years with OF/DT. So we should really try and follow what they’ve done. Agreed. Documentation/devicetree/bindings/numa.txt | 58 ++ 1 file changed, 58 insertions(+) create mode 100644 Documentation/devicetree/bindings/numa.txt diff --git a/Documentation/devicetree/bindings/numa.txt b/Documentation/devicetree/bindings/numa.txt new file mode 100644 index 000..c4a94f2 --- /dev/null +++ b/Documentation/devicetree/bindings/numa.txt @@ -0,0 +1,58 @@ +== +numa id binding description +== + +== +1 - Introduction +== +The device node property nid(numa node id) can be added to memory Why the quotes? +device node to map the range of memory addresses as defined in property reg. +The property nid maps the memory range to the numa node id, which is used to +find the local and remote pages on numa aware systems. What is a numa node id, exactly, and how is the OS intended to use it? I don't see how this can be of any use as-is. Mark. + +== +2 - nid property +== +Numa node id, nid is required property of memory device node for +numa enabled platforms. 
+ +|--| +|Property Type | Usage | Value Type | Definition | +|--| +| nid | R|u32 | Numa Node id| +| | || for this memory | +|--| + + +4 - Example memory nodes with numa node id mapping + + +Example 1 (2 memory nodes, each mapped to a numa node.): + + memory@ { + device_type = memory; + reg = 0x0 0x 0x0 0x8000; + nid = 0x0; + }; + + memory@100 { + device_type = memory; + reg = 0x100 0x 0x0 0x8000; + nid = 0x1; + }; + +Example 2 (multiple memory ranges in each memory node and mapped to numa node): + + memory@ { + device_type = memory; + reg = 0x0 0x 0x0 0x8000, + 0x0 0x8000 0x0 0x8000; + nid = 0x0; + }; + + memory@100 { + device_type = memory; + reg = 0x100 0x 0x0 0x8000, + 0x100 0x8000 0x0 0x8000; + nid = 0x1; + }; -- 1.8.1.4 -- Employee of Qualcomm Innovation Center, Inc. Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation -- To unsubscribe from this list: send the line unsubscribe devicetree in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [5/5] pseries: Implement memory hotplug remove in the kernel
On 09/17/2014 02:07 AM, Michael Ellerman wrote: On Mon, 2014-09-15 at 15:33 -0500, Nathan Fontenot wrote: This patch adds the ability to do memory hotplug remove in the kernel. Currently the hotplug add/remove of memory is handled by the drmgr command. The drmgr command performs the add/remove by performing some work in user-space and making requests to the kernel to handle other pieces. By moving all of the work to the kernel we can do the add and remove faster, and provide a common place to do memory hotplug for both the PowerVM and PowerKVM environments. Signed-off-by: Nathan Fontenot nf...@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 140 +++ 1 file changed, 139 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index b254773..160c424 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -193,7 +193,137 @@ static int pseries_remove_mem_node(struct device_node *np) pseries_remove_memblock(base, lmb_size); return 0; } + +static int lmb_is_removable(struct of_drconf_cell *lmb) +{ Do we not already have something like this? No. Perhaps you're thinking of the code in drivers/base/memory.c that handles the sysfs removable file. That code just calls the same is_mem_section_removable() routine. +int i, scns_per_block; +int rc = 1; I can see this makes the = work below. But what if block_sz / MIN_MEMORY_BLOCK_SIZE = 0 ? If that happens, something else is really wrong. Most likely a malformed device tree. For pseries MIN_MEMORY_BLOCK_SIZE is defined to be the smallest LMB size we support, 16MB. I can add a pr_warn() statement here and bail if that happens. 
+unsigned long pfn, block_sz; +u64 phys_addr; + +phys_addr = be64_to_cpu(lmb-base_addr); +block_sz = memory_block_size_bytes(); +scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + +for (i = 0; i scns_per_block; i++) { +pfn = PFN_DOWN(phys_addr); +if (!pfn_present(pfn)) +continue; + +rc = is_mem_section_removable(pfn, PAGES_PER_SECTION); +phys_addr += MIN_MEMORY_BLOCK_SIZE; +} + +return rc; +} +static int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) +{ ... +} Most of the same comments as for add. ok, I'll go through them and apply them to the remove code. Thanks for the review. -Nathan ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 00/21] powerpc/8xx: Optimise MMU TLB handling and add support of 16k pages
Le 17/09/2014 18:40, Scott Wood a écrit : On Wed, 2014-09-17 at 18:36 +0200, Christophe Leroy wrote: This patchset: 1) provides several MMU TLB handling optimisation on MPC8xx. 2) adds support of 16k pages on MPC8xx. All changes have been successfully tested on a custom board equipped with MPC885 Signed-off-by: Christophe Leroy christophe.le...@c-s.fr Tested-by: Christophe Leroy christophe.le...@c-s.fr I've already applied patches 1, 2, 4, 5, 6, 9, and 10 from the previous patchset -- have they changed? -Scott No, only 3, 7, 17 are changed, and 20,21 are new. I didn't notice you already applied some. How should I then proceed now for the remaining ones ? Submit a new set ? Thanks Christophe --- Ce courrier électronique ne contient aucun virus ou logiciel malveillant parce que la protection avast! Antivirus est active. http://www.avast.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 00/21] powerpc/8xx: Optimise MMU TLB handling and add support of 16k pages
On Wed, 2014-09-17 at 22:33 +0200, christophe leroy wrote: Le 17/09/2014 18:40, Scott Wood a écrit : On Wed, 2014-09-17 at 18:36 +0200, Christophe Leroy wrote: This patchset: 1) provides several MMU TLB handling optimisation on MPC8xx. 2) adds support of 16k pages on MPC8xx. All changes have been successfully tested on a custom board equipped with MPC885 Signed-off-by: Christophe Leroy christophe.le...@c-s.fr Tested-by: Christophe Leroy christophe.le...@c-s.fr I've already applied patches 1, 2, 4, 5, 6, 9, and 10 from the previous patchset -- have they changed? -Scott No, only 3, 7, 17 are changed, and 20,21 are new. I didn't notice you already applied some. How should I then proceed now for the remaining ones ? Submit a new set ? No, I'll just skip the ones I've already applied. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 0/9] powerpc/powernv: Support for fastsleep and winkle
Hi, In this patch series we use winkle for offlined cores. I successfully tested the working of this with subcore functionality. Test scenario was as follows: 1. Set SMT mode to 1, Set subcores-per-core to 1 2. Offline a core, in this case cpu 32 (sending it to winkle) 3. Set subcores-per-core to 4 4. Online the core 5. Start a guest (Topology 1 core 2 threads) on a subcore, in this case on cpu 36 This works without any glitch. Thanks, Shreyas On Monday 25 August 2014 11:31 PM, Shreyas B. Prabhu wrote: Fast sleep is an idle state, where the core and the L1 and L2 caches are brought down to a threshold voltage. This also means that the communication between L2 and L3 caches has to be fenced. However the current P8 chips have a bug wherein this fencing between L2 and L3 caches gets delayed by a cpu cycle. This can delay L3 response to the other cpus if they request for data during this time. Thus they would fetch the same data from the memory which could lead to data corruption if L3 cache is not flushed. Patch 4 adds support to work around this. 'Deep Winkle' is a deeper idle state where core and private L2 are powered off. While it offers higher power savings, it is at the cost of losing hypervisor register state and higher latency. Patches 5-9 add support for winkle and use it for offline cpus. Patch 1 - Moves parameters required to discover idle states to a location common to both cpuidle driver and powernv core code Patch 2 - Populates idle state details from device tree Patch 3 - Enables cpus to run guest after waking up from fastsleep/winkle Cc: Benjamin Herrenschmidt b...@kernel.crashing.org Cc: Paul Mackerras pau...@samba.org Cc: Michael Ellerman m...@ellerman.id.au Cc: Rafael J. Wysocki r...@rjwysocki.net Cc: Srivatsa S. Bhat sriva...@mit.edu Cc: Preeti U. 
Murthy pre...@linux.vnet.ibm.com Cc: Vaidyanathan Srinivasan sva...@linux.vnet.ibm.com Cc: Rob Herring robh...@kernel.org Cc: Grant Likely grant.lik...@linaro.org Cc: devicet...@vger.kernel.org Cc: linux...@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Preeti U Murthy (2): cpuidle/powernv: Populate cpuidle state details by querying the device-tree powerpc/powernv/cpuidle: Add workaround to enable fastsleep Shreyas B. Prabhu (6): powerpc/kvm/book3s_hv: Enable CPUs to run guest after waking up from fast-sleep powerpc/powernv: Add OPAL call to save and restore powerpc: Adding macro for accessing Thread Switch Control Register powerpc/powernv: Add winkle infrastructure powerpc/powernv: Discover and enable winkle powerpc/powernv: Enter deepest supported idle state in offline Srivatsa S. Bhat (1): powerpc/powernv: Enable Offline CPUs to enter deep idle states arch/powerpc/include/asm/machdep.h | 4 + arch/powerpc/include/asm/opal.h| 10 ++ arch/powerpc/include/asm/paca.h| 3 + arch/powerpc/include/asm/ppc-opcode.h | 2 + arch/powerpc/include/asm/processor.h | 6 +- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/exceptions-64s.S | 37 ++--- arch/powerpc/kernel/idle.c | 30 arch/powerpc/kernel/idle_power7.S | 83 +- arch/powerpc/platforms/powernv/opal-wrappers.S | 2 + arch/powerpc/platforms/powernv/powernv.h | 8 + arch/powerpc/platforms/powernv/setup.c | 217 + arch/powerpc/platforms/powernv/smp.c | 13 +- arch/powerpc/platforms/powernv/subcore.c | 15 ++ drivers/cpuidle/cpuidle-powernv.c | 40 - 16 files changed, 439 insertions(+), 33 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev