[PATCH] powerpc: Enable /dev/port when isa_io_special is set
isa_io_special is set when the platform provides a special implementation of inX/outX via some FW interface for example. Such a platform doesn't need an ISA bridge on PCI, and so /dev/port should be made available even if one isn't present. This makes the LPC bus IOs accessible via /dev/port on PowerNV Power8. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- arch/powerpc/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 5a64757..edcc209 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -21,7 +21,7 @@ extern struct pci_dev *isa_bridge_pcidev; /* * has legacy ISA devices ? */ -#define arch_has_dev_port() (isa_bridge_pcidev != NULL) +#define arch_has_dev_port() (isa_bridge_pcidev != NULL || isa_io_special) #endif #include <linux/device.h> ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/scom: Change scom_read() and scom_write() to return errors
scom_read() now returns the read value via a pointer argument and both functions return an int error code Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- arch/powerpc/include/asm/scom.h | 23 +-- arch/powerpc/platforms/wsp/scom_smp.c | 18 +- arch/powerpc/platforms/wsp/scom_wsp.c | 12 arch/powerpc/platforms/wsp/wsp.c | 13 +++-- arch/powerpc/sysdev/scom.c| 3 +-- 5 files changed, 46 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h index 0cabfd7..07dcdcf 100644 --- a/arch/powerpc/include/asm/scom.h +++ b/arch/powerpc/include/asm/scom.h @@ -54,8 +54,8 @@ struct scom_controller { scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count); void (*unmap)(scom_map_t map); - u64 (*read)(scom_map_t map, u32 reg); - void (*write)(scom_map_t map, u32 reg, u64 value); + int (*read)(scom_map_t map, u32 reg, u64 *value); + int (*write)(scom_map_t map, u32 reg, u64 value); }; extern const struct scom_controller *scom_controller; @@ -133,10 +133,18 @@ static inline void scom_unmap(scom_map_t map) * scom_read - Read a SCOM register * @map: Result of scom_map * @reg: Register index within that map + * @value: Updated with the value read + * + * Returns 0 (success) or a negative error code */ -static inline u64 scom_read(scom_map_t map, u32 reg) +static inline int scom_read(scom_map_t map, u32 reg, u64 *value) { - return scom_controller-read(map, reg); + int rc; + + rc = scom_controller-read(map, reg, value); + if (rc) + *value = 0xul; + return rc; } /** @@ -144,12 +152,15 @@ static inline u64 scom_read(scom_map_t map, u32 reg) * @map: Result of scom_map * @reg: Register index within that map * @value: Value to write + * + * Returns 0 (success) or a negative error code */ -static inline void scom_write(scom_map_t map, u32 reg, u64 value) +static inline int scom_write(scom_map_t map, u32 reg, u64 value) { - scom_controller-write(map, reg, value); + return scom_controller-write(map, reg, value); } + 
#endif /* CONFIG_PPC_SCOM */ #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c index b56b70a..268bc89 100644 --- a/arch/powerpc/platforms/wsp/scom_smp.c +++ b/arch/powerpc/platforms/wsp/scom_smp.c @@ -116,7 +116,14 @@ static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask) scom_write(scom, SCOM_RAMIC, cmd); - while (!((val = scom_read(scom, SCOM_RAMC)) mask)) { + for (;;) { + if (scom_read(scom, SCOM_RAMC, val) != 0) { + pr_err(SCOM error on instruction 0x%08x, thread %d\n, + insn, thread); + return -1; + } + if (val mask) + break; pr_devel(Waiting on RAMC = 0x%llx\n, val); if (++n == 3) { pr_err(RAMC timeout on instruction 0x%08x, thread %d\n, @@ -151,9 +158,7 @@ static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt, if (rc) return rc; - *out_gpr = scom_read(scom, SCOM_RAMD); - - return 0; + return scom_read(scom, SCOM_RAMD, out_gpr); } static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr) @@ -353,7 +358,10 @@ int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np) pr_devel(Bringing up CPU%d using SCOM...\n, lcpu); - pccr0 = scom_read(scom, SCOM_PCCR0); + if (scom_read(scom, SCOM_PCCR0, pccr0) != 0) { + printk(KERN_ERR XSCOM failure readng PCCR0 on CPU%d\n, lcpu); + return -1; + } scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG | SCOM_PCCR0_ENABLE_RAM); diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c index 4052e22..54172c4 100644 --- a/arch/powerpc/platforms/wsp/scom_wsp.c +++ b/arch/powerpc/platforms/wsp/scom_wsp.c @@ -50,18 +50,22 @@ static void wsp_scom_unmap(scom_map_t map) iounmap((void *)map); } -static u64 wsp_scom_read(scom_map_t map, u32 reg) +static int wsp_scom_read(scom_map_t map, u32 reg, u64 *value) { u64 __iomem *addr = (u64 __iomem *)map; - return in_be64(addr + reg); + *value = in_be64(addr + reg); + + return 0; } 
-static void wsp_scom_write(scom_map_t map, u32 reg, u64 value) +static int wsp_scom_write(scom_map_t map, u32 reg, u64 value) { u64 __iomem *addr = (u64 __iomem *)map; - return out_be64(addr + reg, value); + out_be64(addr + reg, value); + + return 0; } static const struct scom_controller wsp_scom_controller = {
[PATCH] powerpc/scom: Add support for reg property
When devices are direct children of a scom controller node, they should be able to use the normal reg property instead of scom-reg. In that case, they also use #address-cells rather than #scom-cells to indicate the size of an entry. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- arch/powerpc/sysdev/scom.c | 22 +- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c index 10f1d9e..413622d 100644 --- a/arch/powerpc/sysdev/scom.c +++ b/arch/powerpc/sysdev/scom.c @@ -53,7 +53,7 @@ scom_map_t scom_map_device(struct device_node *dev, int index) { struct device_node *parent; unsigned int cells, size; - const u32 *prop; + const __be32 *prop, *sprop; u64 reg, cnt; scom_map_t ret; @@ -62,12 +62,24 @@ scom_map_t scom_map_device(struct device_node *dev, int index) if (parent == NULL) return 0; - prop = of_get_property(parent, #scom-cells, NULL); - cells = prop ? *prop : 1; - + /* +* We support scom-reg properties for adding scom registers +* to a random device-tree node with an explicit scom-parent +* +* We also support the simple reg property if the device is +* a direct child of a scom controller. +* +* In case both exist, scom-reg takes precedence. +*/ prop = of_get_property(dev, scom-reg, size); + sprop = of_get_property(parent, #scom-cells, NULL); + if (!prop parent == dev-parent) { + prop = of_get_property(dev, reg, size); + sprop = of_get_property(parent, #address-cells, NULL); + } if (!prop) - return 0; + return NULL; + cells = sprop ? be32_to_cpup(sprop) : 1; size = 2; if (index = (size / (2*cells))) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/powernv: Add scom support under OPALv3
OPAL v3 provides interfaces to access the chips XSCOM, expose this via the existing scom infrastructure. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- arch/powerpc/platforms/powernv/Kconfig | 1 + arch/powerpc/platforms/powernv/Makefile | 1 + arch/powerpc/platforms/powernv/opal-xscom.c | 105 3 files changed, 107 insertions(+) create mode 100644 arch/powerpc/platforms/powernv/opal-xscom.c diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 6fae5eb..7f39da0 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -9,6 +9,7 @@ config PPC_POWERNV select EPAPR_BOOT select PPC_INDIRECT_PIO select PPC_UDBG_16550 + select PPC_SCOM default y config POWERNV_MSI diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 300c437..02dc1f5 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -4,3 +4,4 @@ obj-y += opal-rtc.o opal-nvram.o opal-lpc.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o +obj-$(CONFIG_PPC_SCOM) += opal-xscom.o diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c new file mode 100644 index 000..3ed5c64 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -0,0 +1,105 @@ +/* + * PowerNV LPC bus handling. + * + * Copyright 2013 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include linux/kernel.h +#include linux/of.h +#include linux/bug.h +#include linux/gfp.h +#include linux/slab.h + +#include asm/machdep.h +#include asm/firmware.h +#include asm/opal.h +#include asm/scom.h + +/* + * We could probably fit that inside the scom_map_t + * which is a void* after all but it's really too ugly + * so let's kmalloc it for now + */ +struct opal_scom_map { + uint32_t chip; + uint32_t addr; +}; + +static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count) +{ + struct opal_scom_map *m; + const __be32 *gcid; + + if (!of_get_property(dev, scom-controller, NULL)) { + pr_err(%s: device %s is not a SCOM controller\n, + __func__, dev-full_name); + return SCOM_MAP_INVALID; + } + gcid = of_get_property(dev, ibm,chip-id, NULL); + if (!gcid) { + pr_err(%s: device %s has no ibm,chip-id\n, + __func__, dev-full_name); + return SCOM_MAP_INVALID; + } + m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL); + if (!m) + return NULL; + m-chip = be32_to_cpup(gcid); + m-addr = reg; + + return (scom_map_t)m; +} + +static void opal_scom_unmap(scom_map_t map) +{ + kfree(map); +} + +static int opal_xscom_err_xlate(int64_t rc) +{ + switch(rc) { + case 0: + return 0; + /* Add more translations if necessary */ + default: + return -EIO; + } +} + +static int opal_scom_read(scom_map_t map, u32 reg, u64 *value) +{ + struct opal_scom_map *m = map; + int64_t rc; + + rc = opal_xscom_read(m-chip, m-addr + reg, (uint64_t *)__pa(value)); + return opal_xscom_err_xlate(rc); +} + +static int opal_scom_write(scom_map_t map, u32 reg, u64 value) +{ + struct opal_scom_map *m = map; + int64_t rc; + + rc = opal_xscom_write(m-chip, m-addr + reg, value); + return opal_xscom_err_xlate(rc); +} + +static const struct scom_controller opal_scom_controller = { + .map= opal_scom_map, + .unmap = opal_scom_unmap, + .read = opal_scom_read, + .write = opal_scom_write +}; + +static int opal_xscom_init(void) +{ + if (firmware_has_feature(FW_FEATURE_OPALv3)) + 
scom_init(&opal_scom_controller); return 0; } arch_initcall(opal_xscom_init); ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/scom: Create debugfs files using ibm,chip-id if available
When creating the debugfs scom files, use ibm,chip-id as the scom%d index rather than a simple made up number when possible. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- arch/powerpc/sysdev/scom.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c index 413622d..cb20d54 100644 --- a/arch/powerpc/sysdev/scom.c +++ b/arch/powerpc/sysdev/scom.c @@ -196,8 +196,13 @@ static int scom_debug_init(void) return -1; i = rc = 0; - for_each_node_with_property(dn, "scom-controller") - rc |= scom_debug_init_one(root, dn, i++); + for_each_node_with_property(dn, "scom-controller") { + int id = of_get_ibm_chip_id(dn); + if (id == -1) + id = i; + rc |= scom_debug_init_one(root, dn, id); + i++; + } return rc; } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/scom: CONFIG_SCOM_DEBUGFS should depend on CONFIG_DEBUG_FS
Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- arch/powerpc/sysdev/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index ab4cb54..13ec968 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -28,7 +28,7 @@ config PPC_SCOM config SCOM_DEBUGFS bool "Expose SCOM controllers via debugfs" - depends on PPC_SCOM + depends on PPC_SCOM && DEBUG_FS default n config GE_FPGA ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/scom: Use devspec rather than path in debugfs entries
This is the traditional name for device-tree path, used in sysfs, do the same for the XSCOM debugfs files. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- arch/powerpc/sysdev/scom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c index cb20d54..3963d99 100644 --- a/arch/powerpc/sysdev/scom.c +++ b/arch/powerpc/sysdev/scom.c @@ -180,7 +180,7 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn, debugfs_create_file("addr", 0600, dir, ent, &scom_addr_fops); debugfs_create_file("value", 0600, dir, ent, &scom_val_fops); - debugfs_create_blob("path", 0400, dir, &ent->blob); + debugfs_create_blob("devspec", 0400, dir, &ent->blob); return 0; } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH v2 3/4] powerpc: refactor of_get_cpu_node to support other architectures
On Wed, Aug 28, 2013 at 08:46:38PM +0100, Grant Likely wrote: On Thu, 22 Aug 2013 14:59:30 +0100, Mark Rutland mark.rutl...@arm.com wrote: On Mon, Aug 19, 2013 at 02:56:10PM +0100, Sudeep KarkadaNagesha wrote: On 19/08/13 14:02, Rob Herring wrote: On 08/19/2013 05:19 AM, Mark Rutland wrote: On Sat, Aug 17, 2013 at 11:09:36PM +0100, Benjamin Herrenschmidt wrote: On Sat, 2013-08-17 at 12:50 +0200, Tomasz Figa wrote: I wonder how would this handle uniprocessor ARM (pre-v7) cores, for which the updated bindings[1] define #address-cells = 0 and so no reg property. [1] - http://thread.gmane.org/gmane.linux.ports.arm.kernel/260795 Why did you do that in the binding ? That sounds like looking to create problems ... Traditionally, UP setups just used 0 as the reg property on other architectures, why do differently ? The decision was taken because we defined our reg property to refer to the MPIDR register's Aff{2,1,0} bitfields, and on UP cores before v7 there's no MPIDR register at all. Given there can only be a single CPU in that case, describing a register that wasn't present didn't seem necessary or helpful. What exactly reg represents is up to the binding definition, but it still should be present IMO. I don't see any issue with it being different for pre-v7. Yes it's better to have 'reg' with value 0 than not having it. Otherwise this generic of_get_cpu_node implementation would need some _hack_ to handle that case. I'm not sure that having some code to handle a difference in standard between two architectures is a hack. If anything, I'd argue encoding a reg of 0 that corresponds to a nonexistent MPIDR value (given that's what the reg property is defined to map to on ARM) is more of a hack ;) I'm not averse to having a reg value of 0 for this case, but given that there are existing devicetrees without it, requiring a reg property will break compatibility with them. Then special cases those device trees, but you changing existing convention really needs to be avoided. 
The referenced documentation change is brand new, so we're not stuck with it. I have no problem with changing the bindings and forcing: #address-cells = 1; reg = 0; for UP predating v7, my big worry is related to in-kernel dts that we already patched to follow the #address-cells = 0 rule (and we had to do it since we got asked that question multiple times on the public lists). What do you mean by special case those device trees ? I have not planned to patch them again, unless we really consider that a necessary evil. Thanks, Lorenzo ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v12] ASoC: fsl: Add S/PDIF machine driver
On Wed, Aug 28, 2013 at 12:04:46PM +0800, Nicolin Chen wrote: This patch implements a device-tree-only machine driver for Freescale i.MX series Soc. It works with spdif_transmitter/spdif_receiver and fsl_spdif.c drivers. Applied, thanks. signature.asc Description: Digital signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH -next] ASoC: fsl_spdif: remove redundant dev_err call in fsl_spdif_probe()
On Thu, Aug 29, 2013 at 08:00:05AM +0800, Wei Yongjun wrote: From: Wei Yongjun yongjun_...@trendmicro.com.cn There is a error message within devm_ioremap_resource already, so remove the dev_err call to avoid redundant error message. Applied, thanks. signature.asc Description: Digital signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V2] powerpc: Convert out of line __arch_hweight to inline
Hi Ben On Wednesday 07 August 2013 04:48 PM, Madhavan Srinivasan wrote: Patch attempts to improve the performace of __arch_hweight functions by making them inline instead of current out of line implementation. Testcase is to disable/enable SMT on a large (192 thread) POWER7 lpar. Program used for SMT disable/enable is ppc64_cpu with --smt=[off/on] option. Here are the perf output. In this case, __arch_hweight64 is called by __bitmap_weight. Without patch (ppc64_cpu --smt=off): 17.60% ppc64_cpu [kernel.kallsyms] [k] .deactivate_slab 4.85% ppc64_cpu [kernel.kallsyms] [k] .__bitmap_weight 1.36% ppc64_cpu [kernel.kallsyms] [k] .__disable_runtime 1.29% ppc64_cpu [kernel.kallsyms] [k] .__arch_hweight64 With patch (ppc64_cpu --smt=off): 17.29% ppc64_cpu [kernel.kallsyms] [k] .deactivate_slab 3.71% ppc64_cpu [kernel.kallsyms] [k] .__bitmap_weight 3.26% ppc64_cpu [kernel.kallsyms] [k] .build_overlap_sched_groups Without patch (ppc64_cpu --smt=on): 8.35% ppc64_cpu [kernel.kallsyms] [k] .strlen 7.00% ppc64_cpu [kernel.kallsyms] [k] .memset 6.78% ppc64_cpu [kernel.kallsyms] [k] .__bitmap_weight 4.23% ppc64_cpu [kernel.kallsyms] [k] .deactivate_slab 1.58% ppc64_cpu [kernel.kallsyms] [k] .refresh_zone_stat_thresholds 1.57% ppc64_cpu [kernel.kallsyms] [k] .__arch_hweight64 1.54% ppc64_cpu [kernel.kallsyms] [k] .__enable_runtime With patch (ppc64_cpu --smt=on): 9.44% ppc64_cpu [kernel.kallsyms] [k] .strlen 6.43% ppc64_cpu [kernel.kallsyms] [k] .memset 5.48% ppc64_cpu [kernel.kallsyms] [k] .__bitmap_weight 4.59% ppc64_cpu [kernel.kallsyms] [k] .insert_entry 4.29% ppc64_cpu [kernel.kallsyms] [k] .deactivate_slab Patch changes v2: 1. Removed the arch/powerpc/lib/hweight_64.S file. Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com Any question or suggestion for this patch. 
--- arch/powerpc/include/asm/bitops.h | 130 - arch/powerpc/include/asm/ppc-opcode.h |6 ++ arch/powerpc/lib/Makefile |2 +- arch/powerpc/lib/hweight_64.S | 110 4 files changed, 133 insertions(+), 115 deletions(-) delete mode 100644 arch/powerpc/lib/hweight_64.S diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 910194e..136fe6a 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -43,8 +43,10 @@ #endif #include linux/compiler.h +#include linux/types.h #include asm/asm-compat.h #include asm/synch.h +#include asm/cputable.h /* * clear_bit doesn't imply a memory barrier @@ -263,10 +265,130 @@ static __inline__ int fls64(__u64 x) #endif /* __powerpc64__ */ #ifdef CONFIG_PPC64 -unsigned int __arch_hweight8(unsigned int w); -unsigned int __arch_hweight16(unsigned int w); -unsigned int __arch_hweight32(unsigned int w); -unsigned long __arch_hweight64(__u64 w); + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + unsigned int register iop asm(r3) = w; + unsigned int register tmp asm(r4); + __asm__ __volatile__ ( + stringify_in_c(BEGIN_FTR_SECTION) + bl .__sw_hweight8; + nop; + stringify_in_c(FTR_SECTION_ELSE) + PPC_POPCNTB_M(%1,%2) ; + clrldi %0,%1,64-8; + stringify_in_c(ALT_FTR_SECTION_END_IFCLR((%3))) + : =r (iop), =r (tmp) + : r (iop), i (CPU_FTR_POPCNTB) + : r0, r1, r5, r6, r7, r8, r9, + r10, r11, r12, r13, r31, lr, cr0, xer); + + return iop; +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + unsigned int register iop asm(r3) = w; + unsigned int register tmp asm(r4); + __asm__ __volatile__ ( + stringify_in_c(BEGIN_FTR_SECTION) + bl .__sw_hweight16; + nop; + nop; + nop; + nop; + stringify_in_c(FTR_SECTION_ELSE) + stringify_in_c(BEGIN_FTR_SECTION_NESTED(50)) + PPC_POPCNTB_M(%0,%2) ; + srdi %1,%0,8; + add %0,%1,%0; + clrldi %0,%0,64-8; + stringify_in_c(FTR_SECTION_ELSE_NESTED(50)) + clrlwi %0,%2,16; + PPC_POPCNTW_M(%1,%0) ; + clrldi %0,%1,64-8; + 
stringify_in_c(ALT_FTR_SECTION_END_NESTED_IFCLR(%4,50)) + stringify_in_c(ALT_FTR_SECTION_END_IFCLR((%3))) + : =r (iop), =r (tmp) + : r (iop), i (CPU_FTR_POPCNTB), i (CPU_FTR_POPCNTD) + : r0, r1, r5, r6, r7, r8, r9, + r10, r11, r12, r13, r31, lr, cr0, xer); + + return iop; +} + +static inline unsigned int
Re: [PATCH 03/10] crypto: nx - fix limits to sg lists for AES-CBC
On Thu, Aug 29, 2013 at 02:42:22PM +1000, Herbert Xu wrote: On Fri, Aug 23, 2013 at 05:01:07PM -0300, Marcelo Cerri wrote: This patch updates the nx-aes-cbc implementation to perform several hyper calls if needed in order to always respect the length limits for scatter/gather lists. Two different limits are considered: - ibm,max-sg-len: maximum number of bytes of each scatter/gather list. - ibm,max-sync-cop: - The total number of bytes that a scatter/gather list can hold. - The maximum number of elements that a scatter/gather list can have. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com This patch does not apply against the current cryptodev tree. Please regenerate your pathces. Sorry for this. I'm sending a v2 series without conflicts. Thanks, -- Email: Herbert Xu herb...@gondor.apana.org.au Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 00/10] Series of fixes for NX driver
This series of patches contains fixes in several algorithms implemented by the NX driver. The patches can be separated in three different categories: - Changes to split the data in several hyper calls to respect the limits of data that the co-processador can handle. This affects all AES modes. - Fixes in how the driver handle zero length messages. This affects XCBC and GCM. - Fixes for SHA-2 when chunks bigger than the block size are provided. v2: - Fixed conflict. Fionnuala Gunter (2): crypto: nx - fix limits to sg lists for AES-XCBC crypto: nx - fix limits to sg lists for AES-CCM Marcelo Cerri (8): crypto: nx - add offset to nx_build_sg_lists() crypto: nx - fix limits to sg lists for AES-ECB crypto: nx - fix limits to sg lists for AES-CBC crypto: nx - fix limits to sg lists for AES-CTR crypto: nx - fix limits to sg lists for AES-GCM crypto: nx - fix XCBC for zero length messages crypto: nx - fix GCM for zero length messages crypto: nx - fix SHA-2 for chunks bigger than block size drivers/crypto/nx/nx-aes-cbc.c | 50 --- drivers/crypto/nx/nx-aes-ccm.c | 297 +--- drivers/crypto/nx/nx-aes-ctr.c | 50 --- drivers/crypto/nx/nx-aes-ecb.c | 48 --- drivers/crypto/nx/nx-aes-gcm.c | 292 ++- drivers/crypto/nx/nx-aes-xcbc.c | 191 +++--- drivers/crypto/nx/nx-sha256.c | 2 +- drivers/crypto/nx/nx-sha512.c | 2 +- drivers/crypto/nx/nx.c | 9 +- drivers/crypto/nx/nx.h | 2 +- 10 files changed, 683 insertions(+), 260 deletions(-) -- 1.7.12 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 01/10] crypto: nx - add offset to nx_build_sg_lists()
This patch includes one more parameter to nx_build_sg_lists() to skip the given number of bytes from beginning of each sg list. This is needed in order to implement the fixes for the AES modes to make them able to process larger chunks of data. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-cbc.c | 2 +- drivers/crypto/nx/nx-aes-ccm.c | 4 ++-- drivers/crypto/nx/nx-aes-ctr.c | 2 +- drivers/crypto/nx/nx-aes-ecb.c | 2 +- drivers/crypto/nx/nx-aes-gcm.c | 2 +- drivers/crypto/nx/nx.c | 9 +++-- drivers/crypto/nx/nx.h | 2 +- 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c index 7c0237d..a9e76c6 100644 --- a/drivers/crypto/nx/nx-aes-cbc.c +++ b/drivers/crypto/nx/nx-aes-cbc.c @@ -85,7 +85,7 @@ static int cbc_aes_nx_crypt(struct blkcipher_desc *desc, else NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT; - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0, csbcpb-cpb.aes_cbc.iv); if (rc) goto out; diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 39d4224..666a35b 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -293,7 +293,7 @@ static int ccm_nx_decrypt(struct aead_request *req, if (rc) goto out; - rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, + rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, 0, csbcpb-cpb.aes_ccm.iv_or_ctr); if (rc) goto out; @@ -339,7 +339,7 @@ static int ccm_nx_encrypt(struct aead_request *req, if (rc) goto out; - rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, + rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, 0, csbcpb-cpb.aes_ccm.iv_or_ctr); if (rc) goto out; diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 762611b..80dee8d 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ 
b/drivers/crypto/nx/nx-aes-ctr.c @@ -98,7 +98,7 @@ static int ctr_aes_nx_crypt(struct blkcipher_desc *desc, goto out; } - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0, csbcpb-cpb.aes_ctr.iv); if (rc) goto out; diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c index 77dbe08..fe0d803 100644 --- a/drivers/crypto/nx/nx-aes-ecb.c +++ b/drivers/crypto/nx/nx-aes-ecb.c @@ -85,7 +85,7 @@ static int ecb_aes_nx_crypt(struct blkcipher_desc *desc, else NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT; - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, NULL); + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0, NULL); if (rc) goto out; diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 74feee1..c2d6f76 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -226,7 +226,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) csbcpb-cpb.aes_gcm.bit_length_data = nbytes * 8; - rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, + rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, 0, csbcpb-cpb.aes_gcm.iv_or_cnt); if (rc) goto out; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index bdf4990..5533fe3 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -211,6 +211,8 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *nx_dst, * @dst: destination scatterlist * @src: source scatterlist * @nbytes: length of data described in the scatterlists + * @offset: number of bytes to fast-forward past at the beginning of + * scatterlists. * @iv: destination for the iv data, if the algorithm requires it * * This is common code shared by all the AES algorithms. 
It uses the block @@ -222,6 +224,7 @@ int nx_build_sg_lists(struct nx_crypto_ctx *nx_ctx, struct scatterlist*dst, struct scatterlist*src, unsigned int nbytes, + unsigned int offset, u8*iv) { struct nx_sg *nx_insg = nx_ctx-in_sg; @@ -230,8 +233,10 @@ int nx_build_sg_lists(struct nx_crypto_ctx *nx_ctx, if (iv) memcpy(iv, desc-info, AES_BLOCK_SIZE); - nx_insg = nx_walk_and_build(nx_insg, nx_ctx-ap-sglen, src, 0, nbytes); - nx_outsg =
[PATCH v2 02/10] crypto: nx - fix limits to sg lists for AES-ECB
This patch updates the nx-aes-ecb implementation to perform several hyper calls if needed in order to always respect the length limits for scatter/gather lists. Two different limits are considered: - ibm,max-sg-len: maximum number of bytes of each scatter/gather list. - ibm,max-sync-cop: - The total number of bytes that a scatter/gather list can hold. - The maximum number of elements that a scatter/gather list can have. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-ecb.c | 48 ++ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c index fe0d803..85a8d23 100644 --- a/drivers/crypto/nx/nx-aes-ecb.c +++ b/drivers/crypto/nx/nx-aes-ecb.c @@ -71,37 +71,49 @@ static int ecb_aes_nx_crypt(struct blkcipher_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc-tfm); struct nx_csbcpb *csbcpb = nx_ctx-csbcpb; unsigned long irq_flags; + unsigned int processed = 0, to_process; + u32 max_sg_len; int rc; spin_lock_irqsave(nx_ctx-lock, irq_flags); - if (nbytes nx_ctx-ap-databytelen) { - rc = -EINVAL; - goto out; - } + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx-ap-sglen); if (enc) NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; else NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT; - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0, NULL); - if (rc) - goto out; + do { + to_process = min_t(u64, nbytes - processed, + nx_ctx-ap-databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process ~(AES_BLOCK_SIZE - 1); - if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) { - rc = -EINVAL; - goto out; - } + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process, + processed, NULL); + if (rc) + goto out; - rc = nx_hcall_sync(nx_ctx, nx_ctx-op, - desc-flags CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + if (!nx_ctx-op.inlen || 
!nx_ctx-op.outlen) { + rc = -EINVAL; + goto out; + } + + rc = nx_hcall_sync(nx_ctx, nx_ctx-op, + desc-flags CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + atomic_inc((nx_ctx-stats-aes_ops)); + atomic64_add(csbcpb-csb.processed_byte_count, +(nx_ctx-stats-aes_bytes)); + + processed += to_process; + } while (processed nbytes); - atomic_inc((nx_ctx-stats-aes_ops)); - atomic64_add(csbcpb-csb.processed_byte_count, -(nx_ctx-stats-aes_bytes)); out: spin_unlock_irqrestore(nx_ctx-lock, irq_flags); return rc; -- 1.7.12 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 04/10] crypto: nx - fix limits to sg lists for AES-CTR
This patch updates the nx-aes-ctr implementation to perform several hyper calls if needed in order to always respect the length limits for scatter/gather lists. Two different limits are considered: - ibm,max-sg-len: maximum number of bytes of each scatter/gather list. - ibm,max-sync-cop: - The total number of bytes that a scatter/gather list can hold. - The maximum number of elements that a scatter/gather list can have. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-ctr.c | 50 ++ 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 80dee8d..a37d009 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -89,33 +89,45 @@ static int ctr_aes_nx_crypt(struct blkcipher_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc-tfm); struct nx_csbcpb *csbcpb = nx_ctx-csbcpb; unsigned long irq_flags; + unsigned int processed = 0, to_process; + u32 max_sg_len; int rc; spin_lock_irqsave(nx_ctx-lock, irq_flags); - if (nbytes nx_ctx-ap-databytelen) { - rc = -EINVAL; - goto out; - } + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx-ap-sglen); - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0, - csbcpb-cpb.aes_ctr.iv); - if (rc) - goto out; + do { + to_process = min_t(u64, nbytes - processed, + nx_ctx-ap-databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process ~(AES_BLOCK_SIZE - 1); - if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) { - rc = -EINVAL; - goto out; - } + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process, + processed, csbcpb-cpb.aes_ctr.iv); + if (rc) + goto out; - rc = nx_hcall_sync(nx_ctx, nx_ctx-op, - desc-flags CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) { + rc = -EINVAL; + goto out; + } - 
atomic_inc((nx_ctx-stats-aes_ops)); - atomic64_add(csbcpb-csb.processed_byte_count, -(nx_ctx-stats-aes_bytes)); + rc = nx_hcall_sync(nx_ctx, nx_ctx-op, + desc-flags CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + memcpy(desc-info, csbcpb-cpb.aes_cbc.cv, AES_BLOCK_SIZE); + + atomic_inc((nx_ctx-stats-aes_ops)); + atomic64_add(csbcpb-csb.processed_byte_count, +(nx_ctx-stats-aes_bytes)); + + processed += to_process; + } while (processed nbytes); out: spin_unlock_irqrestore(nx_ctx-lock, irq_flags); return rc; -- 1.7.12 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 05/10] crypto: nx - fix limits to sg lists for AES-GCM
This patch updates the nx-aes-gcm implementation to perform several hyper calls if needed in order to always respect the length limits for scatter/gather lists. Two different limits are considered: - ibm,max-sg-len: maximum number of bytes of each scatter/gather list. - ibm,max-sync-cop: - The total number of bytes that a scatter/gather list can hold. - The maximum number of elements that a scatter/gather list can have. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-gcm.c | 202 +++-- 1 file changed, 136 insertions(+), 66 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index c2d6f76..9e89bdf 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -125,37 +125,101 @@ static int nx_gca(struct nx_crypto_ctx *nx_ctx, struct aead_request *req, u8*out) { + int rc; struct nx_csbcpb *csbcpb_aead = nx_ctx-csbcpb_aead; - int rc = -EINVAL; struct scatter_walk walk; struct nx_sg *nx_sg = nx_ctx-in_sg; + unsigned int nbytes = req-assoclen; + unsigned int processed = 0, to_process; + u32 max_sg_len; - if (req-assoclen nx_ctx-ap-databytelen) - goto out; - - if (req-assoclen = AES_BLOCK_SIZE) { + if (nbytes = AES_BLOCK_SIZE) { scatterwalk_start(walk, req-assoc); - scatterwalk_copychunks(out, walk, req-assoclen, - SCATTERWALK_FROM_SG); + scatterwalk_copychunks(out, walk, nbytes, SCATTERWALK_FROM_SG); scatterwalk_done(walk, SCATTERWALK_FROM_SG, 0); - - rc = 0; - goto out; + return 0; } - nx_sg = nx_walk_and_build(nx_sg, nx_ctx-ap-sglen, req-assoc, 0, - req-assoclen); - nx_ctx-op_aead.inlen = (nx_ctx-in_sg - nx_sg) * sizeof(struct nx_sg); + NX_CPB_FDM(csbcpb_aead) = ~NX_FDM_CONTINUATION; - rc = nx_hcall_sync(nx_ctx, nx_ctx-op_aead, - req-base.flags CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + /* page_limit: number of sg entries that fit on one page */ + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), 
+ nx_ctx-ap-sglen); - atomic_inc((nx_ctx-stats-aes_ops)); - atomic64_add(req-assoclen, (nx_ctx-stats-aes_bytes)); + do { + /* +* to_process: the data chunk to process in this update. +* This value is bound by sg list limits. +*/ + to_process = min_t(u64, nbytes - processed, + nx_ctx-ap-databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + + if ((to_process + processed) nbytes) + NX_CPB_FDM(csbcpb_aead) |= NX_FDM_INTERMEDIATE; + else + NX_CPB_FDM(csbcpb_aead) = ~NX_FDM_INTERMEDIATE; + + nx_sg = nx_walk_and_build(nx_ctx-in_sg, nx_ctx-ap-sglen, + req-assoc, processed, to_process); + nx_ctx-op_aead.inlen = (nx_ctx-in_sg - nx_sg) + * sizeof(struct nx_sg); + + rc = nx_hcall_sync(nx_ctx, nx_ctx-op_aead, + req-base.flags CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + return rc; + + memcpy(csbcpb_aead-cpb.aes_gca.in_pat, + csbcpb_aead-cpb.aes_gca.out_pat, + AES_BLOCK_SIZE); + NX_CPB_FDM(csbcpb_aead) |= NX_FDM_CONTINUATION; + + atomic_inc((nx_ctx-stats-aes_ops)); + atomic64_add(req-assoclen, (nx_ctx-stats-aes_bytes)); + + processed += to_process; + } while (processed nbytes); memcpy(out, csbcpb_aead-cpb.aes_gca.out_pat, AES_BLOCK_SIZE); + + return rc; +} + +static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc, +int enc) +{ + int rc; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req-base.tfm); + struct nx_csbcpb *csbcpb = nx_ctx-csbcpb; + + /* For scenarios where the input message is zero length, AES CTR mode +* may be used. Set the source data to be a single block (16B) of all +* zeros, and set the input IV value to be the same as the GMAC IV +* value. - nx_wb 4.8.1.3 */ + char src[AES_BLOCK_SIZE] = {}; + struct scatterlist sg; + + desc-tfm = crypto_alloc_blkcipher(ctr(aes), 0, 0); + if (IS_ERR(desc-tfm)) { + rc = -ENOMEM; +
[PATCH v2 06/10] crypto: nx - fix limits to sg lists for AES-XCBC
From: Fionnuala Gunter f...@linux.vnet.ibm.com This patch updates the NX driver to perform several hyper calls when necessary so that the length limits of scatter/gather lists are respected. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Reviewed-by: Marcelo Cerri mhce...@linux.vnet.ibm.com Signed-off-by: Fionnuala Gunter f...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-xcbc.c | 107 +++- 1 file changed, 63 insertions(+), 44 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 658da0f..1a5d9e3 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -88,78 +88,97 @@ static int nx_xcbc_update(struct shash_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(desc-tfm-base); struct nx_csbcpb *csbcpb = nx_ctx-csbcpb; struct nx_sg *in_sg; - u32 to_process, leftover; + u32 to_process, leftover, total; + u32 max_sg_len; unsigned long irq_flags; int rc = 0; spin_lock_irqsave(nx_ctx-lock, irq_flags); - if (NX_CPB_FDM(csbcpb) NX_FDM_CONTINUATION) { - /* we've hit the nx chip previously and we're updating again, -* so copy over the partial digest */ - memcpy(csbcpb-cpb.aes_xcbc.cv, - csbcpb-cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE); - } + + total = sctx-count + len; /* 2 cases for total data len: * 1: = AES_BLOCK_SIZE: copy into state, return 0 * 2: AES_BLOCK_SIZE: process X blocks, copy in leftover */ - if (len + sctx-count = AES_BLOCK_SIZE) { + if (total = AES_BLOCK_SIZE) { memcpy(sctx-buffer + sctx-count, data, len); sctx-count += len; goto out; } - /* to_process: the AES_BLOCK_SIZE data chunk to process in this -* update */ - to_process = (sctx-count + len) ~(AES_BLOCK_SIZE - 1); - leftover = (sctx-count + len) (AES_BLOCK_SIZE - 1); + in_sg = nx_ctx-in_sg; + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx-ap-sglen); - /* the hardware will not accept a 0 byte operation for this algorithm -* and the operation MUST be finalized to be correct. 
So if we happen -* to get an update that falls on a block sized boundary, we must -* save off the last block to finalize with later. */ - if (!leftover) { - to_process -= AES_BLOCK_SIZE; - leftover = AES_BLOCK_SIZE; - } + do { - if (sctx-count) { - in_sg = nx_build_sg_list(nx_ctx-in_sg, sctx-buffer, -sctx-count, nx_ctx-ap-sglen); - in_sg = nx_build_sg_list(in_sg, (u8 *)data, -to_process - sctx-count, -nx_ctx-ap-sglen); - nx_ctx-op.inlen = (nx_ctx-in_sg - in_sg) * - sizeof(struct nx_sg); - } else { - in_sg = nx_build_sg_list(nx_ctx-in_sg, (u8 *)data, to_process, -nx_ctx-ap-sglen); + /* to_process: the AES_BLOCK_SIZE data chunk to process in this +* update */ + to_process = min_t(u64, total, nx_ctx-ap-databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process ~(AES_BLOCK_SIZE - 1); + leftover = total - to_process; + + /* the hardware will not accept a 0 byte operation for this +* algorithm and the operation MUST be finalized to be correct. +* So if we happen to get an update that falls on a block sized +* boundary, we must save off the last block to finalize with +* later. */ + if (!leftover) { + to_process -= AES_BLOCK_SIZE; + leftover = AES_BLOCK_SIZE; + } + + if (sctx-count) { + in_sg = nx_build_sg_list(nx_ctx-in_sg, + (u8 *) sctx-buffer, + sctx-count, + max_sg_len); + } + in_sg = nx_build_sg_list(in_sg, + (u8 *) data, + to_process - sctx-count, + max_sg_len); nx_ctx-op.inlen = (nx_ctx-in_sg - in_sg) * sizeof(struct nx_sg); - } - NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + /* we've hit the nx chip previously and we're updating again, +
[PATCH v2 07/10] crypto: nx - fix limits to sg lists for AES-CCM
From: Fionnuala Gunter f...@linux.vnet.ibm.com This patch updates the NX driver to perform several hyper calls when necessary so that the length limits of scatter/gather lists are respected. Reviewed-by: Marcelo Cerri mhce...@linux.vnet.ibm.com Signed-off-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Fionnuala Gunter f...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-ccm.c | 297 + 1 file changed, 215 insertions(+), 82 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 666a35b..5ecd4c2 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -179,13 +179,26 @@ static int generate_pat(u8 *iv, struct nx_sg *nx_insg = nx_ctx-in_sg; struct nx_sg *nx_outsg = nx_ctx-out_sg; unsigned int iauth_len = 0; - struct vio_pfo_op *op = NULL; u8 tmp[16], *b1 = NULL, *b0 = NULL, *result = NULL; int rc; /* zero the ctr value */ memset(iv + 15 - iv[0], 0, iv[0] + 1); + /* page 78 of nx_wb.pdf has, +* Note: RFC3610 allows the AAD data to be up to 2^64 -1 bytes +* in length. If a full message is used, the AES CCA implementation +* restricts the maximum AAD length to 2^32 -1 bytes. +* If partial messages are used, the implementation supports +* 2^64 -1 bytes maximum AAD length. +* +* However, in the cryptoapi's aead_request structure, +* assoclen is an unsigned int, thus it cannot hold a length +* value greater than 2^32 - 1. +* Thus the AAD is further constrained by this and is never +* greater than 2^32. 
+*/ + if (!req-assoclen) { b0 = nx_ctx-csbcpb-cpb.aes_ccm.in_pat_or_b0; } else if (req-assoclen = 14) { @@ -195,7 +208,46 @@ static int generate_pat(u8 *iv, b0 = nx_ctx-csbcpb-cpb.aes_ccm.in_pat_or_b0; b1 = nx_ctx-priv.ccm.iauth_tag; iauth_len = req-assoclen; + } else if (req-assoclen = 65280) { + /* if associated data is less than (2^16 - 2^8), we construct +* B1 differently and feed in the associated data to a CCA +* operation */ + b0 = nx_ctx-csbcpb_aead-cpb.aes_cca.b0; + b1 = nx_ctx-csbcpb_aead-cpb.aes_cca.b1; + iauth_len = 14; + } else { + b0 = nx_ctx-csbcpb_aead-cpb.aes_cca.b0; + b1 = nx_ctx-csbcpb_aead-cpb.aes_cca.b1; + iauth_len = 10; + } + /* generate B0 */ + rc = generate_b0(iv, req-assoclen, authsize, nbytes, b0); + if (rc) + return rc; + + /* generate B1: +* add control info for associated data +* RFC 3610 and NIST Special Publication 800-38C +*/ + if (b1) { + memset(b1, 0, 16); + if (req-assoclen = 65280) { + *(u16 *)b1 = (u16)req-assoclen; + scatterwalk_map_and_copy(b1 + 2, req-assoc, 0, +iauth_len, SCATTERWALK_FROM_SG); + } else { + *(u16 *)b1 = (u16)(0xfffe); + *(u32 *)b1[2] = (u32)req-assoclen; + scatterwalk_map_and_copy(b1 + 6, req-assoc, 0, +iauth_len, SCATTERWALK_FROM_SG); + } + } + + /* now copy any remaining AAD to scatterlist and call nx... 
*/ + if (!req-assoclen) { + return rc; + } else if (req-assoclen = 14) { nx_insg = nx_build_sg_list(nx_insg, b1, 16, nx_ctx-ap-sglen); nx_outsg = nx_build_sg_list(nx_outsg, tmp, 16, nx_ctx-ap-sglen); @@ -210,56 +262,74 @@ static int generate_pat(u8 *iv, NX_CPB_FDM(nx_ctx-csbcpb) |= NX_FDM_ENDE_ENCRYPT; NX_CPB_FDM(nx_ctx-csbcpb) |= NX_FDM_INTERMEDIATE; - op = nx_ctx-op; result = nx_ctx-csbcpb-cpb.aes_ccm.out_pat_or_mac; - } else if (req-assoclen = 65280) { - /* if associated data is less than (2^16 - 2^8), we construct -* B1 differently and feed in the associated data to a CCA -* operation */ - b0 = nx_ctx-csbcpb_aead-cpb.aes_cca.b0; - b1 = nx_ctx-csbcpb_aead-cpb.aes_cca.b1; - iauth_len = 14; - - /* remaining assoc data must have scatterlist built for it */ - nx_insg = nx_walk_and_build(nx_insg, nx_ctx-ap-sglen, - req-assoc, iauth_len, - req-assoclen - iauth_len); - nx_ctx-op_aead.inlen = (nx_ctx-in_sg - nx_insg) * + + rc = nx_hcall_sync(nx_ctx, nx_ctx-op, +
[PATCH v2 03/10] crypto: nx - fix limits to sg lists for AES-CBC
This patch updates the nx-aes-cbc implementation to perform several hyper calls if needed in order to always respect the length limits for scatter/gather lists. Two different limits are considered: - ibm,max-sg-len: maximum number of bytes of each scatter/gather list. - ibm,max-sync-cop: - The total number of bytes that a scatter/gather list can hold. - The maximum number of elements that a scatter/gather list can have. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-cbc.c | 50 +- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c index a9e76c6..cc00b52 100644 --- a/drivers/crypto/nx/nx-aes-cbc.c +++ b/drivers/crypto/nx/nx-aes-cbc.c @@ -71,39 +71,49 @@ static int cbc_aes_nx_crypt(struct blkcipher_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc-tfm); struct nx_csbcpb *csbcpb = nx_ctx-csbcpb; unsigned long irq_flags; + unsigned int processed = 0, to_process; + u32 max_sg_len; int rc; spin_lock_irqsave(nx_ctx-lock, irq_flags); - if (nbytes nx_ctx-ap-databytelen) { - rc = -EINVAL; - goto out; - } + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx-ap-sglen); if (enc) NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; else NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT; - rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0, - csbcpb-cpb.aes_cbc.iv); - if (rc) - goto out; + do { + to_process = min_t(u64, nbytes - processed, + nx_ctx-ap-databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + to_process = to_process ~(AES_BLOCK_SIZE - 1); - if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) { - rc = -EINVAL; - goto out; - } + rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process, + processed, csbcpb-cpb.aes_cbc.iv); + if (rc) + goto out; - rc = nx_hcall_sync(nx_ctx, nx_ctx-op, - desc-flags CRYPTO_TFM_REQ_MAY_SLEEP); - if (rc) - goto out; + 
if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) { + rc = -EINVAL; + goto out; + } - memcpy(desc-info, csbcpb-cpb.aes_cbc.cv, AES_BLOCK_SIZE); - atomic_inc((nx_ctx-stats-aes_ops)); - atomic64_add(csbcpb-csb.processed_byte_count, -(nx_ctx-stats-aes_bytes)); + rc = nx_hcall_sync(nx_ctx, nx_ctx-op, + desc-flags CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + memcpy(desc-info, csbcpb-cpb.aes_cbc.cv, AES_BLOCK_SIZE); + atomic_inc((nx_ctx-stats-aes_ops)); + atomic64_add(csbcpb-csb.processed_byte_count, +(nx_ctx-stats-aes_bytes)); + + processed += to_process; + } while (processed nbytes); out: spin_unlock_irqrestore(nx_ctx-lock, irq_flags); return rc; -- 1.7.12 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 08/10] crypto: nx - fix XCBC for zero length messages
The NX XCBC implementation doesn't support zero length messages and because of that NX is currently returning a hard-coded hash for zero length messages. However this approach is incorrect since the hash value also depends on which key is used. This patch removes the hard-coded hash and replace it with an implementation based on the RFC 3566 using ECB. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-xcbc.c | 84 + 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 1a5d9e3..03c4bf5 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -56,6 +56,77 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, return 0; } +/* + * Based on RFC 3566, for a zero-length message: + * + * n = 1 + * K1 = E(K, 0x01010101010101010101010101010101) + * K3 = E(K, 0x03030303030303030303030303030303) + * E[0] = 0x + * M[1] = 0x8000 (0 length message with padding) + * E[1] = (K1, M[1] ^ E[0] ^ K3) + * Tag = M[1] + */ +static int nx_xcbc_empty(struct shash_desc *desc, u8 *out) +{ + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(desc-tfm-base); + struct nx_csbcpb *csbcpb = nx_ctx-csbcpb; + struct nx_sg *in_sg, *out_sg; + u8 keys[2][AES_BLOCK_SIZE]; + u8 key[32]; + int rc = 0; + + /* Change to ECB mode */ + csbcpb-cpb.hdr.mode = NX_MODE_AES_ECB; + memcpy(key, csbcpb-cpb.aes_xcbc.key, AES_BLOCK_SIZE); + memcpy(csbcpb-cpb.aes_ecb.key, key, AES_BLOCK_SIZE); + NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT; + + /* K1 and K3 base patterns */ + memset(keys[0], 0x01, sizeof(keys[0])); + memset(keys[1], 0x03, sizeof(keys[1])); + + /* Generate K1 and K3 encrypting the patterns */ + in_sg = nx_build_sg_list(nx_ctx-in_sg, (u8 *) keys, sizeof(keys), +nx_ctx-ap-sglen); + out_sg = nx_build_sg_list(nx_ctx-out_sg, (u8 *) keys, sizeof(keys), + nx_ctx-ap-sglen); + nx_ctx-op.inlen = (nx_ctx-in_sg - in_sg) * 
sizeof(struct nx_sg); + nx_ctx-op.outlen = (nx_ctx-out_sg - out_sg) * sizeof(struct nx_sg); + + rc = nx_hcall_sync(nx_ctx, nx_ctx-op, + desc-flags CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + atomic_inc((nx_ctx-stats-aes_ops)); + + /* XOr K3 with the padding for a 0 length message */ + keys[1][0] ^= 0x80; + + /* Encrypt the final result */ + memcpy(csbcpb-cpb.aes_ecb.key, keys[0], AES_BLOCK_SIZE); + in_sg = nx_build_sg_list(nx_ctx-in_sg, (u8 *) keys[1], sizeof(keys[1]), +nx_ctx-ap-sglen); + out_sg = nx_build_sg_list(nx_ctx-out_sg, out, AES_BLOCK_SIZE, + nx_ctx-ap-sglen); + nx_ctx-op.inlen = (nx_ctx-in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx-op.outlen = (nx_ctx-out_sg - out_sg) * sizeof(struct nx_sg); + + rc = nx_hcall_sync(nx_ctx, nx_ctx-op, + desc-flags CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + atomic_inc((nx_ctx-stats-aes_ops)); + +out: + /* Restore XCBC mode */ + csbcpb-cpb.hdr.mode = NX_MODE_AES_XCBC_MAC; + memcpy(csbcpb-cpb.aes_xcbc.key, key, AES_BLOCK_SIZE); + NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT; + + return rc; +} + static int nx_xcbc_init(struct shash_desc *desc) { struct xcbc_state *sctx = shash_desc_ctx(desc); @@ -201,13 +272,12 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) memcpy(csbcpb-cpb.aes_xcbc.cv, csbcpb-cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE); } else if (sctx-count == 0) { - /* we've never seen an update, so this is a 0 byte op. The -* hardware cannot handle a 0 byte op, so just copy out the -* known 0 byte result. This is cheaper than allocating a -* software context to do a 0 byte op */ - u8 data[] = { 0x75, 0xf0, 0x25, 0x1d, 0x52, 0x8a, 0xc0, 0x1c, - 0x45, 0x73, 0xdf, 0xd5, 0x84, 0xd7, 0x9f, 0x29 }; - memcpy(out, data, sizeof(data)); + /* +* we've never seen an update, so this is a 0 byte op. The +* hardware cannot handle a 0 byte op, so just ECB to +* generate the hash. 
+*/ + rc = nx_xcbc_empty(desc, out); goto out; } -- 1.7.12 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 09/10] crypto: nx - fix GCM for zero length messages
The NX CGM implementation doesn't support zero length messages and the current implementation has two flaws: - When the input data length is zero, it ignores the associated data. - Even when both lengths are zero, it uses the Crypto API to encrypt a zeroed block using ctr(aes) and because of this it allocates a new transformation and sets the key for this new tfm. Both operations are intended to be used only in user context, while the cryptographic operations can be called in both user and softirq contexts. This patch replaces the nested Crypto API use and adds two special cases: - When input data and associated data lengths are zero: it uses NX ECB mode to emulate the encryption of a zeroed block using ctr(aes). - When input data is zero and associated data is available: it uses NX GMAC mode to calculate the associated data MAC. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-aes-gcm.c | 132 ++--- 1 file changed, 112 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 9e89bdf..025d9a8 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -187,40 +187,125 @@ static int nx_gca(struct nx_crypto_ctx *nx_ctx, return rc; } +static int gmac(struct aead_request *req, struct blkcipher_desc *desc) +{ + int rc; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req-base.tfm); + struct nx_csbcpb *csbcpb = nx_ctx-csbcpb; + struct nx_sg *nx_sg; + unsigned int nbytes = req-assoclen; + unsigned int processed = 0, to_process; + u32 max_sg_len; + + /* Set GMAC mode */ + csbcpb-cpb.hdr.mode = NX_MODE_AES_GMAC; + + NX_CPB_FDM(csbcpb) = ~NX_FDM_CONTINUATION; + + /* page_limit: number of sg entries that fit on one page */ + max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg), + nx_ctx-ap-sglen); + + /* Copy IV */ + memcpy(csbcpb-cpb.aes_gcm.iv_or_cnt, desc-info, AES_BLOCK_SIZE); + + do { + /* +* 
to_process: the data chunk to process in this update. +* This value is bound by sg list limits. +*/ + to_process = min_t(u64, nbytes - processed, + nx_ctx-ap-databytelen); + to_process = min_t(u64, to_process, + NX_PAGE_SIZE * (max_sg_len - 1)); + + if ((to_process + processed) nbytes) + NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; + else + NX_CPB_FDM(csbcpb) = ~NX_FDM_INTERMEDIATE; + + nx_sg = nx_walk_and_build(nx_ctx-in_sg, nx_ctx-ap-sglen, + req-assoc, processed, to_process); + nx_ctx-op.inlen = (nx_ctx-in_sg - nx_sg) + * sizeof(struct nx_sg); + + csbcpb-cpb.aes_gcm.bit_length_data = 0; + csbcpb-cpb.aes_gcm.bit_length_aad = 8 * nbytes; + + rc = nx_hcall_sync(nx_ctx, nx_ctx-op, + req-base.flags CRYPTO_TFM_REQ_MAY_SLEEP); + if (rc) + goto out; + + memcpy(csbcpb-cpb.aes_gcm.in_pat_or_aad, + csbcpb-cpb.aes_gcm.out_pat_or_mac, AES_BLOCK_SIZE); + memcpy(csbcpb-cpb.aes_gcm.in_s0, + csbcpb-cpb.aes_gcm.out_s0, AES_BLOCK_SIZE); + + NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + + atomic_inc((nx_ctx-stats-aes_ops)); + atomic64_add(req-assoclen, (nx_ctx-stats-aes_bytes)); + + processed += to_process; + } while (processed nbytes); + +out: + /* Restore GCM mode */ + csbcpb-cpb.hdr.mode = NX_MODE_AES_GCM; + return rc; +} + static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc, int enc) { int rc; struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req-base.tfm); struct nx_csbcpb *csbcpb = nx_ctx-csbcpb; + char out[AES_BLOCK_SIZE]; + struct nx_sg *in_sg, *out_sg; /* For scenarios where the input message is zero length, AES CTR mode * may be used. Set the source data to be a single block (16B) of all * zeros, and set the input IV value to be the same as the GMAC IV * value. - nx_wb 4.8.1.3 */ - char src[AES_BLOCK_SIZE] = {}; - struct scatterlist sg; - desc-tfm = crypto_alloc_blkcipher(ctr(aes), 0, 0); - if (IS_ERR(desc-tfm)) { - rc = -ENOMEM; - goto out; - } - - crypto_blkcipher_setkey(desc-tfm, csbcpb-cpb.aes_gcm.key, - NX_CPB_KEY_SIZE(csbcpb) == NX_KS_AES_128 ? 16 : -
[PATCH v2 10/10] crypto: nx - fix SHA-2 for chunks bigger than block size
Each call to the co-processor, with exception of the last call, needs to send data that is multiple of block size. As consequence, any remaining data is kept in the internal NX context. This patch fixes a bug in the driver that causes it to save incorrect data into the context when data is bigger than the block size. Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com --- drivers/crypto/nx/nx-sha256.c | 2 +- drivers/crypto/nx/nx-sha512.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 6547a71..da0b24a 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -129,7 +129,7 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; total -= to_process; - data += to_process; + data += to_process - sctx-count; sctx-count = 0; in_sg = nx_ctx-in_sg; } while (leftover = SHA256_BLOCK_SIZE); diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index 236e6af..4ae5b0f 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -131,7 +131,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; total -= to_process; - data += to_process; + data += to_process - sctx-count[0]; sctx-count[0] = 0; in_sg = nx_ctx-in_sg; } while (leftover = SHA512_BLOCK_SIZE); -- 1.7.12 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Feedback wished on possible improvement of CPU15 errata handling on mpc8xx
The mpc8xx powerpc has an errata identified CPU15 which is that whenever the last instruction of a page is a conditional branch to the last instruction of the next page, the CPU might do crazy things. To work around this errata, one of the workarounds proposed by freescale is: In the ITLB miss exception code, when loading the TLB for an MMU page, also invalidate any TLB referring to the next and previous page using tlbie. This intentionally forces an ITLB miss exception on every execution across sequential MMU page boundaries. It is that workaround which has been implemented in the kernel. The drawback of this workaround is that a TLB miss is encountered every time we cross a page boundary. On a flat program execution, it means that we get a TLB miss every 1000 instructions. A TLB miss handling is around 30/40 instructions, which means a degradation of about 4% of the performance. It can be even worse if the program has a loop astride two pages. In the errata document from freescale, there is an example where they only invalidate the TLB when the page has the actual issue, in extenso when the page has the offending instruction at offset 0xffc, and they suggest to use the available PTE bits to tag pages in advance. I checked in asm/pte-8xx.h : we still have one SW bit available (0x0080). So I was thinking about using that bit to mark pages CPU15_SAFE when loading them if they don't have the offending instruction. Then, in the ITLBmiss handler, instead of always invalidating the preceding and following pages, we would check the SW bit in the PTE and invalidate the following page only if the current page is not marked CPU15_SAFE, then check the PTE of the preceding page and invalidate it only if it is not marked CPU15_SAFE. I believe this would improve the CPU15 errata handling and would reduce the overhead introduced by the handling of this errata. Do you see anything wrong with my proposal? 
Christophe ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 0/4] Unify CPU hotplug lock interface
On Sun, 2013-08-18 at 03:02 +0200, Rafael J. Wysocki wrote: On Saturday, August 17, 2013 01:46:55 PM Toshi Kani wrote: lock_device_hotplug() was recently introduced to serialize CPU Memory online/offline and hotplug operations, along with sysfs online interface restructure (commit 4f3549d7). With this new locking scheme, cpu_hotplug_driver_lock() is redundant and is no longer necessary. This patchset makes sure that lock_device_hotplug() covers all CPU online/ offline interfaces, and then removes cpu_hotplug_driver_lock(). The patchset is based on Linus's tree, 3.11.0-rc5. Nice series, thanks a lot for taking care of this! Hi Rafael, Per the recent your changes in lock_device_hotplug(), do you think it makes sense to integrate this patchset into your tree? I am also considering to add one more patch to use lock_device_hotplug_sysfs() in cpu_probe_store(). I will rebase to your tree and send them today if it makes sense to you. Thanks, -Toshi Rafael --- Toshi Kani (4): hotplug, x86: Fix online state in cpu0 debug interface hotplug, x86: Add hotplug lock to missing places hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86 hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock() --- arch/powerpc/kernel/smp.c | 12 -- arch/powerpc/platforms/pseries/dlpar.c | 40 +- arch/x86/Kconfig | 4 arch/x86/kernel/smpboot.c | 21 -- arch/x86/kernel/topology.c | 11 ++ drivers/base/cpu.c | 26 -- include/linux/cpu.h| 13 --- 7 files changed, 37 insertions(+), 90 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Feedback wished on possible improvement of CPU15 errata handling on mpc8xx
Linuxppc-dev linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org wrote on 2013/08/29 19:11:48: The mpc8xx powerpc has an errata identified CPU15 which is that whenever the last instruction of a page is a conditional branch to the last instruction of the next page, the CPU might do crazy things. To work around this errata, one of the workarounds proposed by freescale is: In the ITLB miss exception code, when loading the TLB for an MMU page, also invalidate any TLB referring to the next and previous page using tlbie. This intentionally forces an ITLB miss exception on every execution across sequential MMU page boundaries It is that workaround which has been implemented in the kernel. The drawback of this workaround is that TLB miss is encountered everytime we cross page boundary. On a flat program execution, it means that we get a TLB miss every 1000 instructions. A TLB miss handling is around 30/40 instructions, which means a degradation of about 4% of the performances. It can be even worse if the program has a loop astride two pages. In the errata document from freescale, there is an example where they only invalidate the TLB when the page has the actual issue, in extenso when the page has the offending instruction at offset 0xffc, and they suggest to use the available PTE bits to tag pages in advance. I checked in asm/pte-8xx.h : we still have one SW bit available (0x0080). So I was thinking about using that bit to mark pages CPU15_SAFE when loading them if they don't have the offending instruction. Then, in the ITLBmiss handler, instead of always invalidating preceeding and following pages, we would check SW bit in the PTE and invalidate following page only if current page is not marked CPU15_SAFE, then check the PTE of preceeding page and invalidate it only if it is not marked CPU15_SAFE I believe this would improve the CPU15 errata handling and would reduce the overhead introduced by the handling of this errata. 
Do you see anything wrong with my proposal ? Just that you are using up the last bit of the pte which will be needed at some point. Have you run into CPU15? We have been using 8xx for more than 10 years on kernel 2.4 and I don't think we ever run into this problem. If you go forward with this I suggest you use the WRITETHRU bit instead and make it so the user can choose which to use. If you want to optimize TLB misses you might want to add support for 8MB pages, I got the TLB and kernel memory done in my 2.4 kernel. You could start with that and add 8MB user space page. Jocke ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v12] ASoC: fsl: Add S/PDIF machine driver
On 08/27/2013 10:04 PM, Nicolin Chen wrote: This patch implements a device-tree-only machine driver for Freescale i.MX series Soc. It works with spdif_transmitter/spdif_receiver and fsl_spdif.c drivers. Sorry for the slow response. For the record, the binding, Acked-by: Stephen Warren swar...@nvidia.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] of: Feed entire flattened device tree into the random pool
On Mon, 29 Jul 2013 13:11:50 +1000, Anton Blanchard an...@samba.org wrote: Hi, be32_to_cpu(initial_boot_params-totalsize); Ouch, thanks Grant. Anton -- We feed the entire DMI table into the random pool to provide better random data during early boot, so do the same with the flattened device tree. Signed-off-by: Anton Blanchard an...@samba.org Applied, thanks g. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Feedback wished on possible improvment of CPU15 errata handling on mpc8xx
Le 29/08/2013 19:57, Joakim Tjernlund a écrit : Linuxppc-dev linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org wrote on 2013/08/29 19:11:48: The mpc8xx powerpc has an errata identified CPU15 which is that whenever the last instruction of a page is a conditional branch to the last instruction of the next page, the CPU might do crazy things. To work around this errata, one of the workarounds proposed by freescale is: In the ITLB miss exception code, when loading the TLB for an MMU page, also invalidate any TLB referring to the next and previous page using tlbie. This intentionally forces an ITLB miss exception on every execution across sequential MMU page boundaries It is that workaround which has been implemented in the kernel. The drawback of this workaround is that TLB miss is encountered everytime we cross page boundary. On a flat program execution, it means that we get a TLB miss every 1000 instructions. A TLB miss handling is around 30/40 instructions, which means a degradation of about 4% of the performances. It can be even worse if the program has a loop astride two pages. In the errata document from freescale, there is an example where they only invalidate the TLB when the page has the actual issue, in extenso when the page has the offending instruction at offset 0xffc, and they suggest to use the available PTE bits to tag pages in advance. I checked in asm/pte-8xx.h : we still have one SW bit available (0x0080). So I was thinking about using that bit to mark pages CPU15_SAFE when loading them if they don't have the offending instruction. 
Then, in the ITLBmiss handler, instead of always invalidating preceeding and following pages, we would check SW bit in the PTE and invalidate following page only if current page is not marked CPU15_SAFE, then check the PTE of preceeding page and invalidate it only if it is not marked CPU15_SAFE I believe this would improve the CPU15 errata handling and would reduce the overhead introduced by the handling of this errata. Do you see anything wrong with my proposal ? Just that you are using up the last bit of the pte which will be needed at some point. Have you run into CPU15? We have been using 8xx for more than 10 years on kernel 2.4 and I don't think we ever run into this problem. Ok, indeed I have activated the CPU15 errata in the kernel because I know my CPU has the bug. Do you think it can be deactivated without much risk though ? If you go forward with this I suggest you use the WRITETHRU bit instead and make it so the user can choose which to use. If you want to optimize TLB misses you might want to add support for 8MB pages, I got the TLB and kernel memory done in my 2.4 kernel. You could start with that and add 8MB user space page. In 2.6 Kernel we have CONFIG_PIN_TLB which pins the first 8Mbytes in ITLB and pins the first 24Mbytes in DTLB as far as I understand. Do we need more for the kernel ? I so, yes I would be interested in porting your code to 2.6 Wouldn't we waste memory by using 8Mbytes pages in user mode ? I read somewhere that Transparent Huge Pages have been ported on powerpc in future kernel 3.11. Therefore I was thinking about maybe adding support for hugepages into 8xx. 8xx has 512kbytes hugepages, I was thinking that maybe it would be more appropriate than 8Mbytes pages. Do you think it would be feasible and usefull to do this for embeddeds system having let say 32 to 128Mbytes RAM ? Christophe ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] hvc_xen: Remove unnecessary __GFP_ZERO from kzalloc
kzalloc already adds this __GFP_ZERO. Signed-off-by: Joe Perches j...@perches.com --- drivers/tty/hvc/hvc_xen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 682210d..e61c36c 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -208,7 +208,7 @@ static int xen_hvm_console_init(void) info = vtermno_to_xencons(HVC_COOKIE); if (!info) { - info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); + info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; } else if (info-intf != NULL) { @@ -257,7 +257,7 @@ static int xen_pv_console_init(void) info = vtermno_to_xencons(HVC_COOKIE); if (!info) { - info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); + info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; } else if (info-intf != NULL) { @@ -284,7 +284,7 @@ static int xen_initial_domain_console_init(void) info = vtermno_to_xencons(HVC_COOKIE); if (!info) { - info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); + info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; } -- 1.8.1.2.459.gbcd45b4.dirty ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Feedback wished on possible improvment of CPU15 errata handling on mpc8xx
leroy christophe christophe.le...@c-s.fr wrote on 2013/08/29 23:04:03: Le 29/08/2013 19:57, Joakim Tjernlund a écrit : Linuxppc-dev linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org wrote on 2013/08/29 19:11:48: The mpc8xx powerpc has an errata identified CPU15 which is that whenever the last instruction of a page is a conditional branch to the last instruction of the next page, the CPU might do crazy things. To work around this errata, one of the workarounds proposed by freescale is: In the ITLB miss exception code, when loading the TLB for an MMU page, also invalidate any TLB referring to the next and previous page using tlbie. This intentionally forces an ITLB miss exception on every execution across sequential MMU page boundaries It is that workaround which has been implemented in the kernel. The drawback of this workaround is that TLB miss is encountered everytime we cross page boundary. On a flat program execution, it means that we get a TLB miss every 1000 instructions. A TLB miss handling is around 30/40 instructions, which means a degradation of about 4% of the performances. It can be even worse if the program has a loop astride two pages. In the errata document from freescale, there is an example where they only invalidate the TLB when the page has the actual issue, in extenso when the page has the offending instruction at offset 0xffc, and they suggest to use the available PTE bits to tag pages in advance. I checked in asm/pte-8xx.h : we still have one SW bit available (0x0080). So I was thinking about using that bit to mark pages CPU15_SAFE when loading them if they don't have the offending instruction. 
Then, in the ITLBmiss handler, instead of always invalidating preceeding and following pages, we would check SW bit in the PTE and invalidate following page only if current page is not marked CPU15_SAFE, then check the PTE of preceeding page and invalidate it only if it is not marked CPU15_SAFE I believe this would improve the CPU15 errata handling and would reduce the overhead introduced by the handling of this errata. Do you see anything wrong with my proposal ? Just that you are using up the last bit of the pte which will be needed at some point. Have you run into CPU15? We have been using 8xx for more than 10 years on kernel 2.4 and I don't think we ever run into this problem. Ok, indeed I have activated the CPU15 errata in the kernel because I know my CPU has the bug. Do you think it can be deactivated without much risk though ? Can't say for you, all I know that our 860 and 862 CPUs seem to work OK. If you go forward with this I suggest you use the WRITETHRU bit instead and make it so the user can choose which to use. If you want to optimize TLB misses you might want to add support for 8MB pages, I got the TLB and kernel memory done in my 2.4 kernel. You could start with that and add 8MB user space page. In 2.6 Kernel we have CONFIG_PIN_TLB which pins the first 8Mbytes in ITLB and pins the first 24Mbytes in DTLB as far as I understand. Do we need more for the kernel ? I so, yes I would be interested in porting your code to 2.6 Yes, 2.4 has the same. There is a drawback with pinning though, you pin 4 ITLBs and 4 DTLBs. One only needs 1 ITLB for kernel so the other 3 are unused. 24MB DTLs is pretty statik, chances are that it is either too much or too little. Wouldn't we waste memory by using 8Mbytes pages in user mode ? Don't know the details of how user space deal with these pages, hopefully someone else knows better. I read somewhere that Transparent Huge Pages have been ported on powerpc in future kernel 3.11. 
Therefore I was thinking about maybe adding support for hugepages into 8xx. 8xx has 512kbytes hugepages, I was thinking that maybe it would be more appropriate than 8Mbytes pages. See previous comment, although 8MB pages is less TLB insn as I recall. Do you think it would be feasible and usefull to do this for embeddeds system having let say 32 to 128Mbytes RAM ? One could stop for just kernel memory. With 8MB pages there are some additional advantages compared with PINNED TLBs: - you map all kernel memory - you can also map other spaces, I got both IMMR/BCR and all my NOR FLASH mapped with 8MB pages. Jocke ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
AUTO: Michael Barry is out of the office (returning 11/09/2013)
I am out of the office until 11/09/2013. Note: This is an automated response to your message Linuxppc-dev Digest, Vol 108, Issue 258 sent on 29/08/2013 21:47:50. This is the only notification you will receive while this person is away. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC PATCH 1/1] powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100.
Stephen N Chivers/AUS/CSC wrote on 08/22/2013 10:58:10 AM: From: Stephen N Chivers/AUS/CSC To: Scott Wood scottw...@freescale.com Cc: b...@kernel.crashing.org, Chris Proctor cproc...@csc.com.au, linuxppc-dev@lists.ozlabs.org, pau...@samba.org, Stephen N Chivers schiv...@csc.com.au Date: 08/22/2013 10:58 AM Subject: Re: [RFC PATCH 1/1] powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100. Scott Wood scottw...@freescale.com wrote on 08/21/2013 09:20:03 AM: From: Scott Wood scottw...@freescale.com To: Stephen N Chivers schiv...@csc.com.au Cc: b...@kernel.crashing.org, Chris Proctor cproc...@csc.com.au, linuxppc-dev@lists.ozlabs.org, pau...@samba.org Date: 08/21/2013 09:20 AM Subject: Re: [RFC PATCH 1/1] powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100. On Tue, 2013-08-20 at 13:28 +1100, Stephen N Chivers wrote: Scott Wood scottw...@freescale.com wrote on 08/09/2013 11:35:20 AM: From: Scott Wood scottw...@freescale.com To: Stephen N Chivers schiv...@csc.com.au Cc: b...@kernel.crashing.org, pau...@samba.org, Chris Proctor cproc...@csc.com.au, linuxppc-dev@lists.ozlabs.org Date: 08/09/2013 11:36 AM Subject: Re: [RFC PATCH 1/1] powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100. simple-bus may be applicable here (in addition to a specific compatible). The HAWK ASIC is a difficult beast. I still cannot get a positive identification as to what it is (Motorola/Freescale part number unknown, not even the part number on the chip on the board helps). The best I can come up with is that it is a tsi108 without the ethenets. So device_type will be tsi-bridge and compatible will be tsi108-bridge. Don't use device_type. compatible should include hawk in the name (especially if you're not sure what's really in it), and/or the part number on the chip. 
If you're convinced it's fully compatible with tsi108-bridge you can add that as a second compatible value, though given the uncertainty it's probably better to just teach Linux to look for the new compatible. If devices on the bus can be used without any special bus setup or knowledge, then you can add a compatible of simple-bus to the end. Why not just look for a chrp,iic node directly? I was following the model used in other places, like chrp/setup.c. Not all examples are good examples. :-) + if ((np = of_find_compatible_node(NULL, pci, mpc10x-pci))) { Why insist on the device_type? Following the model in the linkstation (kurobox) platform support. Drop the device_type check. +static void +mvme5100_restart(char *cmd) +{ + volatile ulong i = 1000; + + + local_irq_disable(); + _nmask_and_or_msr(0, MSR_IP); Does mtmsr(mfmsr() | MSR_IP) not work? Don't know. Is from the original code by Matt Porter. It actually appears that there are no callers remaining that use the and portion of the functionality. In fact there are no callers that use it for anything other than setting MSR_IP. :-P + out_8((u_char *) BOARD_MODRST_REG, 0x01); + + while (i-- 0); Do not use a loop to implement a delay. Taken from the original code. But at this point the board is going to reset and reboot via firmware, as /sbin/reboot or /sbin/halt has been invoked. Still, it's just a bad idea. What's wrong with udelay()? Or just use an infinite loop. How much value is there really in timing out here? +static void __init +mvme5100_set_bat(void) +{ + + + mb(); + mtspr(SPRN_DBAT1U, 0xf0001ffe); + mtspr(SPRN_DBAT1L, 0xf02a); + mb(); + setbat(1, 0xfe00, 0xfe00, 0x0200, PAGE_KERNEL_NCG); +} It is no longer allowed to squat on random virtual address space like this. If you really need a BAT you'll have to allocate the virtual address properly. Yes. I found that this was an anathema when researching the port in 2010 but I couldn't find any practical solution at the time. 
The code is called early to ensure that the hawk registers are available. sysdev/cpm_common.c does the same thing. What is the correct solution? ioremap() has special code to function early (using ioremap_bot). If you still need to use a BAT that early, reserve the space with asm/fixmap.h or by adding a function to the early ioremap code to just reserve the space. Or better, improve the ioremap code to be capable of creating a BAT (or equivalent) when requested. It is really interesting. Given that the UART implementation on the HAWK is such that legacy_serial will not set up an early console it is very likely that the address translation set up by the bat is not required. I can
Re: [PATCH 0/4] Unify CPU hotplug lock interface
On Thursday, August 29, 2013 11:15:10 AM Toshi Kani wrote: On Sun, 2013-08-18 at 03:02 +0200, Rafael J. Wysocki wrote: On Saturday, August 17, 2013 01:46:55 PM Toshi Kani wrote: lock_device_hotplug() was recently introduced to serialize CPU Memory online/offline and hotplug operations, along with sysfs online interface restructure (commit 4f3549d7). With this new locking scheme, cpu_hotplug_driver_lock() is redundant and is no longer necessary. This patchset makes sure that lock_device_hotplug() covers all CPU online/ offline interfaces, and then removes cpu_hotplug_driver_lock(). The patchset is based on Linus's tree, 3.11.0-rc5. Nice series, thanks a lot for taking care of this! Hi Rafael, Per the recent your changes in lock_device_hotplug(), do you think it makes sense to integrate this patchset into your tree? I am also considering to add one more patch to use lock_device_hotplug_sysfs() in cpu_probe_store(). I will rebase to your tree and send them today if it makes sense to you. Yes, it does to me. Thanks, Rafael --- Toshi Kani (4): hotplug, x86: Fix online state in cpu0 debug interface hotplug, x86: Add hotplug lock to missing places hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86 hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock() --- arch/powerpc/kernel/smp.c | 12 -- arch/powerpc/platforms/pseries/dlpar.c | 40 +- arch/x86/Kconfig | 4 arch/x86/kernel/smpboot.c | 21 -- arch/x86/kernel/topology.c | 11 ++ drivers/base/cpu.c | 26 -- include/linux/cpu.h| 13 --- 7 files changed, 37 insertions(+), 90 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- I speak only for myself. Rafael J. Wysocki, Intel Open Source Technology Center. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 0/4] Unify CPU hotplug lock interface
On Fri, 2013-08-30 at 02:06 +0200, Rafael J. Wysocki wrote: On Thursday, August 29, 2013 11:15:10 AM Toshi Kani wrote: On Sun, 2013-08-18 at 03:02 +0200, Rafael J. Wysocki wrote: On Saturday, August 17, 2013 01:46:55 PM Toshi Kani wrote: lock_device_hotplug() was recently introduced to serialize CPU Memory online/offline and hotplug operations, along with sysfs online interface restructure (commit 4f3549d7). With this new locking scheme, cpu_hotplug_driver_lock() is redundant and is no longer necessary. This patchset makes sure that lock_device_hotplug() covers all CPU online/ offline interfaces, and then removes cpu_hotplug_driver_lock(). The patchset is based on Linus's tree, 3.11.0-rc5. Nice series, thanks a lot for taking care of this! Hi Rafael, Per the recent your changes in lock_device_hotplug(), do you think it makes sense to integrate this patchset into your tree? I am also considering to add one more patch to use lock_device_hotplug_sysfs() in cpu_probe_store(). I will rebase to your tree and send them today if it makes sense to you. Yes, it does to me. Great! I will send them shortly. Thanks, -Toshi ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 1/4] hotplug, x86: Fix online state in cpu0 debug interface
_debug_hotplug_cpu() is a debug interface that puts cpu0 offline during boot-up when CONFIG_DEBUG_HOTPLUG_CPU0 is set. After cpu0 is put offline in this interface, however, /sys/devices/system/cpu/cpu0/online still shows 1 (online). This patch fixes _debug_hotplug_cpu() to update dev-offline when CPU online/offline operation succeeded. Signed-off-by: Toshi Kani toshi.k...@hp.com Acked-by: Rafael J. Wysocki rafael.j.wyso...@intel.com --- arch/x86/kernel/topology.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 6e60b5f..5823bbd 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -72,16 +72,19 @@ int __ref _debug_hotplug_cpu(int cpu, int action) ret = cpu_down(cpu); if (!ret) { pr_info(CPU %u is now offline\n, cpu); + dev-offline = true; kobject_uevent(dev-kobj, KOBJ_OFFLINE); } else pr_debug(Can't offline CPU%d.\n, cpu); break; case 1: ret = cpu_up(cpu); - if (!ret) + if (!ret) { + dev-offline = false; kobject_uevent(dev-kobj, KOBJ_ONLINE); - else + } else { pr_debug(Can't online CPU%d.\n, cpu); + } break; default: ret = -EINVAL; ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 3/4] hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86
Commit d7c53c9e enabled ARCH_CPU_PROBE_RELEASE on x86 in order to serialize CPU online/offline operations. Although it is the config option to enable CPU hotplug test interfaces, probe release, it is also the option to enable cpu_hotplug_driver_lock() as well. Therefore, this option had to be enabled on x86 with dummy arch_cpu_probe() and arch_cpu_release(). Since then, lock_device_hotplug() was introduced to serialize CPU online/offline hotplug operations. Therefore, this config option is no longer required for the serialization. This patch disables this config option on x86 and revert the changes made by commit d7c53c9e. Signed-off-by: Toshi Kani toshi.k...@hp.com Acked-by: Rafael J. Wysocki rafael.j.wyso...@intel.com --- arch/x86/Kconfig |4 arch/x86/kernel/smpboot.c | 21 - 2 files changed, 25 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b32ebf9..c87e49a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -255,10 +255,6 @@ config ARCH_HWEIGHT_CFLAGS default -fcall-saved-ecx -fcall-saved-edx if X86_32 default -fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11 if X86_64 -config ARCH_CPU_PROBE_RELEASE - def_bool y - depends on HOTPLUG_CPU - config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index aecc98a..5b24a9d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -82,27 +82,6 @@ /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -#ifdef CONFIG_HOTPLUG_CPU -/* - * We need this for trampoline_base protection from concurrent accesses when - * off- and onlining cores wildly. 
- */ -static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); - -void cpu_hotplug_driver_lock(void) -{ - mutex_lock(x86_cpu_hotplug_driver_mutex); -} - -void cpu_hotplug_driver_unlock(void) -{ - mutex_unlock(x86_cpu_hotplug_driver_mutex); -} - -ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } -ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; } -#endif - /* Number of siblings per CPU package */ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 4/4] hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock()
cpu_hotplug_driver_lock() serializes CPU online/offline operations when ARCH_CPU_PROBE_RELEASE is set. This lock interface is no longer necessary with the following reason: - lock_device_hotplug() now protects CPU online/offline operations, including the probe release interfaces enabled by ARCH_CPU_PROBE_RELEASE. The use of cpu_hotplug_driver_lock() is redundant. - cpu_hotplug_driver_lock() is only valid when ARCH_CPU_PROBE_RELEASE is defined, which is misleading and is only enabled on powerpc. This patch removes the cpu_hotplug_driver_lock() interface. As a result, ARCH_CPU_PROBE_RELEASE only enables / disables the cpu probe release interface as intended. There is no functional change in this patch. Signed-off-by: Toshi Kani toshi.k...@hp.com Acked-by: Rafael J. Wysocki rafael.j.wyso...@intel.com Reviewed-by: Nathan Fontenot nf...@linux.vnet.ibm.com --- Performed build test only on powerpc. --- arch/powerpc/kernel/smp.c | 12 -- arch/powerpc/platforms/pseries/dlpar.c | 40 arch/x86/kernel/topology.c |2 -- drivers/base/cpu.c | 10 +--- include/linux/cpu.h| 13 -- 5 files changed, 16 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 38b0ba6..1667269 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -763,18 +763,6 @@ void __cpu_die(unsigned int cpu) smp_ops-cpu_die(cpu); } -static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex); - -void cpu_hotplug_driver_lock() -{ - mutex_lock(powerpc_cpu_hotplug_driver_mutex); -} - -void cpu_hotplug_driver_unlock() -{ - mutex_unlock(powerpc_cpu_hotplug_driver_mutex); -} - void cpu_die(void) { if (ppc_md.cpu_die) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index a1a7b9a..e39325d 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -387,18 +387,13 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) char *cpu_name; int rc; - cpu_hotplug_driver_lock(); rc = 
strict_strtoul(buf, 0, drc_index); - if (rc) { - rc = -EINVAL; - goto out; - } + if (rc) + return -EINVAL; dn = dlpar_configure_connector(drc_index); - if (!dn) { - rc = -EINVAL; - goto out; - } + if (!dn) + return -EINVAL; /* configure-connector reports cpus as living in the base * directory of the device tree. CPUs actually live in the @@ -407,8 +402,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) cpu_name = kasprintf(GFP_KERNEL, /cpus%s, dn-full_name); if (!cpu_name) { dlpar_free_cc_nodes(dn); - rc = -ENOMEM; - goto out; + return -ENOMEM; } kfree(dn-full_name); @@ -417,22 +411,21 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) rc = dlpar_acquire_drc(drc_index); if (rc) { dlpar_free_cc_nodes(dn); - rc = -EINVAL; - goto out; + return -EINVAL; } rc = dlpar_attach_node(dn); if (rc) { dlpar_release_drc(drc_index); dlpar_free_cc_nodes(dn); - goto out; + return rc; } rc = dlpar_online_cpu(dn); -out: - cpu_hotplug_driver_unlock(); + if (rc) + return rc; - return rc ? rc : count; + return count; } static int dlpar_offline_cpu(struct device_node *dn) @@ -505,30 +498,27 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return -EINVAL; } - cpu_hotplug_driver_lock(); rc = dlpar_offline_cpu(dn); if (rc) { of_node_put(dn); - rc = -EINVAL; - goto out; + return -EINVAL; } rc = dlpar_release_drc(*drc_index); if (rc) { of_node_put(dn); - goto out; + return rc; } rc = dlpar_detach_node(dn); if (rc) { dlpar_acquire_drc(*drc_index); - goto out; + return rc; } of_node_put(dn); -out: - cpu_hotplug_driver_unlock(); - return rc ? rc : count; + + return count; } static int __init pseries_dlpar_init(void) diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index a3f35eb..649b010 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -66,7 +66,6 @@ int __ref _debug_hotplug_cpu(int cpu, int action) return -EINVAL; lock_device_hotplug(); - cpu_hotplug_driver_lock(); switch
[PATCH v2 0/4] Unify CPU hotplug lock interface
lock_device_hotplug() was recently introduced to serialize CPU Memory online/offline and hotplug operations, along with sysfs online interface restructure (commit 4f3549d7). With this new locking scheme, cpu_hotplug_driver_lock() is redundant and is no longer necessary. This patchset makes sure that lock_device_hotplug() covers all CPU online/ offline interfaces, and then removes cpu_hotplug_driver_lock(). v2: - Rebased to the pm tree, bleeding-edge. - Changed patch 2/4 to use lock_device_hotplug_sysfs(). --- Toshi Kani (4): hotplug, x86: Fix online state in cpu0 debug interface hotplug, x86: Add hotplug lock to missing places hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86 hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock() --- arch/powerpc/kernel/smp.c | 12 -- arch/powerpc/platforms/pseries/dlpar.c | 40 +- arch/x86/Kconfig | 4 arch/x86/kernel/smpboot.c | 21 -- arch/x86/kernel/topology.c | 11 ++ drivers/base/cpu.c | 34 +++-- include/linux/cpu.h| 13 --- 7 files changed, 45 insertions(+), 90 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 0/4] Unify CPU hotplug lock interface
(2013/08/30 9:22), Toshi Kani wrote: lock_device_hotplug() was recently introduced to serialize CPU Memory online/offline and hotplug operations, along with sysfs online interface restructure (commit 4f3549d7). With this new locking scheme, cpu_hotplug_driver_lock() is redundant and is no longer necessary. This patchset makes sure that lock_device_hotplug() covers all CPU online/ offline interfaces, and then removes cpu_hotplug_driver_lock(). v2: - Rebased to the pm tree, bleeding-edge. - Changed patch 2/4 to use lock_device_hotplug_sysfs(). --- Toshi Kani (4): hotplug, x86: Fix online state in cpu0 debug interface hotplug, x86: Add hotplug lock to missing places hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86 hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock() --- The patch-set looks good to me. Acked-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com Thanks, Yasuaki Ishimatsu arch/powerpc/kernel/smp.c | 12 -- arch/powerpc/platforms/pseries/dlpar.c | 40 +- arch/x86/Kconfig | 4 arch/x86/kernel/smpboot.c | 21 -- arch/x86/kernel/topology.c | 11 ++ drivers/base/cpu.c | 34 +++-- include/linux/cpu.h| 13 --- 7 files changed, 45 insertions(+), 90 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 1/6] perf: New conditional branch filter criteria in branch stack sampling
POWER8 PMU based BHRB supports filtering for conditional branches. This patch introduces new branch filter PERF_SAMPLE_BRANCH_COND which will extend the existing perf ABI. Other architectures can provide this functionality with either HW filtering support (if present) or with SW filtering of instructions. Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com Reviewed-by: Stephane Eranian eran...@google.com --- include/uapi/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 0b1df41..5da52b6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -160,8 +160,9 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ PERF_SAMPLE_BRANCH_IN_TX= 1U << 8, /* in transaction */ PERF_SAMPLE_BRANCH_NO_TX= 1U << 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */ + PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */ }; #define PERF_SAMPLE_BRANCH_PLM_ALL \ -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 2/6] powerpc, perf: Enable conditional branch filter for POWER8
Enables conditional branch filter support for POWER8 utilizing MMCRA register based filter and also invalidates a BHRB branch filter combination involving conditional branches. Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com --- arch/powerpc/perf/power8-pmu.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 2ee4a70..6e28587 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -580,11 +580,21 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type) if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) return -1; + /* Invalid branch filter combination - HW does not support */ + if ((branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) && + (branch_sample_type & PERF_SAMPLE_BRANCH_COND)) + return -1; + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { pmu_bhrb_filter |= POWER8_MMCRA_IFM1; return pmu_bhrb_filter; } + if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) { + pmu_bhrb_filter |= POWER8_MMCRA_IFM3; + return pmu_bhrb_filter; + } + /* Every thing else is unsupported */ return -1; } -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 4/6] x86, perf: Add conditional branch filtering support
This patch adds conditional branch filtering support, enabling it for PERF_SAMPLE_BRANCH_COND in perf branch stack sampling framework by utilizing an available software filter X86_BR_JCC. Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com Reviewed-by: Stephane Eranian eran...@google.com --- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d5be06a..9723773 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -371,6 +371,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type PERF_SAMPLE_BRANCH_NO_TX) mask |= X86_BR_NO_TX; + if (br_type PERF_SAMPLE_BRANCH_COND) + mask |= X86_BR_JCC; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -665,6 +668,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { * NHM/WSM erratum: must include IND_JMP to capture IND_CALL */ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { @@ -676,6 +680,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR, [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; /* core */ -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 5/6] perf, documentation: Description for conditional branch filter
Adding documentation support for conditional branch filter. Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com Reviewed-by: Stephane Eranian eran...@google.com --- tools/perf/Documentation/perf-record.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e297b74..59ca8d0 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -163,12 +163,13 @@ following filters are defined: - any_call: any function call or system call - any_ret: any function return or system call return - ind_call: any indirect branch +- cond: conditional branches - u: only when the branch target is at the user level - k: only when the branch target is in the kernel - hv: only when the target is at the hypervisor level + -The option requires at least one branch type among any, any_call, any_ret, ind_call. +The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. The privilege levels may be omitted, in which case, the privilege levels of the associated event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege levels are subject to permissions. When sampling on multiple events, branch stack sampling -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 3/6] perf, tool: Conditional branch filter 'cond' added to perf record
Adding perf record support for new branch stack filter criteria PERF_SAMPLE_BRANCH_COND. Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com --- tools/perf/builtin-record.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ecca62e..802d11d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -625,6 +625,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT(any_call, PERF_SAMPLE_BRANCH_ANY_CALL), BRANCH_OPT(any_ret, PERF_SAMPLE_BRANCH_ANY_RETURN), BRANCH_OPT(ind_call, PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_OPT(cond, PERF_SAMPLE_BRANCH_COND), BRANCH_END }; -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V2 6/6] powerpc, perf: Enable SW filtering in branch stack sampling framework
This patch enables SW based post processing of BHRB captured branches to be able to meet more user defined branch filtration criteria in perf branch stack sampling framework. This change increases the number of filters and their valid combinations on powerpc64 platform with BHRB support. Summary of code changes described below. (1) struct cpu_hw_events Introduced two new variables and modified one to track various filters. a) bhrb_hw_filter Tracks PMU based HW branch filter flags. Computed from PMU dependent call back. b) bhrb_sw_filter Tracks SW based instruction filter flags Computed from PPC64 generic SW filter. c) filter_mask Tracks overall filter flags for PPC64 (2) Creating HW event with BHRB request Kernel would try to figure out supported HW filters through a PMU call back ppmu->bhrb_filter_map(). Here it would only invalidate unsupported HW filter combinations. In future we could process one element from the combination in HW and one in SW. Meanwhile cpuhw->filter_mask would be tracking the overall supported branch filter requests on the PMU. Kernel would also process the user request against available SW filters for PPC64. Then we would process filter_mask to verify whether all the user requested branch filters have been taken care of either in HW or in SW. (3) BHRB SW filter processing During the BHRB data capture inside the PMU interrupt context, each of the captured perf_branch_entry.from would be checked for compliance with applicable SW branch filters. If the entry does not conform to the filter requirements, it would be discarded from the final perf branch stack buffer. (4) Instruction classification for proposed SW filters Here are the list of category of instructions which have been classified under the proposed SW filters. 
(a) PERF_SAMPLE_BRANCH_ANY_RETURN (i) [Un]conditional branch to LR without setting the LR (1) blr (2) bclr (3) btlr (4) bflr (5) bdnzlr (6) bdnztlr (7) bdnzflr (8) bdzlr (9) bdztlr (10) bdzflr (11) bltlr (12) blelr (13) beqlr (14) bgelr (15) bgtlr (16) bnllr (17) bnelr (18) bnglr (19) bsolr (20) bnslr (21) biclr (22) bnilr (23) bunlr (24) bnulr (b) PERF_SAMPLE_BRANCH_IND_CALL (i) [Un]conditional branch to CTR with setting the link (1) bctrl (2) bcctrl (3) btctrl (4) bfctrl (5) bltctrl (6) blectrl (7) beqctrl (8) bgectrl (9) bgtctrl (10) bnlctrl (11) bnectrl (12) bngctrl (13) bsoctrl (14) bnsctrl (15) bicctrl (16) bnictrl (17) bunctrl (18) bnuctrl (ii) [Un]conditional branch to LR setting the link (0) bclrl (1) blrl (2) btlrl (3) bflrl (4) bdnzlrl (5) bdnztlrl (6) bdnzflrl (7) bdzlrl (8) bdztlrl (9) bdzflrl (10) bltlrl (11) blelrl (12) beqlrl (13) bgelrl (14) bgtlrl (15) bnllrl (16) bnelrl (17) bnglrl (18) bsolrl (19) bnslrl (20) biclrl (21) bnilrl (22) bunlrl (23) bnulrl (iii) [Un]conditional branch to TAR setting the link (1) btarl (2) bctarl Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com --- arch/powerpc/include/asm/perf_event_server.h | 2 +- arch/powerpc/perf/core-book3s.c | 200
[PATCH V2 0/6] perf: New conditional branch filter
This patchset is the re-spin of the original branch stack sampling patchset which introduced new PERF_SAMPLE_BRANCH_COND filter. This patchset also enables SW based branch filtering support for PPC64 platforms which have branch stack sampling support. With this new enablement, the branch filter support for PPC64 platforms have been extended to include all these combinations discussed below with a sample test application program. (1) perf record -e branch-misses:u -b ./cprog # Overhead Command Source Shared Object Source Symbol Target Shared Object Target Symbol # ... . . # 4.42%cprog cprog [k] sw_4_2 cprog [k] lr_addr 4.41%cprog cprog [k] symbol2cprog [k] hw_1_2 4.41%cprog cprog [k] ctr_addr cprog [k] sw_4_1 4.41%cprog cprog [k] lr_addrcprog [k] sw_4_2 4.41%cprog cprog [k] sw_4_2 cprog [k] callme 4.41%cprog cprog [k] symbol1cprog [k] hw_1_1 4.41%cprog cprog [k] success_3_1_3 cprog [k] sw_3_1 2.43%cprog cprog [k] sw_4_1 cprog [k] ctr_addr 2.43%cprog cprog [k] hw_1_2 cprog [k] symbol2 2.43%cprog cprog [k] callme cprog [k] hw_1_2 2.43%cprog cprog [k] address1 cprog [k] back1 2.43%cprog cprog [k] back1 cprog [k] callme 2.43%cprog cprog [k] hw_2_1 cprog [k] address1 2.43%cprog cprog [k] sw_3_1_1 cprog [k] sw_3_1 2.43%cprog cprog [k] sw_3_1_2 cprog [k] sw_3_1 2.43%cprog cprog [k] sw_3_1_3 cprog [k] sw_3_1 2.43%cprog cprog [k] sw_3_1 cprog [k] sw_3_1_1 2.43%cprog cprog [k] sw_3_1 cprog [k] sw_3_1_2 2.43%cprog cprog [k] sw_3_1 cprog [k] sw_3_1_3 2.43%cprog cprog [k] callme cprog [k] sw_3_1 2.43%cprog cprog [k] callme cprog [k] sw_4_2 2.43%cprog cprog [k] hw_1_1 cprog [k] symbol1 2.43%cprog cprog [k] callme cprog [k] hw_1_1 2.42%cprog cprog [k] sw_3_1 cprog [k] callme 1.99%cprog cprog [k] success_3_1_1 cprog [k] sw_3_1 1.99%cprog cprog [k] sw_3_1 cprog [k] success_3_1_1 1.99%cprog cprog [k] address2 cprog [k] back2 1.99%cprog cprog [k] hw_2_2 cprog [k] address2 1.99%cprog cprog [k] back2 cprog [k] callme 1.99%cprog cprog [k] callme cprog [k] main 1.99%cprog cprog [k] sw_3_1 
cprog [k] success_3_1_3 1.99%cprog cprog [k] hw_1_1 cprog [k] callme 1.99%cprog cprog [k] sw_3_2 cprog [k] callme 1.99%cprog cprog [k] callme cprog [k] sw_3_2 1.99%cprog cprog [k] success_3_1_2 cprog [k] sw_3_1 1.99%cprog cprog [k] sw_3_1 cprog [k] success_3_1_2 1.99%cprog cprog [k] hw_1_2 cprog [k] callme 1.99%cprog cprog [k] sw_4_1 cprog [k] callme 0.02%cprog [unknown] [k] 0xf7ba2328