[PATCH] powerpc: Enable /dev/port when isa_io_special is set

2013-08-29 Thread Benjamin Herrenschmidt
isa_io_special is set when the platform provides a special
implementation of inX/outX via some FW interface for example.

Such a platform doesn't need an ISA bridge on PCI, and so /dev/port
should be made available even if one isn't present.

This makes the LPC bus IOs accessible via /dev/port on PowerNV Power8

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---
 arch/powerpc/include/asm/io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 5a64757..edcc209 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -21,7 +21,7 @@ extern struct pci_dev *isa_bridge_pcidev;
 /*
  * has legacy ISA devices ?
  */
-#define arch_has_dev_port()(isa_bridge_pcidev != NULL)
+#define arch_has_dev_port()(isa_bridge_pcidev != NULL || isa_io_special)
 #endif
 
 #include <linux/device.h>


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/scom: Change scom_read() and scom_write() to return errors

2013-08-29 Thread Benjamin Herrenschmidt
scom_read() now returns the read value via a pointer argument and
both functions return an int error code

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---
 arch/powerpc/include/asm/scom.h   | 23 +--
 arch/powerpc/platforms/wsp/scom_smp.c | 18 +-
 arch/powerpc/platforms/wsp/scom_wsp.c | 12 
 arch/powerpc/platforms/wsp/wsp.c  | 13 +++--
 arch/powerpc/sysdev/scom.c|  3 +--
 5 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
index 0cabfd7..07dcdcf 100644
--- a/arch/powerpc/include/asm/scom.h
+++ b/arch/powerpc/include/asm/scom.h
@@ -54,8 +54,8 @@ struct scom_controller {
scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
void (*unmap)(scom_map_t map);
 
-   u64 (*read)(scom_map_t map, u32 reg);
-   void (*write)(scom_map_t map, u32 reg, u64 value);
+   int (*read)(scom_map_t map, u32 reg, u64 *value);
+   int (*write)(scom_map_t map, u32 reg, u64 value);
 };
 
 extern const struct scom_controller *scom_controller;
@@ -133,10 +133,18 @@ static inline void scom_unmap(scom_map_t map)
  * scom_read - Read a SCOM register
  * @map: Result of scom_map
  * @reg: Register index within that map
+ * @value: Updated with the value read
+ *
+ * Returns 0 (success) or a negative error code
  */
-static inline u64 scom_read(scom_map_t map, u32 reg)
+static inline int scom_read(scom_map_t map, u32 reg, u64 *value)
 {
-   return scom_controller->read(map, reg);
+   int rc;
+
+   rc = scom_controller->read(map, reg, value);
+   if (rc)
+   *value = 0xfffffffffffffffful;
+   return rc;
 }
 
 /**
@@ -144,12 +152,15 @@ static inline u64 scom_read(scom_map_t map, u32 reg)
  * @map: Result of scom_map
  * @reg: Register index within that map
  * @value: Value to write
+ *
+ * Returns 0 (success) or a negative error code
  */
-static inline void scom_write(scom_map_t map, u32 reg, u64 value)
+static inline int scom_write(scom_map_t map, u32 reg, u64 value)
 {
-   scom_controller->write(map, reg, value);
+   return scom_controller->write(map, reg, value);
 }
 
+
 #endif /* CONFIG_PPC_SCOM */
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/platforms/wsp/scom_smp.c 
b/arch/powerpc/platforms/wsp/scom_smp.c
index b56b70a..268bc89 100644
--- a/arch/powerpc/platforms/wsp/scom_smp.c
+++ b/arch/powerpc/platforms/wsp/scom_smp.c
@@ -116,7 +116,14 @@ static int a2_scom_ram(scom_map_t scom, int thread, u32 
insn, int extmask)
 
scom_write(scom, SCOM_RAMIC, cmd);
 
-   while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) {
+   for (;;) {
+   if (scom_read(scom, SCOM_RAMC, &val) != 0) {
+   pr_err("SCOM error on instruction 0x%08x, thread %d\n",
+  insn, thread);
+   return -1;
+   }
+   if (val & mask)
+   break;
pr_devel("Waiting on RAMC = 0x%llx\n", val);
if (++n == 3) {
pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
@@ -151,9 +158,7 @@ static int a2_scom_getgpr(scom_map_t scom, int thread, int 
gpr, int alt,
if (rc)
return rc;
 
-   *out_gpr = scom_read(scom, SCOM_RAMD);
-
-   return 0;
+   return scom_read(scom, SCOM_RAMD, out_gpr);
 }
 
 static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
@@ -353,7 +358,10 @@ int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, 
struct device_node *np)
 
pr_devel(Bringing up CPU%d using SCOM...\n, lcpu);
 
-   pccr0 = scom_read(scom, SCOM_PCCR0);
+   if (scom_read(scom, SCOM_PCCR0, &pccr0) != 0) {
+   printk(KERN_ERR "XSCOM failure reading PCCR0 on CPU%d\n", lcpu);
+   return -1;
+   }
scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
 SCOM_PCCR0_ENABLE_RAM);
 
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c 
b/arch/powerpc/platforms/wsp/scom_wsp.c
index 4052e22..54172c4 100644
--- a/arch/powerpc/platforms/wsp/scom_wsp.c
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -50,18 +50,22 @@ static void wsp_scom_unmap(scom_map_t map)
iounmap((void *)map);
 }
 
-static u64 wsp_scom_read(scom_map_t map, u32 reg)
+static int wsp_scom_read(scom_map_t map, u32 reg, u64 *value)
 {
u64 __iomem *addr = (u64 __iomem *)map;
 
-   return in_be64(addr + reg);
+   *value = in_be64(addr + reg);
+
+   return 0;
 }
 
-static void wsp_scom_write(scom_map_t map, u32 reg, u64 value)
+static int wsp_scom_write(scom_map_t map, u32 reg, u64 value)
 {
u64 __iomem *addr = (u64 __iomem *)map;
 
-   return out_be64(addr + reg, value);
+   out_be64(addr + reg, value);
+
+   return 0;
 }
 
 static const struct scom_controller wsp_scom_controller = {

[PATCH] powerpc/scom: Add support for reg property

2013-08-29 Thread Benjamin Herrenschmidt
When devices are direct children of a scom controller node, they
should be able to use the normal reg property instead of scom-reg.

In that case, they also use #address-cells rather than #scom-cells
to indicate the size of an entry.

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---
 arch/powerpc/sysdev/scom.c | 22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index 10f1d9e..413622d 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -53,7 +53,7 @@ scom_map_t scom_map_device(struct device_node *dev, int index)
 {
struct device_node *parent;
unsigned int cells, size;
-   const u32 *prop;
+   const __be32 *prop, *sprop;
u64 reg, cnt;
scom_map_t ret;
 
@@ -62,12 +62,24 @@ scom_map_t scom_map_device(struct device_node *dev, int 
index)
if (parent == NULL)
return 0;
 
-   prop = of_get_property(parent, "#scom-cells", NULL);
-   cells = prop ? *prop : 1;
-
+   /*
+* We support scom-reg properties for adding scom registers
+* to a random device-tree node with an explicit scom-parent
+*
+* We also support the simple reg property if the device is
+* a direct child of a scom controller.
+*
+* In case both exist, scom-reg takes precedence.
+*/
prop = of_get_property(dev, "scom-reg", &size);
+   sprop = of_get_property(parent, "#scom-cells", NULL);
+   if (!prop && parent == dev->parent) {
+   prop = of_get_property(dev, "reg", &size);
+   sprop = of_get_property(parent, "#address-cells", NULL);
+   }
if (!prop)
-   return 0;
+   return NULL;
+   cells = sprop ? be32_to_cpup(sprop) : 1;
size >>= 2;
 
if (index >= (size / (2*cells)))



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/powernv: Add scom support under OPALv3

2013-08-29 Thread Benjamin Herrenschmidt
OPAL v3 provides interfaces to access the chips XSCOM, expose
this via the existing scom infrastructure.

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---
 arch/powerpc/platforms/powernv/Kconfig  |   1 +
 arch/powerpc/platforms/powernv/Makefile |   1 +
 arch/powerpc/platforms/powernv/opal-xscom.c | 105 
 3 files changed, 107 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/opal-xscom.c

diff --git a/arch/powerpc/platforms/powernv/Kconfig 
b/arch/powerpc/platforms/powernv/Kconfig
index 6fae5eb..7f39da0 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -9,6 +9,7 @@ config PPC_POWERNV
select EPAPR_BOOT
select PPC_INDIRECT_PIO
select PPC_UDBG_16550
+   select PPC_SCOM
default y
 
 config POWERNV_MSI
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index 300c437..02dc1f5 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -4,3 +4,4 @@ obj-y   += opal-rtc.o opal-nvram.o opal-lpc.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_PCI)  += pci.o pci-p5ioc2.o pci-ioda.o
 obj-$(CONFIG_EEH)  += eeh-ioda.o eeh-powernv.o
+obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c 
b/arch/powerpc/platforms/powernv/opal-xscom.c
new file mode 100644
index 000..3ed5c64
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -0,0 +1,105 @@
+/*
+ * PowerNV SCOM bus support (via OPAL XSCOM calls).
+ *
+ * Copyright 2013 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/opal.h>
+#include <asm/scom.h>
+
+/*
+ * We could probably fit that inside the scom_map_t
+ * which is a void* after all but it's really too ugly
+ * so let's kmalloc it for now
+ */
+struct opal_scom_map {
+   uint32_t chip;
+   uint32_t addr;
+};
+
+static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
+{
+   struct opal_scom_map *m;
+   const __be32 *gcid;
+
+   if (!of_get_property(dev, "scom-controller", NULL)) {
+   pr_err("%s: device %s is not a SCOM controller\n",
+   __func__, dev->full_name);
+   return SCOM_MAP_INVALID;
+   }
+   gcid = of_get_property(dev, "ibm,chip-id", NULL);
+   if (!gcid) {
+   pr_err("%s: device %s has no ibm,chip-id\n",
+   __func__, dev->full_name);
+   return SCOM_MAP_INVALID;
+   }
+   m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
+   if (!m)
+   return NULL;
+   m->chip = be32_to_cpup(gcid);
+   m->addr = reg;
+
+   return (scom_map_t)m;
+}
+
+static void opal_scom_unmap(scom_map_t map)
+{
+   kfree(map);
+}
+
+static int opal_xscom_err_xlate(int64_t rc)
+{
+   switch(rc) {
+   case 0:
+   return 0;
+   /* Add more translations if necessary */
+   default:
+   return -EIO;
+   }
+}
+
+static int opal_scom_read(scom_map_t map, u32 reg, u64 *value)
+{
+   struct opal_scom_map *m = map;
+   int64_t rc;
+
+   rc = opal_xscom_read(m->chip, m->addr + reg, (uint64_t *)__pa(value));
+   return opal_xscom_err_xlate(rc);
+}
+
+static int opal_scom_write(scom_map_t map, u32 reg, u64 value)
+{
+   struct opal_scom_map *m = map;
+   int64_t rc;
+
+   rc = opal_xscom_write(m->chip, m->addr + reg, value);
+   return opal_xscom_err_xlate(rc);
+}
+
+static const struct scom_controller opal_scom_controller = {
+   .map= opal_scom_map,
+   .unmap  = opal_scom_unmap,
+   .read   = opal_scom_read,
+   .write  = opal_scom_write
+};
+
+static int opal_xscom_init(void)
+{
+   if (firmware_has_feature(FW_FEATURE_OPALv3))
+   scom_init(&opal_scom_controller);
+   return 0;
+}
+arch_initcall(opal_xscom_init);



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/scom: Create debugfs files using ibm,chip-id if available

2013-08-29 Thread Benjamin Herrenschmidt
When creating the debugfs scom files, use ibm,chip-id as the scom%d
index rather than a simple made up number when possible.

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---
 arch/powerpc/sysdev/scom.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index 413622d..cb20d54 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -196,8 +196,13 @@ static int scom_debug_init(void)
return -1;
 
i = rc = 0;
-   for_each_node_with_property(dn, "scom-controller")
-   rc |= scom_debug_init_one(root, dn, i++);
+   for_each_node_with_property(dn, "scom-controller") {
+   int id = of_get_ibm_chip_id(dn);
+   if (id == -1)
+   id = i;
+   rc |= scom_debug_init_one(root, dn, id);
+   i++;
+   }
 
return rc;
 }



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/scom: CONFIG_SCOM_DEBUGFS should depend on CONFIG_DEBUG_FS

2013-08-29 Thread Benjamin Herrenschmidt
Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---
 arch/powerpc/sysdev/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index ab4cb54..13ec968 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -28,7 +28,7 @@ config PPC_SCOM
 
 config SCOM_DEBUGFS
bool Expose SCOM controllers via debugfs
-   depends on PPC_SCOM
+   depends on PPC_SCOM && DEBUG_FS
default n
 
 config GE_FPGA


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/scom: Use devspec rather than path in debugfs entries

2013-08-29 Thread Benjamin Herrenschmidt

This is the traditional name for device-tree path, used in sysfs,
do the same for the XSCOM debugfs files.

Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
---
 arch/powerpc/sysdev/scom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index cb20d54..3963d99 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -180,7 +180,7 @@ static int scom_debug_init_one(struct dentry *root, struct 
device_node *dn,
 
debugfs_create_file("addr", 0600, dir, ent, &scom_addr_fops);
debugfs_create_file("value", 0600, dir, ent, &scom_val_fops);
-   debugfs_create_blob("path", 0400, dir, &ent->blob);
+   debugfs_create_blob("devspec", 0400, dir, &ent->blob);
 
return 0;
 }




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC PATCH v2 3/4] powerpc: refactor of_get_cpu_node to support other architectures

2013-08-29 Thread Lorenzo Pieralisi
On Wed, Aug 28, 2013 at 08:46:38PM +0100, Grant Likely wrote:
 On Thu, 22 Aug 2013 14:59:30 +0100, Mark Rutland mark.rutl...@arm.com wrote:
  On Mon, Aug 19, 2013 at 02:56:10PM +0100, Sudeep KarkadaNagesha wrote:
   On 19/08/13 14:02, Rob Herring wrote:
On 08/19/2013 05:19 AM, Mark Rutland wrote:
On Sat, Aug 17, 2013 at 11:09:36PM +0100, Benjamin Herrenschmidt wrote:
On Sat, 2013-08-17 at 12:50 +0200, Tomasz Figa wrote:
I wonder how would this handle uniprocessor ARM (pre-v7) cores, for
which 
the updated bindings[1] define #address-cells = 0 and so no reg 
property.
   
[1] - http://thread.gmane.org/gmane.linux.ports.arm.kernel/260795
   
Why did you do that in the binding ? That sounds like looking to 
create
problems ... 
   
Traditionally, UP setups just used 0 as the reg property on other
architectures, why do differently ?
   
The decision was taken because we defined our reg property to refer to
the MPIDR register's Aff{2,1,0} bitfields, and on UP cores before v7
there's no MPIDR register at all. Given there can only be a single CPU
in that case, describing a register that wasn't present didn't seem
necessary or helpful.

What exactly reg represents is up to the binding definition, but it
still should be present IMO. I don't see any issue with it being
different for pre-v7.

   Yes it's better to have 'reg' with value 0 than not having it.
   Otherwise this generic of_get_cpu_node implementation would need some
   _hack_ to handle that case.
  
  I'm not sure that having some code to handle a difference in standard
  between two architectures is a hack. If anything, I'd argue encoding a
  reg of 0 that corresponds to a nonexistent MPIDR value (given that's
  what the reg property is defined to map to on ARM) is more of a hack ;)
  
  I'm not averse to having a reg value of 0 for this case, but given that
  there are existing devicetrees without it, requiring a reg property will
  break compatibility with them.
 
 Then special cases those device trees, but you changing existing
 convention really needs to be avoided. The referenced documentation
 change is brand new, so we're not stuck with it.

I have no problem with changing the bindings and forcing:

#address-cells = 1;
reg = 0;

for UP predating v7, my big worry is related to in-kernel dts that we
already patched to follow the #address-cells = 0 rule (and we had to
do it since we got asked that question multiple times on the public
lists).

What do you mean by special case those device trees ? I have not
planned to patch them again, unless we really consider that a necessary
evil.

Thanks,
Lorenzo

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v12] ASoC: fsl: Add S/PDIF machine driver

2013-08-29 Thread Mark Brown
On Wed, Aug 28, 2013 at 12:04:46PM +0800, Nicolin Chen wrote:
 This patch implements a device-tree-only machine driver for Freescale
 i.MX series Soc. It works with spdif_transmitter/spdif_receiver and
 fsl_spdif.c drivers.

Applied, thanks.


signature.asc
Description: Digital signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH -next] ASoC: fsl_spdif: remove redundant dev_err call in fsl_spdif_probe()

2013-08-29 Thread Mark Brown
On Thu, Aug 29, 2013 at 08:00:05AM +0800, Wei Yongjun wrote:
 From: Wei Yongjun yongjun_...@trendmicro.com.cn
 
 There is a error message within devm_ioremap_resource
 already, so remove the dev_err call to avoid redundant
 error message.

Applied, thanks.


signature.asc
Description: Digital signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2] powerpc: Convert out of line __arch_hweight to inline

2013-08-29 Thread Madhavan Srinivasan
Hi Ben

On Wednesday 07 August 2013 04:48 PM, Madhavan Srinivasan wrote:
 Patch attempts to improve the performace of __arch_hweight functions by
 making them inline instead of current out of line implementation.
 
 Testcase is to disable/enable SMT on a large (192 thread) POWER7 lpar.
 Program used for SMT disable/enable is ppc64_cpu with --smt=[off/on]
 option. Here are the perf output. In this case, __arch_hweight64 is
 called by __bitmap_weight.
 
 Without patch (ppc64_cpu --smt=off):
 
  17.60%  ppc64_cpu  [kernel.kallsyms]   [k] .deactivate_slab
 
   4.85%  ppc64_cpu  [kernel.kallsyms]   [k] .__bitmap_weight
 
   1.36%  ppc64_cpu  [kernel.kallsyms]   [k] .__disable_runtime
   1.29%  ppc64_cpu  [kernel.kallsyms]   [k] .__arch_hweight64
 
 
 With patch (ppc64_cpu --smt=off):
 
  17.29%  ppc64_cpu  [kernel.kallsyms]   [k] .deactivate_slab
 
   3.71%  ppc64_cpu  [kernel.kallsyms]   [k] .__bitmap_weight
   3.26%  ppc64_cpu  [kernel.kallsyms]   [k] 
 .build_overlap_sched_groups
 
 
 Without patch (ppc64_cpu --smt=on):
 
   8.35%  ppc64_cpu  [kernel.kallsyms]   [k] .strlen
   7.00%  ppc64_cpu  [kernel.kallsyms]   [k] .memset
   6.78%  ppc64_cpu  [kernel.kallsyms]   [k] .__bitmap_weight
   4.23%  ppc64_cpu  [kernel.kallsyms]   [k] .deactivate_slab
 
   1.58%  ppc64_cpu  [kernel.kallsyms]   [k] 
 .refresh_zone_stat_thresholds
   1.57%  ppc64_cpu  [kernel.kallsyms]   [k] .__arch_hweight64
   1.54%  ppc64_cpu  [kernel.kallsyms]   [k] .__enable_runtime
 
 
 With patch (ppc64_cpu --smt=on):
 
   9.44%  ppc64_cpu  [kernel.kallsyms]   [k] .strlen
   6.43%  ppc64_cpu  [kernel.kallsyms]   [k] .memset
   5.48%  ppc64_cpu  [kernel.kallsyms]   [k] .__bitmap_weight
   4.59%  ppc64_cpu  [kernel.kallsyms]   [k] .insert_entry
   4.29%  ppc64_cpu  [kernel.kallsyms]   [k] .deactivate_slab
 
 
 Patch changes v2:
 
 1. Removed the arch/powerpc/lib/hweight_64.S file.
 
 Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com


Any question or suggestion for this patch.


 ---
  arch/powerpc/include/asm/bitops.h |  130 
 -
  arch/powerpc/include/asm/ppc-opcode.h |6 ++
  arch/powerpc/lib/Makefile |2 +-
  arch/powerpc/lib/hweight_64.S |  110 
  4 files changed, 133 insertions(+), 115 deletions(-)
  delete mode 100644 arch/powerpc/lib/hweight_64.S
 
 diff --git a/arch/powerpc/include/asm/bitops.h 
 b/arch/powerpc/include/asm/bitops.h
 index 910194e..136fe6a 100644
 --- a/arch/powerpc/include/asm/bitops.h
 +++ b/arch/powerpc/include/asm/bitops.h
 @@ -43,8 +43,10 @@
  #endif
 
  #include linux/compiler.h
 +#include linux/types.h
  #include asm/asm-compat.h
  #include asm/synch.h
 +#include asm/cputable.h
 
  /*
   * clear_bit doesn't imply a memory barrier
 @@ -263,10 +265,130 @@ static __inline__ int fls64(__u64 x)
  #endif /* __powerpc64__ */
 
  #ifdef CONFIG_PPC64
 -unsigned int __arch_hweight8(unsigned int w);
 -unsigned int __arch_hweight16(unsigned int w);
 -unsigned int __arch_hweight32(unsigned int w);
 -unsigned long __arch_hweight64(__u64 w);
 +
 +static inline unsigned int __arch_hweight8(unsigned int w)
 +{
 + unsigned int register iop asm(r3) = w;
 + unsigned int register tmp asm(r4);
 + __asm__ __volatile__ (
 + stringify_in_c(BEGIN_FTR_SECTION)
 + bl .__sw_hweight8;
 + nop;
 + stringify_in_c(FTR_SECTION_ELSE)
 + PPC_POPCNTB_M(%1,%2) ;
 + clrldi %0,%1,64-8;
 + stringify_in_c(ALT_FTR_SECTION_END_IFCLR((%3)))
 + : =r (iop), =r (tmp)
 + : r (iop), i (CPU_FTR_POPCNTB)
 + : r0, r1, r5, r6, r7, r8, r9,
 + r10, r11, r12, r13, r31, lr, cr0, xer);
 +
 + return iop;
 +}
 +
 +static inline unsigned int __arch_hweight16(unsigned int w)
 +{
 + unsigned int register iop asm(r3) = w;
 + unsigned int register tmp asm(r4);
 + __asm__ __volatile__ (
 + stringify_in_c(BEGIN_FTR_SECTION)
 + bl .__sw_hweight16;
 + nop;
 + nop;
 + nop;
 + nop;
 + stringify_in_c(FTR_SECTION_ELSE)
 + stringify_in_c(BEGIN_FTR_SECTION_NESTED(50))
 + PPC_POPCNTB_M(%0,%2) ;
 + srdi %1,%0,8;
 + add %0,%1,%0;
 + clrldi %0,%0,64-8;
 + stringify_in_c(FTR_SECTION_ELSE_NESTED(50))
 + clrlwi %0,%2,16;
 + PPC_POPCNTW_M(%1,%0) ;
 + clrldi %0,%1,64-8;
 + stringify_in_c(ALT_FTR_SECTION_END_NESTED_IFCLR(%4,50))
 + stringify_in_c(ALT_FTR_SECTION_END_IFCLR((%3)))
 + : =r (iop), =r (tmp)
 + : r (iop), i (CPU_FTR_POPCNTB), i (CPU_FTR_POPCNTD)
 + : r0, r1, r5, r6, r7, r8, r9,
 + r10, r11, r12, r13, r31, lr, cr0, xer);
 +
 + return iop;
 +}
 +
 +static inline unsigned int 

Re: [PATCH 03/10] crypto: nx - fix limits to sg lists for AES-CBC

2013-08-29 Thread Marcelo Cerri
On Thu, Aug 29, 2013 at 02:42:22PM +1000, Herbert Xu wrote:
 On Fri, Aug 23, 2013 at 05:01:07PM -0300, Marcelo Cerri wrote:
  This patch updates the nx-aes-cbc implementation to perform several
  hyper calls if needed in order to always respect the length limits for
  scatter/gather lists.
  
  Two different limits are considered:
  
   - ibm,max-sg-len: maximum number of bytes of each scatter/gather
 list.
  
   - ibm,max-sync-cop:
  - The total number of bytes that a scatter/gather list can hold.
  - The maximum number of elements that a scatter/gather list can have.
  
  Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
  Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
 
 This patch does not apply against the current cryptodev tree.
 
 Please regenerate your pathces.

Sorry for this. I'm sending a v2 series without conflicts.

 
 Thanks,
 -- 
 Email: Herbert Xu herb...@gondor.apana.org.au
 Home Page: http://gondor.apana.org.au/~herbert/
 PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
 --
 To unsubscribe from this list: send the line unsubscribe linux-crypto in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 00/10] Series of fixes for NX driver

2013-08-29 Thread Marcelo Cerri
This series of patches contains fixes in several algorithms implemented
by the NX driver. The patches can be separated in three different
categories:
  
 - Changes to split the data in several hyper calls to respect the
   limits of data that the co-processor can handle. This affects
   all AES modes.
 - Fixes in how the driver handle zero length messages. This affects
   XCBC and GCM.
 - Fixes for SHA-2 when chunks bigger than the block size are provided.

v2:
 - Fixed conflict.

Fionnuala Gunter (2):
  crypto: nx - fix limits to sg lists for AES-XCBC
  crypto: nx - fix limits to sg lists for AES-CCM

Marcelo Cerri (8):
  crypto: nx - add offset to nx_build_sg_lists()
  crypto: nx - fix limits to sg lists for AES-ECB
  crypto: nx - fix limits to sg lists for AES-CBC
  crypto: nx - fix limits to sg lists for AES-CTR
  crypto: nx - fix limits to sg lists for AES-GCM
  crypto: nx - fix XCBC for zero length messages
  crypto: nx - fix GCM for zero length messages
  crypto: nx - fix SHA-2 for chunks bigger than block size

 drivers/crypto/nx/nx-aes-cbc.c  |  50 ---
 drivers/crypto/nx/nx-aes-ccm.c  | 297 +---
 drivers/crypto/nx/nx-aes-ctr.c  |  50 ---
 drivers/crypto/nx/nx-aes-ecb.c  |  48 ---
 drivers/crypto/nx/nx-aes-gcm.c  | 292 ++-
 drivers/crypto/nx/nx-aes-xcbc.c | 191 +++---
 drivers/crypto/nx/nx-sha256.c   |   2 +-
 drivers/crypto/nx/nx-sha512.c   |   2 +-
 drivers/crypto/nx/nx.c  |   9 +-
 drivers/crypto/nx/nx.h  |   2 +-
 10 files changed, 683 insertions(+), 260 deletions(-)

-- 
1.7.12

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 01/10] crypto: nx - add offset to nx_build_sg_lists()

2013-08-29 Thread Marcelo Cerri
This patch includes one more parameter to nx_build_sg_lists() to skip
the given number of bytes from beginning of each sg list.

This is needed in order to implement the fixes for the AES modes to make
them able to process larger chunks of data.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-cbc.c | 2 +-
 drivers/crypto/nx/nx-aes-ccm.c | 4 ++--
 drivers/crypto/nx/nx-aes-ctr.c | 2 +-
 drivers/crypto/nx/nx-aes-ecb.c | 2 +-
 drivers/crypto/nx/nx-aes-gcm.c | 2 +-
 drivers/crypto/nx/nx.c | 9 +++--
 drivers/crypto/nx/nx.h | 2 +-
 7 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c
index 7c0237d..a9e76c6 100644
--- a/drivers/crypto/nx/nx-aes-cbc.c
+++ b/drivers/crypto/nx/nx-aes-cbc.c
@@ -85,7 +85,7 @@ static int cbc_aes_nx_crypt(struct blkcipher_desc *desc,
else
NX_CPB_FDM(csbcpb) &= ~NX_FDM_ENDE_ENCRYPT;
 
-   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes,
+   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0,
   csbcpb->cpb.aes_cbc.iv);
if (rc)
goto out;
diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c
index 39d4224..666a35b 100644
--- a/drivers/crypto/nx/nx-aes-ccm.c
+++ b/drivers/crypto/nx/nx-aes-ccm.c
@@ -293,7 +293,7 @@ static int ccm_nx_decrypt(struct aead_request   *req,
if (rc)
goto out;
 
-   rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes,
+   rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, 0,
   csbcpb-cpb.aes_ccm.iv_or_ctr);
if (rc)
goto out;
@@ -339,7 +339,7 @@ static int ccm_nx_encrypt(struct aead_request   *req,
if (rc)
goto out;
 
-   rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes,
+   rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, 0,
   csbcpb-cpb.aes_ccm.iv_or_ctr);
if (rc)
goto out;
diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c
index 762611b..80dee8d 100644
--- a/drivers/crypto/nx/nx-aes-ctr.c
+++ b/drivers/crypto/nx/nx-aes-ctr.c
@@ -98,7 +98,7 @@ static int ctr_aes_nx_crypt(struct blkcipher_desc *desc,
goto out;
}
 
-   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes,
+   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0,
   csbcpb-cpb.aes_ctr.iv);
if (rc)
goto out;
diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c
index 77dbe08..fe0d803 100644
--- a/drivers/crypto/nx/nx-aes-ecb.c
+++ b/drivers/crypto/nx/nx-aes-ecb.c
@@ -85,7 +85,7 @@ static int ecb_aes_nx_crypt(struct blkcipher_desc *desc,
else
NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT;
 
-   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, NULL);
+   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0, NULL);
if (rc)
goto out;
 
diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index 74feee1..c2d6f76 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -226,7 +226,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int 
enc)
 
csbcpb-cpb.aes_gcm.bit_length_data = nbytes * 8;
 
-   rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes,
+   rc = nx_build_sg_lists(nx_ctx, desc, req-dst, req-src, nbytes, 0,
   csbcpb-cpb.aes_gcm.iv_or_cnt);
if (rc)
goto out;
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index bdf4990..5533fe3 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -211,6 +211,8 @@ struct nx_sg *nx_walk_and_build(struct nx_sg   *nx_dst,
  * @dst: destination scatterlist
  * @src: source scatterlist
  * @nbytes: length of data described in the scatterlists
+ * @offset: number of bytes to fast-forward past at the beginning of
+ *  scatterlists.
  * @iv: destination for the iv data, if the algorithm requires it
  *
  * This is common code shared by all the AES algorithms. It uses the block
@@ -222,6 +224,7 @@ int nx_build_sg_lists(struct nx_crypto_ctx  *nx_ctx,
  struct scatterlist*dst,
  struct scatterlist*src,
  unsigned int   nbytes,
+ unsigned int   offset,
  u8*iv)
 {
struct nx_sg *nx_insg = nx_ctx-in_sg;
@@ -230,8 +233,10 @@ int nx_build_sg_lists(struct nx_crypto_ctx  *nx_ctx,
if (iv)
memcpy(iv, desc-info, AES_BLOCK_SIZE);
 
-   nx_insg = nx_walk_and_build(nx_insg, nx_ctx-ap-sglen, src, 0, nbytes);
-   nx_outsg = 

[PATCH v2 02/10] crypto: nx - fix limits to sg lists for AES-ECB

2013-08-29 Thread Marcelo Cerri
This patch updates the nx-aes-ecb implementation to perform several
hyper calls if needed in order to always respect the length limits for
scatter/gather lists.

Two different limits are considered:

 - ibm,max-sg-len: maximum number of bytes of each scatter/gather
   list.

 - ibm,max-sync-cop:
- The total number of bytes that a scatter/gather list can hold.
- The maximum number of elements that a scatter/gather list can have.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-ecb.c | 48 ++
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c
index fe0d803..85a8d23 100644
--- a/drivers/crypto/nx/nx-aes-ecb.c
+++ b/drivers/crypto/nx/nx-aes-ecb.c
@@ -71,37 +71,49 @@ static int ecb_aes_nx_crypt(struct blkcipher_desc *desc,
struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc-tfm);
struct nx_csbcpb *csbcpb = nx_ctx-csbcpb;
unsigned long irq_flags;
+   unsigned int processed = 0, to_process;
+   u32 max_sg_len;
int rc;
 
spin_lock_irqsave(nx_ctx-lock, irq_flags);
 
-   if (nbytes  nx_ctx-ap-databytelen) {
-   rc = -EINVAL;
-   goto out;
-   }
+   max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg),
+  nx_ctx-ap-sglen);
 
if (enc)
NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT;
else
NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT;
 
-   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0, NULL);
-   if (rc)
-   goto out;
+   do {
+   to_process = min_t(u64, nbytes - processed,
+  nx_ctx-ap-databytelen);
+   to_process = min_t(u64, to_process,
+  NX_PAGE_SIZE * (max_sg_len - 1));
+   to_process = to_process  ~(AES_BLOCK_SIZE - 1);
 
-   if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) {
-   rc = -EINVAL;
-   goto out;
-   }
+   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process,
+   processed, NULL);
+   if (rc)
+   goto out;
 
-   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
-  desc-flags  CRYPTO_TFM_REQ_MAY_SLEEP);
-   if (rc)
-   goto out;
+   if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) {
+   rc = -EINVAL;
+   goto out;
+   }
+
+   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
+  desc-flags  CRYPTO_TFM_REQ_MAY_SLEEP);
+   if (rc)
+   goto out;
+
+   atomic_inc((nx_ctx-stats-aes_ops));
+   atomic64_add(csbcpb-csb.processed_byte_count,
+(nx_ctx-stats-aes_bytes));
+
+   processed += to_process;
+   } while (processed  nbytes);
 
-   atomic_inc((nx_ctx-stats-aes_ops));
-   atomic64_add(csbcpb-csb.processed_byte_count,
-(nx_ctx-stats-aes_bytes));
 out:
spin_unlock_irqrestore(nx_ctx-lock, irq_flags);
return rc;
-- 
1.7.12

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 04/10] crypto: nx - fix limits to sg lists for AES-CTR

2013-08-29 Thread Marcelo Cerri
This patch updates the nx-aes-ctr implementation to perform several
hyper calls if needed in order to always respect the length limits for
scatter/gather lists.

Two different limits are considered:

 - ibm,max-sg-len: maximum number of bytes of each scatter/gather
   list.

 - ibm,max-sync-cop:
- The total number of bytes that a scatter/gather list can hold.
- The maximum number of elements that a scatter/gather list can have.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-ctr.c | 50 ++
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c
index 80dee8d..a37d009 100644
--- a/drivers/crypto/nx/nx-aes-ctr.c
+++ b/drivers/crypto/nx/nx-aes-ctr.c
@@ -89,33 +89,45 @@ static int ctr_aes_nx_crypt(struct blkcipher_desc *desc,
struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc-tfm);
struct nx_csbcpb *csbcpb = nx_ctx-csbcpb;
unsigned long irq_flags;
+   unsigned int processed = 0, to_process;
+   u32 max_sg_len;
int rc;
 
spin_lock_irqsave(nx_ctx-lock, irq_flags);
 
-   if (nbytes  nx_ctx-ap-databytelen) {
-   rc = -EINVAL;
-   goto out;
-   }
+   max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg),
+  nx_ctx-ap-sglen);
 
-   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0,
-  csbcpb-cpb.aes_ctr.iv);
-   if (rc)
-   goto out;
+   do {
+   to_process = min_t(u64, nbytes - processed,
+  nx_ctx-ap-databytelen);
+   to_process = min_t(u64, to_process,
+  NX_PAGE_SIZE * (max_sg_len - 1));
+   to_process = to_process  ~(AES_BLOCK_SIZE - 1);
 
-   if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) {
-   rc = -EINVAL;
-   goto out;
-   }
+   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process,
+  processed, csbcpb-cpb.aes_ctr.iv);
+   if (rc)
+   goto out;
 
-   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
-  desc-flags  CRYPTO_TFM_REQ_MAY_SLEEP);
-   if (rc)
-   goto out;
+   if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) {
+   rc = -EINVAL;
+   goto out;
+   }
 
-   atomic_inc((nx_ctx-stats-aes_ops));
-   atomic64_add(csbcpb-csb.processed_byte_count,
-(nx_ctx-stats-aes_bytes));
+   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
+  desc-flags  CRYPTO_TFM_REQ_MAY_SLEEP);
+   if (rc)
+   goto out;
+
+   memcpy(desc-info, csbcpb-cpb.aes_cbc.cv, AES_BLOCK_SIZE);
+
+   atomic_inc((nx_ctx-stats-aes_ops));
+   atomic64_add(csbcpb-csb.processed_byte_count,
+(nx_ctx-stats-aes_bytes));
+
+   processed += to_process;
+   } while (processed  nbytes);
 out:
spin_unlock_irqrestore(nx_ctx-lock, irq_flags);
return rc;
-- 
1.7.12

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 05/10] crypto: nx - fix limits to sg lists for AES-GCM

2013-08-29 Thread Marcelo Cerri
This patch updates the nx-aes-gcm implementation to perform several
hyper calls if needed in order to always respect the length limits for
scatter/gather lists.

Two different limits are considered:

 - ibm,max-sg-len: maximum number of bytes of each scatter/gather
   list.

 - ibm,max-sync-cop:
- The total number of bytes that a scatter/gather list can hold.
- The maximum number of elements that a scatter/gather list can have.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-gcm.c | 202 +++--
 1 file changed, 136 insertions(+), 66 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index c2d6f76..9e89bdf 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -125,37 +125,101 @@ static int nx_gca(struct nx_crypto_ctx  *nx_ctx,
  struct aead_request   *req,
  u8*out)
 {
+   int rc;
struct nx_csbcpb *csbcpb_aead = nx_ctx-csbcpb_aead;
-   int rc = -EINVAL;
struct scatter_walk walk;
struct nx_sg *nx_sg = nx_ctx-in_sg;
+   unsigned int nbytes = req-assoclen;
+   unsigned int processed = 0, to_process;
+   u32 max_sg_len;
 
-   if (req-assoclen  nx_ctx-ap-databytelen)
-   goto out;
-
-   if (req-assoclen = AES_BLOCK_SIZE) {
+   if (nbytes = AES_BLOCK_SIZE) {
scatterwalk_start(walk, req-assoc);
-   scatterwalk_copychunks(out, walk, req-assoclen,
-  SCATTERWALK_FROM_SG);
+   scatterwalk_copychunks(out, walk, nbytes, SCATTERWALK_FROM_SG);
scatterwalk_done(walk, SCATTERWALK_FROM_SG, 0);
-
-   rc = 0;
-   goto out;
+   return 0;
}
 
-   nx_sg = nx_walk_and_build(nx_sg, nx_ctx-ap-sglen, req-assoc, 0,
- req-assoclen);
-   nx_ctx-op_aead.inlen = (nx_ctx-in_sg - nx_sg) * sizeof(struct nx_sg);
+   NX_CPB_FDM(csbcpb_aead) = ~NX_FDM_CONTINUATION;
 
-   rc = nx_hcall_sync(nx_ctx, nx_ctx-op_aead,
-  req-base.flags  CRYPTO_TFM_REQ_MAY_SLEEP);
-   if (rc)
-   goto out;
+   /* page_limit: number of sg entries that fit on one page */
+   max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg),
+  nx_ctx-ap-sglen);
 
-   atomic_inc((nx_ctx-stats-aes_ops));
-   atomic64_add(req-assoclen, (nx_ctx-stats-aes_bytes));
+   do {
+   /*
+* to_process: the data chunk to process in this update.
+* This value is bound by sg list limits.
+*/
+   to_process = min_t(u64, nbytes - processed,
+  nx_ctx-ap-databytelen);
+   to_process = min_t(u64, to_process,
+  NX_PAGE_SIZE * (max_sg_len - 1));
+
+   if ((to_process + processed)  nbytes)
+   NX_CPB_FDM(csbcpb_aead) |= NX_FDM_INTERMEDIATE;
+   else
+   NX_CPB_FDM(csbcpb_aead) = ~NX_FDM_INTERMEDIATE;
+
+   nx_sg = nx_walk_and_build(nx_ctx-in_sg, nx_ctx-ap-sglen,
+ req-assoc, processed, to_process);
+   nx_ctx-op_aead.inlen = (nx_ctx-in_sg - nx_sg)
+   * sizeof(struct nx_sg);
+
+   rc = nx_hcall_sync(nx_ctx, nx_ctx-op_aead,
+   req-base.flags  CRYPTO_TFM_REQ_MAY_SLEEP);
+   if (rc)
+   return rc;
+
+   memcpy(csbcpb_aead-cpb.aes_gca.in_pat,
+   csbcpb_aead-cpb.aes_gca.out_pat,
+   AES_BLOCK_SIZE);
+   NX_CPB_FDM(csbcpb_aead) |= NX_FDM_CONTINUATION;
+
+   atomic_inc((nx_ctx-stats-aes_ops));
+   atomic64_add(req-assoclen, (nx_ctx-stats-aes_bytes));
+
+   processed += to_process;
+   } while (processed  nbytes);
 
memcpy(out, csbcpb_aead-cpb.aes_gca.out_pat, AES_BLOCK_SIZE);
+
+   return rc;
+}
+
+static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc,
+int enc)
+{
+   int rc;
+   struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req-base.tfm);
+   struct nx_csbcpb *csbcpb = nx_ctx-csbcpb;
+
+   /* For scenarios where the input message is zero length, AES CTR mode
+* may be used. Set the source data to be a single block (16B) of all
+* zeros, and set the input IV value to be the same as the GMAC IV
+* value. - nx_wb 4.8.1.3 */
+   char src[AES_BLOCK_SIZE] = {};
+   struct scatterlist sg;
+
+   desc-tfm = crypto_alloc_blkcipher(ctr(aes), 0, 0);
+   if (IS_ERR(desc-tfm)) {
+   rc = -ENOMEM;
+ 

[PATCH v2 06/10] crypto: nx - fix limits to sg lists for AES-XCBC

2013-08-29 Thread Marcelo Cerri
From: Fionnuala Gunter f...@linux.vnet.ibm.com

This patch updates the NX driver to perform several hyper calls when necessary
so that the length limits of scatter/gather lists are respected.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Reviewed-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
Signed-off-by: Fionnuala Gunter f...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-xcbc.c | 107 +++-
 1 file changed, 63 insertions(+), 44 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c
index 658da0f..1a5d9e3 100644
--- a/drivers/crypto/nx/nx-aes-xcbc.c
+++ b/drivers/crypto/nx/nx-aes-xcbc.c
@@ -88,78 +88,97 @@ static int nx_xcbc_update(struct shash_desc *desc,
struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(desc-tfm-base);
struct nx_csbcpb *csbcpb = nx_ctx-csbcpb;
struct nx_sg *in_sg;
-   u32 to_process, leftover;
+   u32 to_process, leftover, total;
+   u32 max_sg_len;
unsigned long irq_flags;
int rc = 0;
 
spin_lock_irqsave(nx_ctx-lock, irq_flags);
 
-   if (NX_CPB_FDM(csbcpb)  NX_FDM_CONTINUATION) {
-   /* we've hit the nx chip previously and we're updating again,
-* so copy over the partial digest */
-   memcpy(csbcpb-cpb.aes_xcbc.cv,
-  csbcpb-cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE);
-   }
+
+   total = sctx-count + len;
 
/* 2 cases for total data len:
 *  1: = AES_BLOCK_SIZE: copy into state, return 0
 *  2:  AES_BLOCK_SIZE: process X blocks, copy in leftover
 */
-   if (len + sctx-count = AES_BLOCK_SIZE) {
+   if (total = AES_BLOCK_SIZE) {
memcpy(sctx-buffer + sctx-count, data, len);
sctx-count += len;
goto out;
}
 
-   /* to_process: the AES_BLOCK_SIZE data chunk to process in this
-* update */
-   to_process = (sctx-count + len)  ~(AES_BLOCK_SIZE - 1);
-   leftover = (sctx-count + len)  (AES_BLOCK_SIZE - 1);
+   in_sg = nx_ctx-in_sg;
+   max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg),
+   nx_ctx-ap-sglen);
 
-   /* the hardware will not accept a 0 byte operation for this algorithm
-* and the operation MUST be finalized to be correct. So if we happen
-* to get an update that falls on a block sized boundary, we must
-* save off the last block to finalize with later. */
-   if (!leftover) {
-   to_process -= AES_BLOCK_SIZE;
-   leftover = AES_BLOCK_SIZE;
-   }
+   do {
 
-   if (sctx-count) {
-   in_sg = nx_build_sg_list(nx_ctx-in_sg, sctx-buffer,
-sctx-count, nx_ctx-ap-sglen);
-   in_sg = nx_build_sg_list(in_sg, (u8 *)data,
-to_process - sctx-count,
-nx_ctx-ap-sglen);
-   nx_ctx-op.inlen = (nx_ctx-in_sg - in_sg) *
-   sizeof(struct nx_sg);
-   } else {
-   in_sg = nx_build_sg_list(nx_ctx-in_sg, (u8 *)data, to_process,
-nx_ctx-ap-sglen);
+   /* to_process: the AES_BLOCK_SIZE data chunk to process in this
+* update */
+   to_process = min_t(u64, total, nx_ctx-ap-databytelen);
+   to_process = min_t(u64, to_process,
+   NX_PAGE_SIZE * (max_sg_len - 1));
+   to_process = to_process  ~(AES_BLOCK_SIZE - 1);
+   leftover = total - to_process;
+
+   /* the hardware will not accept a 0 byte operation for this
+* algorithm and the operation MUST be finalized to be correct.
+* So if we happen to get an update that falls on a block sized
+* boundary, we must save off the last block to finalize with
+* later. */
+   if (!leftover) {
+   to_process -= AES_BLOCK_SIZE;
+   leftover = AES_BLOCK_SIZE;
+   }
+
+   if (sctx-count) {
+   in_sg = nx_build_sg_list(nx_ctx-in_sg,
+   (u8 *) sctx-buffer,
+   sctx-count,
+   max_sg_len);
+   }
+   in_sg = nx_build_sg_list(in_sg,
+   (u8 *) data,
+   to_process - sctx-count,
+   max_sg_len);
nx_ctx-op.inlen = (nx_ctx-in_sg - in_sg) *
sizeof(struct nx_sg);
-   }
 
-   NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE;
+   /* we've hit the nx chip previously and we're updating again,
+   

[PATCH v2 07/10] crypto: nx - fix limits to sg lists for AES-CCM

2013-08-29 Thread Marcelo Cerri
From: Fionnuala Gunter f...@linux.vnet.ibm.com

This patch updates the NX driver to perform several hyper calls when necessary
so that the length limits of scatter/gather lists are respected.

Reviewed-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
Signed-off-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Fionnuala Gunter f...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-ccm.c | 297 +
 1 file changed, 215 insertions(+), 82 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c
index 666a35b..5ecd4c2 100644
--- a/drivers/crypto/nx/nx-aes-ccm.c
+++ b/drivers/crypto/nx/nx-aes-ccm.c
@@ -179,13 +179,26 @@ static int generate_pat(u8   *iv,
struct nx_sg *nx_insg = nx_ctx-in_sg;
struct nx_sg *nx_outsg = nx_ctx-out_sg;
unsigned int iauth_len = 0;
-   struct vio_pfo_op *op = NULL;
u8 tmp[16], *b1 = NULL, *b0 = NULL, *result = NULL;
int rc;
 
/* zero the ctr value */
memset(iv + 15 - iv[0], 0, iv[0] + 1);
 
+   /* page 78 of nx_wb.pdf has,
+* Note: RFC3610 allows the AAD data to be up to 2^64 -1 bytes
+* in length. If a full message is used, the AES CCA implementation
+* restricts the maximum AAD length to 2^32 -1 bytes.
+* If partial messages are used, the implementation supports
+* 2^64 -1 bytes maximum AAD length.
+*
+* However, in the cryptoapi's aead_request structure,
+* assoclen is an unsigned int, thus it cannot hold a length
+* value greater than 2^32 - 1.
+* Thus the AAD is further constrained by this and is never
+* greater than 2^32.
+*/
+
if (!req-assoclen) {
b0 = nx_ctx-csbcpb-cpb.aes_ccm.in_pat_or_b0;
} else if (req-assoclen = 14) {
@@ -195,7 +208,46 @@ static int generate_pat(u8   *iv,
b0 = nx_ctx-csbcpb-cpb.aes_ccm.in_pat_or_b0;
b1 = nx_ctx-priv.ccm.iauth_tag;
iauth_len = req-assoclen;
+   } else if (req-assoclen = 65280) {
+   /* if associated data is less than (2^16 - 2^8), we construct
+* B1 differently and feed in the associated data to a CCA
+* operation */
+   b0 = nx_ctx-csbcpb_aead-cpb.aes_cca.b0;
+   b1 = nx_ctx-csbcpb_aead-cpb.aes_cca.b1;
+   iauth_len = 14;
+   } else {
+   b0 = nx_ctx-csbcpb_aead-cpb.aes_cca.b0;
+   b1 = nx_ctx-csbcpb_aead-cpb.aes_cca.b1;
+   iauth_len = 10;
+   }
 
+   /* generate B0 */
+   rc = generate_b0(iv, req-assoclen, authsize, nbytes, b0);
+   if (rc)
+   return rc;
+
+   /* generate B1:
+* add control info for associated data
+* RFC 3610 and NIST Special Publication 800-38C
+*/
+   if (b1) {
+   memset(b1, 0, 16);
+   if (req-assoclen = 65280) {
+   *(u16 *)b1 = (u16)req-assoclen;
+   scatterwalk_map_and_copy(b1 + 2, req-assoc, 0,
+iauth_len, SCATTERWALK_FROM_SG);
+   } else {
+   *(u16 *)b1 = (u16)(0xfffe);
+   *(u32 *)b1[2] = (u32)req-assoclen;
+   scatterwalk_map_and_copy(b1 + 6, req-assoc, 0,
+iauth_len, SCATTERWALK_FROM_SG);
+   }
+   }
+
+   /* now copy any remaining AAD to scatterlist and call nx... */
+   if (!req-assoclen) {
+   return rc;
+   } else if (req-assoclen = 14) {
nx_insg = nx_build_sg_list(nx_insg, b1, 16, nx_ctx-ap-sglen);
nx_outsg = nx_build_sg_list(nx_outsg, tmp, 16,
nx_ctx-ap-sglen);
@@ -210,56 +262,74 @@ static int generate_pat(u8   *iv,
NX_CPB_FDM(nx_ctx-csbcpb) |= NX_FDM_ENDE_ENCRYPT;
NX_CPB_FDM(nx_ctx-csbcpb) |= NX_FDM_INTERMEDIATE;
 
-   op = nx_ctx-op;
result = nx_ctx-csbcpb-cpb.aes_ccm.out_pat_or_mac;
-   } else if (req-assoclen = 65280) {
-   /* if associated data is less than (2^16 - 2^8), we construct
-* B1 differently and feed in the associated data to a CCA
-* operation */
-   b0 = nx_ctx-csbcpb_aead-cpb.aes_cca.b0;
-   b1 = nx_ctx-csbcpb_aead-cpb.aes_cca.b1;
-   iauth_len = 14;
-
-   /* remaining assoc data must have scatterlist built for it */
-   nx_insg = nx_walk_and_build(nx_insg, nx_ctx-ap-sglen,
-   req-assoc, iauth_len,
-   req-assoclen - iauth_len);
-   nx_ctx-op_aead.inlen = (nx_ctx-in_sg - nx_insg) *
+
+   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
+

[PATCH v2 03/10] crypto: nx - fix limits to sg lists for AES-CBC

2013-08-29 Thread Marcelo Cerri
This patch updates the nx-aes-cbc implementation to perform several
hyper calls if needed in order to always respect the length limits for
scatter/gather lists.

Two different limits are considered:

 - ibm,max-sg-len: maximum number of bytes of each scatter/gather
   list.

 - ibm,max-sync-cop:
- The total number of bytes that a scatter/gather list can hold.
- The maximum number of elements that a scatter/gather list can have.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-cbc.c | 50 +-
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c
index a9e76c6..cc00b52 100644
--- a/drivers/crypto/nx/nx-aes-cbc.c
+++ b/drivers/crypto/nx/nx-aes-cbc.c
@@ -71,39 +71,49 @@ static int cbc_aes_nx_crypt(struct blkcipher_desc *desc,
struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc-tfm);
struct nx_csbcpb *csbcpb = nx_ctx-csbcpb;
unsigned long irq_flags;
+   unsigned int processed = 0, to_process;
+   u32 max_sg_len;
int rc;
 
spin_lock_irqsave(nx_ctx-lock, irq_flags);
 
-   if (nbytes  nx_ctx-ap-databytelen) {
-   rc = -EINVAL;
-   goto out;
-   }
+   max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg),
+  nx_ctx-ap-sglen);
 
if (enc)
NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT;
else
NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT;
 
-   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, nbytes, 0,
-  csbcpb-cpb.aes_cbc.iv);
-   if (rc)
-   goto out;
+   do {
+   to_process = min_t(u64, nbytes - processed,
+  nx_ctx-ap-databytelen);
+   to_process = min_t(u64, to_process,
+  NX_PAGE_SIZE * (max_sg_len - 1));
+   to_process = to_process  ~(AES_BLOCK_SIZE - 1);
 
-   if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) {
-   rc = -EINVAL;
-   goto out;
-   }
+   rc = nx_build_sg_lists(nx_ctx, desc, dst, src, to_process,
+  processed, csbcpb-cpb.aes_cbc.iv);
+   if (rc)
+   goto out;
 
-   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
-  desc-flags  CRYPTO_TFM_REQ_MAY_SLEEP);
-   if (rc)
-   goto out;
+   if (!nx_ctx-op.inlen || !nx_ctx-op.outlen) {
+   rc = -EINVAL;
+   goto out;
+   }
 
-   memcpy(desc-info, csbcpb-cpb.aes_cbc.cv, AES_BLOCK_SIZE);
-   atomic_inc((nx_ctx-stats-aes_ops));
-   atomic64_add(csbcpb-csb.processed_byte_count,
-(nx_ctx-stats-aes_bytes));
+   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
+  desc-flags  CRYPTO_TFM_REQ_MAY_SLEEP);
+   if (rc)
+   goto out;
+
+   memcpy(desc-info, csbcpb-cpb.aes_cbc.cv, AES_BLOCK_SIZE);
+   atomic_inc((nx_ctx-stats-aes_ops));
+   atomic64_add(csbcpb-csb.processed_byte_count,
+(nx_ctx-stats-aes_bytes));
+
+   processed += to_process;
+   } while (processed  nbytes);
 out:
spin_unlock_irqrestore(nx_ctx-lock, irq_flags);
return rc;
-- 
1.7.12

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 08/10] crypto: nx - fix XCBC for zero length messages

2013-08-29 Thread Marcelo Cerri
The NX XCBC implementation doesn't support zero-length messages, and
because of that NX is currently returning a hard-coded hash for zero-length
messages. However, this approach is incorrect since the hash value
also depends on which key is used.

This patch removes the hard-coded hash and replace it with an
implementation based on the RFC 3566 using ECB.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-xcbc.c | 84 +
 1 file changed, 77 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c
index 1a5d9e3..03c4bf5 100644
--- a/drivers/crypto/nx/nx-aes-xcbc.c
+++ b/drivers/crypto/nx/nx-aes-xcbc.c
@@ -56,6 +56,77 @@ static int nx_xcbc_set_key(struct crypto_shash *desc,
return 0;
 }
 
+/*
+ * Based on RFC 3566, for a zero-length message:
+ *
+ * n = 1
+ * K1 = E(K, 0x01010101010101010101010101010101)
+ * K3 = E(K, 0x03030303030303030303030303030303)
+ * E[0] = 0x
+ * M[1] = 0x8000 (0 length message with padding)
+ * E[1] = (K1, M[1] ^ E[0] ^ K3)
+ * Tag = M[1]
+ */
+static int nx_xcbc_empty(struct shash_desc *desc, u8 *out)
+{
+   struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(desc-tfm-base);
+   struct nx_csbcpb *csbcpb = nx_ctx-csbcpb;
+   struct nx_sg *in_sg, *out_sg;
+   u8 keys[2][AES_BLOCK_SIZE];
+   u8 key[32];
+   int rc = 0;
+
+   /* Change to ECB mode */
+   csbcpb-cpb.hdr.mode = NX_MODE_AES_ECB;
+   memcpy(key, csbcpb-cpb.aes_xcbc.key, AES_BLOCK_SIZE);
+   memcpy(csbcpb-cpb.aes_ecb.key, key, AES_BLOCK_SIZE);
+   NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT;
+
+   /* K1 and K3 base patterns */
+   memset(keys[0], 0x01, sizeof(keys[0]));
+   memset(keys[1], 0x03, sizeof(keys[1]));
+
+   /* Generate K1 and K3 encrypting the patterns */
+   in_sg = nx_build_sg_list(nx_ctx-in_sg, (u8 *) keys, sizeof(keys),
+nx_ctx-ap-sglen);
+   out_sg = nx_build_sg_list(nx_ctx-out_sg, (u8 *) keys, sizeof(keys),
+ nx_ctx-ap-sglen);
+   nx_ctx-op.inlen = (nx_ctx-in_sg - in_sg) * sizeof(struct nx_sg);
+   nx_ctx-op.outlen = (nx_ctx-out_sg - out_sg) * sizeof(struct nx_sg);
+
+   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
+  desc-flags  CRYPTO_TFM_REQ_MAY_SLEEP);
+   if (rc)
+   goto out;
+   atomic_inc((nx_ctx-stats-aes_ops));
+
+   /* XOr K3 with the padding for a 0 length message */
+   keys[1][0] ^= 0x80;
+
+   /* Encrypt the final result */
+   memcpy(csbcpb-cpb.aes_ecb.key, keys[0], AES_BLOCK_SIZE);
+   in_sg = nx_build_sg_list(nx_ctx-in_sg, (u8 *) keys[1], sizeof(keys[1]),
+nx_ctx-ap-sglen);
+   out_sg = nx_build_sg_list(nx_ctx-out_sg, out, AES_BLOCK_SIZE,
+ nx_ctx-ap-sglen);
+   nx_ctx-op.inlen = (nx_ctx-in_sg - in_sg) * sizeof(struct nx_sg);
+   nx_ctx-op.outlen = (nx_ctx-out_sg - out_sg) * sizeof(struct nx_sg);
+
+   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
+  desc-flags  CRYPTO_TFM_REQ_MAY_SLEEP);
+   if (rc)
+   goto out;
+   atomic_inc((nx_ctx-stats-aes_ops));
+
+out:
+   /* Restore XCBC mode */
+   csbcpb-cpb.hdr.mode = NX_MODE_AES_XCBC_MAC;
+   memcpy(csbcpb-cpb.aes_xcbc.key, key, AES_BLOCK_SIZE);
+   NX_CPB_FDM(csbcpb) = ~NX_FDM_ENDE_ENCRYPT;
+
+   return rc;
+}
+
 static int nx_xcbc_init(struct shash_desc *desc)
 {
struct xcbc_state *sctx = shash_desc_ctx(desc);
@@ -201,13 +272,12 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out)
memcpy(csbcpb-cpb.aes_xcbc.cv,
   csbcpb-cpb.aes_xcbc.out_cv_mac, AES_BLOCK_SIZE);
} else if (sctx-count == 0) {
-   /* we've never seen an update, so this is a 0 byte op. The
-* hardware cannot handle a 0 byte op, so just copy out the
-* known 0 byte result. This is cheaper than allocating a
-* software context to do a 0 byte op */
-   u8 data[] = { 0x75, 0xf0, 0x25, 0x1d, 0x52, 0x8a, 0xc0, 0x1c,
- 0x45, 0x73, 0xdf, 0xd5, 0x84, 0xd7, 0x9f, 0x29 };
-   memcpy(out, data, sizeof(data));
+   /*
+* we've never seen an update, so this is a 0 byte op. The
+* hardware cannot handle a 0 byte op, so just ECB to
+* generate the hash.
+*/
+   rc = nx_xcbc_empty(desc, out);
goto out;
}
 
-- 
1.7.12

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 09/10] crypto: nx - fix GCM for zero length messages

2013-08-29 Thread Marcelo Cerri
The NX GCM implementation doesn't support zero-length messages and the
current implementation has two flaws:

 - When the input data length is zero, it ignores the associated data.
 - Even when both lengths are zero, it uses the Crypto API to encrypt a
   zeroed block using ctr(aes) and because of this it allocates a new
   transformation and sets the key for this new tfm. Both operations are
   intended to be used only in user context, while the cryptographic
   operations can be called in both user and softirq contexts.

This patch replaces the nested Crypto API use and adds two special
cases:

 - When input data and associated data lengths are zero: it uses NX ECB
   mode to emulate the encryption of a zeroed block using ctr(aes).
 - When input data is zero and associated data is available: it uses NX
   GMAC mode to calculate the associated data MAC.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-aes-gcm.c | 132 ++---
 1 file changed, 112 insertions(+), 20 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index 9e89bdf..025d9a8 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -187,40 +187,125 @@ static int nx_gca(struct nx_crypto_ctx  *nx_ctx,
return rc;
 }
 
+static int gmac(struct aead_request *req, struct blkcipher_desc *desc)
+{
+   int rc;
+   struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req-base.tfm);
+   struct nx_csbcpb *csbcpb = nx_ctx-csbcpb;
+   struct nx_sg *nx_sg;
+   unsigned int nbytes = req-assoclen;
+   unsigned int processed = 0, to_process;
+   u32 max_sg_len;
+
+   /* Set GMAC mode */
+   csbcpb-cpb.hdr.mode = NX_MODE_AES_GMAC;
+
+   NX_CPB_FDM(csbcpb) = ~NX_FDM_CONTINUATION;
+
+   /* page_limit: number of sg entries that fit on one page */
+   max_sg_len = min_t(u32, nx_driver.of.max_sg_len/sizeof(struct nx_sg),
+  nx_ctx-ap-sglen);
+
+   /* Copy IV */
+   memcpy(csbcpb-cpb.aes_gcm.iv_or_cnt, desc-info, AES_BLOCK_SIZE);
+
+   do {
+   /*
+* to_process: the data chunk to process in this update.
+* This value is bound by sg list limits.
+*/
+   to_process = min_t(u64, nbytes - processed,
+  nx_ctx-ap-databytelen);
+   to_process = min_t(u64, to_process,
+  NX_PAGE_SIZE * (max_sg_len - 1));
+
+   if ((to_process + processed)  nbytes)
+   NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE;
+   else
+   NX_CPB_FDM(csbcpb) = ~NX_FDM_INTERMEDIATE;
+
+   nx_sg = nx_walk_and_build(nx_ctx-in_sg, nx_ctx-ap-sglen,
+ req-assoc, processed, to_process);
+   nx_ctx-op.inlen = (nx_ctx-in_sg - nx_sg)
+   * sizeof(struct nx_sg);
+
+   csbcpb-cpb.aes_gcm.bit_length_data = 0;
+   csbcpb-cpb.aes_gcm.bit_length_aad = 8 * nbytes;
+
+   rc = nx_hcall_sync(nx_ctx, nx_ctx-op,
+   req-base.flags  CRYPTO_TFM_REQ_MAY_SLEEP);
+   if (rc)
+   goto out;
+
+   memcpy(csbcpb-cpb.aes_gcm.in_pat_or_aad,
+   csbcpb-cpb.aes_gcm.out_pat_or_mac, AES_BLOCK_SIZE);
+   memcpy(csbcpb-cpb.aes_gcm.in_s0,
+   csbcpb-cpb.aes_gcm.out_s0, AES_BLOCK_SIZE);
+
+   NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
+
+   atomic_inc((nx_ctx-stats-aes_ops));
+   atomic64_add(req-assoclen, (nx_ctx-stats-aes_bytes));
+
+   processed += to_process;
+   } while (processed  nbytes);
+
+out:
+   /* Restore GCM mode */
+   csbcpb-cpb.hdr.mode = NX_MODE_AES_GCM;
+   return rc;
+}
+
 static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc,
 int enc)
 {
int rc;
struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req-base.tfm);
struct nx_csbcpb *csbcpb = nx_ctx-csbcpb;
+   char out[AES_BLOCK_SIZE];
+   struct nx_sg *in_sg, *out_sg;
 
/* For scenarios where the input message is zero length, AES CTR mode
 * may be used. Set the source data to be a single block (16B) of all
 * zeros, and set the input IV value to be the same as the GMAC IV
 * value. - nx_wb 4.8.1.3 */
-   char src[AES_BLOCK_SIZE] = {};
-   struct scatterlist sg;
 
-   desc-tfm = crypto_alloc_blkcipher(ctr(aes), 0, 0);
-   if (IS_ERR(desc-tfm)) {
-   rc = -ENOMEM;
-   goto out;
-   }
-
-   crypto_blkcipher_setkey(desc-tfm, csbcpb-cpb.aes_gcm.key,
-   NX_CPB_KEY_SIZE(csbcpb) == NX_KS_AES_128 ? 16 :
-   

[PATCH v2 10/10] crypto: nx - fix SHA-2 for chunks bigger than block size

2013-08-29 Thread Marcelo Cerri
Each call to the co-processor, with the exception of the last call, needs to
send data that is a multiple of the block size. As a consequence, any remaining
data is kept in the internal NX context.

This patch fixes a bug in the driver that causes it to save incorrect
data into the context when data is bigger than the block size.

Reviewed-by: Joy Latten jmlat...@linux.vnet.ibm.com
Signed-off-by: Marcelo Cerri mhce...@linux.vnet.ibm.com
---
 drivers/crypto/nx/nx-sha256.c | 2 +-
 drivers/crypto/nx/nx-sha512.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c
index 6547a71..da0b24a 100644
--- a/drivers/crypto/nx/nx-sha256.c
+++ b/drivers/crypto/nx/nx-sha256.c
@@ -129,7 +129,7 @@ static int nx_sha256_update(struct shash_desc *desc, const 
u8 *data,
NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
 
total -= to_process;
-   data += to_process;
+   data += to_process - sctx-count;
sctx-count = 0;
in_sg = nx_ctx-in_sg;
} while (leftover = SHA256_BLOCK_SIZE);
diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c
index 236e6af..4ae5b0f 100644
--- a/drivers/crypto/nx/nx-sha512.c
+++ b/drivers/crypto/nx/nx-sha512.c
@@ -131,7 +131,7 @@ static int nx_sha512_update(struct shash_desc *desc, const 
u8 *data,
NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
 
total -= to_process;
-   data += to_process;
+   data += to_process - sctx-count[0];
sctx-count[0] = 0;
in_sg = nx_ctx-in_sg;
} while (leftover = SHA512_BLOCK_SIZE);
-- 
1.7.12

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Feedback wished on possible improvement of CPU15 errata handling on mpc8xx

2013-08-29 Thread leroy christophe
The mpc8xx powerpc has an errata identified CPU15 which is that whenever 
the last instruction of a page is a conditional branch to the last 
instruction of the next page, the CPU might do crazy things.


To work around this errata, one of the workarounds proposed by freescale is:
In the ITLB miss exception code, when loading the TLB for an MMU page, 
also invalidate any TLB referring to the next and previous page using 
tlbie. This intentionally forces an ITLB miss exception on every 
execution across sequential MMU page boundaries


It is that workaround which has been implemented in the kernel. The 
drawback of this workaround is that a TLB miss is encountered every time 
we cross a page boundary. On a flat program execution, it means that we get a 
TLB miss every 1000 instructions. A TLB miss handling is around 30/40 
instructions, which means a degradation of about 4% of the performance.

It can be even worse if the program has a loop astride two pages.

In the errata document from freescale, there is an example where they 
only invalidate the TLB when the page has the actual issue, in extenso 
when the page has the offending instruction at offset 0xffc, and they 
suggest to use the available PTE bits to tag pages in advance.


I checked in asm/pte-8xx.h : we still have one SW bit available 
(0x0080). So I was thinking about using that bit to mark pages 
CPU15_SAFE when loading them if they don't have the offending instruction.


Then, in the ITLBmiss handler, instead of always invalidating preceding 
and following pages, we would check SW bit in the PTE and invalidate 
following page only if current page is not marked CPU15_SAFE, then check 
the PTE of preceding page and invalidate it only if it is not marked 
CPU15_SAFE


I believe this would improve the CPU15 errata handling and would reduce 
the overhead introduced by the handling of this errata.


Do you see anything wrong with my proposal ?

Christophe
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 0/4] Unify CPU hotplug lock interface

2013-08-29 Thread Toshi Kani
On Sun, 2013-08-18 at 03:02 +0200, Rafael J. Wysocki wrote:
 On Saturday, August 17, 2013 01:46:55 PM Toshi Kani wrote:
  lock_device_hotplug() was recently introduced to serialize CPU  Memory
  online/offline and hotplug operations, along with sysfs online interface
  restructure (commit 4f3549d7).  With this new locking scheme,
  cpu_hotplug_driver_lock() is redundant and is no longer necessary.
  
  This patchset makes sure that lock_device_hotplug() covers all CPU online/
  offline interfaces, and then removes cpu_hotplug_driver_lock().
  
  The patchset is based on Linus's tree, 3.11.0-rc5.
 
 Nice series, thanks a lot for taking care of this!

Hi Rafael,

Per the recent your changes in lock_device_hotplug(), do you think it
makes sense to integrate this patchset into your tree?  I am also
considering to add one more patch to use lock_device_hotplug_sysfs() in
cpu_probe_store().  I will rebase to your tree and send them today if it
makes sense to you.

Thanks,
-Toshi


 
 Rafael
 
 
  ---
  Toshi Kani (4):
hotplug, x86: Fix online state in cpu0 debug interface
hotplug, x86: Add hotplug lock to missing places
hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86
hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock()
  
  ---
   arch/powerpc/kernel/smp.c  | 12 --
   arch/powerpc/platforms/pseries/dlpar.c | 40 
  +-
   arch/x86/Kconfig   |  4 
   arch/x86/kernel/smpboot.c  | 21 --
   arch/x86/kernel/topology.c | 11 ++
   drivers/base/cpu.c | 26 --
   include/linux/cpu.h| 13 ---
   7 files changed, 37 insertions(+), 90 deletions(-)
  --
  To unsubscribe from this list: send the line unsubscribe linux-kernel in
  the body of a message to majord...@vger.kernel.org
  More majordomo info at  http://vger.kernel.org/majordomo-info.html
  Please read the FAQ at  http://www.tux.org/lkml/


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Feedback wished on possible improvement of CPU15 errata handling on mpc8xx

2013-08-29 Thread Joakim Tjernlund
Linuxppc-dev 
linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org 
wrote on 2013/08/29 19:11:48:
 The mpc8xx powerpc has an errata identified CPU15 which is that whenever 

 the last instruction of a page is a conditional branch to the last 
 instruction of the next page, the CPU might do crazy things.
 
 To work around this errata, one of the workarounds proposed by freescale 
is:
 In the ITLB miss exception code, when loading the TLB for an MMU page, 
 also invalidate any TLB referring to the next and previous page using 
 tlbie. This intentionally forces an ITLB miss exception on every 
 execution across sequential MMU page boundaries
 
 It is that workaround which has been implemented in the kernel. The 
 drawback of this workaround is that TLB miss is encountered everytime we 

 cross page boundary. On a flat program execution, it means that we get a 

 TLB miss every 1000 instructions. A TLB miss handling is around 30/40 
 instructions, which means a degradation of about 4% of the performances.
 It can be even worse if the program has a loop astride two pages.
 
 In the errata document from freescale, there is an example where they 
 only invalidate the TLB when the page has the actual issue, in extenso 
 when the page has the offending instruction at offset 0xffc, and they 
 suggest to use the available PTE bits to tag pages in advance.
 
 I checked in asm/pte-8xx.h : we still have one SW bit available 
 (0x0080). So I was thinking about using that bit to mark pages 
 CPU15_SAFE when loading them if they don't have the offending 
instruction.
 
 Then, in the ITLBmiss handler, instead of always invalidating preceeding 

 and following pages, we would check SW bit in the PTE and invalidate 
 following page only if current page is not marked CPU15_SAFE, then check 

 the PTE of preceeding page and invalidate it only if it is not marked 
 CPU15_SAFE
 
 I believe this would improve the CPU15 errata handling and would reduce 
 the overhead introduced by the handling of this errata.
 
 Do you see anything wrong with my proposal ?

Just that you are using up the last bit of the pte which will be needed at 
some point.
Have you run into CPU15? We have been using 8xx for more than 10 years on 
kernel 2.4 and I
don't think we ever run into this problem.
If you go forward with this I suggest you use the WRITETHRU bit instead 
and make
it so the user can choose which to use.

If you want to optimize TLB misses you might want to add support for 8MB 
pages, I got
the TLB and kernel memory done in my 2.4 kernel. You could start with that 
and
add 8MB user space page.

  Jocke
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v12] ASoC: fsl: Add S/PDIF machine driver

2013-08-29 Thread Stephen Warren
On 08/27/2013 10:04 PM, Nicolin Chen wrote:
 This patch implements a device-tree-only machine driver for Freescale
 i.MX series Soc. It works with spdif_transmitter/spdif_receiver and
 fsl_spdif.c drivers.

Sorry for the slow response. For the record, the binding,
Acked-by: Stephen Warren swar...@nvidia.com
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] of: Feed entire flattened device tree into the random pool

2013-08-29 Thread Grant Likely
On Mon, 29 Jul 2013 13:11:50 +1000, Anton Blanchard an...@samba.org wrote:
 
 Hi,
 
  be32_to_cpu(initial_boot_params-totalsize);
 
 Ouch, thanks Grant.
 
 Anton
 --
 
 We feed the entire DMI table into the random pool to provide
 better random data during early boot, so do the same with the
 flattened device tree.
 
 Signed-off-by: Anton Blanchard an...@samba.org

Applied, thanks

g.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Feedback wished on possible improvement of CPU15 errata handling on mpc8xx

2013-08-29 Thread leroy christophe

Le 29/08/2013 19:57, Joakim Tjernlund a écrit :

Linuxppc-dev
linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org
wrote on 2013/08/29 19:11:48:

The mpc8xx powerpc has an errata identified CPU15 which is that whenever
the last instruction of a page is a conditional branch to the last
instruction of the next page, the CPU might do crazy things.

To work around this errata, one of the workarounds proposed by freescale

is:

In the ITLB miss exception code, when loading the TLB for an MMU page,
also invalidate any TLB referring to the next and previous page using
tlbie. This intentionally forces an ITLB miss exception on every
execution across sequential MMU page boundaries

It is that workaround which has been implemented in the kernel. The
drawback of this workaround is that TLB miss is encountered everytime we
cross page boundary. On a flat program execution, it means that we get a
TLB miss every 1000 instructions. A TLB miss handling is around 30/40
instructions, which means a degradation of about 4% of the performances.
It can be even worse if the program has a loop astride two pages.

In the errata document from freescale, there is an example where they
only invalidate the TLB when the page has the actual issue, in extenso
when the page has the offending instruction at offset 0xffc, and they
suggest to use the available PTE bits to tag pages in advance.

I checked in asm/pte-8xx.h : we still have one SW bit available
(0x0080). So I was thinking about using that bit to mark pages
CPU15_SAFE when loading them if they don't have the offending

instruction.

Then, in the ITLBmiss handler, instead of always invalidating preceeding
and following pages, we would check SW bit in the PTE and invalidate
following page only if current page is not marked CPU15_SAFE, then check
the PTE of preceeding page and invalidate it only if it is not marked
CPU15_SAFE

I believe this would improve the CPU15 errata handling and would reduce
the overhead introduced by the handling of this errata.

Do you see anything wrong with my proposal ?

Just that you are using up the last bit of the pte which will be needed at
some point.
Have you run into CPU15? We have been using 8xx for more than 10 years on
kernel 2.4 and I
don't think we ever run into this problem.
Ok, indeed I have activated the CPU15 errata in the kernel because I 
know my CPU has the bug.

Do you think it can be deactivated without much risk though ?

If you go forward with this I suggest you use the WRITETHRU bit instead
and make
it so the user can choose which to use.

If you want to optimize TLB misses you might want to add support for 8MB
pages, I got
the TLB and kernel memory done in my 2.4 kernel. You could start with that
and
add 8MB user space page.
In 2.6 Kernel we have CONFIG_PIN_TLB which pins the first 8Mbytes in 
ITLB and pins the first 24Mbytes in DTLB as far as I understand. Do we 
need more for the kernel ? I so, yes I would be interested in porting 
your code to 2.6


Wouldn't we waste memory by using 8Mbytes pages in user mode ?
I read somewhere that Transparent Huge Pages have been ported on powerpc 
in future kernel 3.11. Therefore I was thinking about maybe adding 
support for hugepages into 8xx.
8xx has 512kbytes hugepages, I was thinking that maybe it would be more 
appropriate than 8Mbytes pages.
Do you think it would be feasible and useful to do this for embedded 
systems having, let's say, 32 to 128 Mbytes of RAM?


Christophe

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] hvc_xen: Remove unnecessary __GFP_ZERO from kzalloc

2013-08-29 Thread Joe Perches
kzalloc already adds this __GFP_ZERO.

Signed-off-by: Joe Perches j...@perches.com
---
 drivers/tty/hvc/hvc_xen.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 682210d..e61c36c 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -208,7 +208,7 @@ static int xen_hvm_console_init(void)
 
info = vtermno_to_xencons(HVC_COOKIE);
if (!info) {
-   info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | 
__GFP_ZERO);
+   info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
if (!info)
return -ENOMEM;
} else if (info-intf != NULL) {
@@ -257,7 +257,7 @@ static int xen_pv_console_init(void)
 
info = vtermno_to_xencons(HVC_COOKIE);
if (!info) {
-   info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | 
__GFP_ZERO);
+   info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
if (!info)
return -ENOMEM;
} else if (info-intf != NULL) {
@@ -284,7 +284,7 @@ static int xen_initial_domain_console_init(void)
 
info = vtermno_to_xencons(HVC_COOKIE);
if (!info) {
-   info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | 
__GFP_ZERO);
+   info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
if (!info)
return -ENOMEM;
}
-- 
1.8.1.2.459.gbcd45b4.dirty

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Feedback wished on possible improvement of CPU15 errata handling on mpc8xx

2013-08-29 Thread Joakim Tjernlund
leroy christophe christophe.le...@c-s.fr wrote on 2013/08/29 23:04:03:
 
 Le 29/08/2013 19:57, Joakim Tjernlund a écrit :
  Linuxppc-dev
  linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org
  wrote on 2013/08/29 19:11:48:
  The mpc8xx powerpc has an errata identified CPU15 which is that 
whenever
  the last instruction of a page is a conditional branch to the last
  instruction of the next page, the CPU might do crazy things.
 
  To work around this errata, one of the workarounds proposed by 
freescale
  is:
  In the ITLB miss exception code, when loading the TLB for an MMU 
page,
  also invalidate any TLB referring to the next and previous page using
  tlbie. This intentionally forces an ITLB miss exception on every
  execution across sequential MMU page boundaries
 
  It is that workaround which has been implemented in the kernel. The
  drawback of this workaround is that TLB miss is encountered everytime 
we
  cross page boundary. On a flat program execution, it means that we 
get a
  TLB miss every 1000 instructions. A TLB miss handling is around 30/40
  instructions, which means a degradation of about 4% of the 
performances.
  It can be even worse if the program has a loop astride two pages.
 
  In the errata document from freescale, there is an example where they
  only invalidate the TLB when the page has the actual issue, in 
extenso
  when the page has the offending instruction at offset 0xffc, and they
  suggest to use the available PTE bits to tag pages in advance.
 
  I checked in asm/pte-8xx.h : we still have one SW bit available
  (0x0080). So I was thinking about using that bit to mark pages
  CPU15_SAFE when loading them if they don't have the offending
  instruction.
  Then, in the ITLBmiss handler, instead of always invalidating 
preceeding
  and following pages, we would check SW bit in the PTE and invalidate
  following page only if current page is not marked CPU15_SAFE, then 
check
  the PTE of preceeding page and invalidate it only if it is not marked
  CPU15_SAFE
 
  I believe this would improve the CPU15 errata handling and would 
reduce
  the overhead introduced by the handling of this errata.
 
  Do you see anything wrong with my proposal ?
  Just that you are using up the last bit of the pte which will be 
needed at
  some point.
  Have you run into CPU15? We have been using 8xx for more than 10 years 
on
  kernel 2.4 and I
  don't think we ever run into this problem.
 Ok, indeed I have activated the CPU15 errata in the kernel because I 
 know my CPU has the bug.
 Do you think it can be deactivated without much risk though ?

Can't say for you, all I know that our 860 and 862 CPUs seem to work OK.

  If you go forward with this I suggest you use the WRITETHRU bit 
instead
  and make
  it so the user can choose which to use.
 
  If you want to optimize TLB misses you might want to add support for 
8MB
  pages, I got
  the TLB and kernel memory done in my 2.4 kernel. You could start with 
that
  and
  add 8MB user space page.
 In 2.6 Kernel we have CONFIG_PIN_TLB which pins the first 8Mbytes in 
 ITLB and pins the first 24Mbytes in DTLB as far as I understand. Do we 
 need more for the kernel ? I so, yes I would be interested in porting 
 your code to 2.6

Yes, 2.4 has the same. There is a drawback with pinning though, you pin 4 
ITLBs and 4 DTLBs.
One only needs 1 ITLB for kernel so the other 3 are unused. 24MB DTLs is 
pretty statik, chances
are that it is either too much or too little.

 
 Wouldn't we waste memory by using 8Mbytes pages in user mode ?

Don't know the details of how user space deal with these pages, hopefully
someone else knows better.

 I read somewhere that Transparent Huge Pages have been ported on powerpc 

 in future kernel 3.11. Therefore I was thinking about maybe adding 
 support for hugepages into 8xx.
 8xx has 512kbytes hugepages, I was thinking that maybe it would be more 
 appropriate than 8Mbytes pages.

See previous comment, although 8MB pages is less TLB insn as I recall.

 Do you think it would be feasible and usefull to do this for embeddeds 
 system having let say 32 to 128Mbytes RAM ?

One could stop for just kernel memory. With 8MB pages there are some 
additional 
advantages compared with PINNED TLBs:
- you map all kernel memory
- you can also map other spaces, I got both IMMR/BCR and all my NOR FLASH
  mapped with 8MB pages.

 Jocke
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


AUTO: Michael Barry is out of the office (returning 11/09/2013)

2013-08-29 Thread Michael Barry

I am out of the office until 11/09/2013.




Note: This is an automated response to your message  Linuxppc-dev Digest,
Vol 108, Issue 258 sent on 29/08/2013 21:47:50.

This is the only notification you will receive while this person is away.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC PATCH 1/1] powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100.

2013-08-29 Thread Stephen N Chivers
Stephen N Chivers/AUS/CSC wrote on 08/22/2013 10:58:10 AM:

 From: Stephen N Chivers/AUS/CSC
 To: Scott Wood scottw...@freescale.com
 Cc: b...@kernel.crashing.org, Chris Proctor cproc...@csc.com.au, 
 linuxppc-dev@lists.ozlabs.org, pau...@samba.org, Stephen N Chivers 
 schiv...@csc.com.au
 Date: 08/22/2013 10:58 AM
 Subject: Re: [RFC PATCH 1/1] powerpc/embedded6xx: Add support for 
 Motorola/Emerson MVME5100.
 
 Scott Wood scottw...@freescale.com wrote on 08/21/2013 09:20:03 AM:
 
  From: Scott Wood scottw...@freescale.com
  To: Stephen N Chivers schiv...@csc.com.au
  Cc: b...@kernel.crashing.org, Chris Proctor cproc...@csc.com.au,
  linuxppc-dev@lists.ozlabs.org, pau...@samba.org
  Date: 08/21/2013 09:20 AM
  Subject: Re: [RFC PATCH 1/1] powerpc/embedded6xx: Add support for 
  Motorola/Emerson MVME5100.
  
  On Tue, 2013-08-20 at 13:28 +1100, Stephen N Chivers wrote:
   Scott Wood scottw...@freescale.com wrote on 08/09/2013 11:35:20 
AM:
   
From: Scott Wood scottw...@freescale.com
To: Stephen N Chivers schiv...@csc.com.au
Cc: b...@kernel.crashing.org, pau...@samba.org, Chris Proctor 
cproc...@csc.com.au, linuxppc-dev@lists.ozlabs.org
Date: 08/09/2013 11:36 AM
Subject: Re: [RFC PATCH 1/1] powerpc/embedded6xx: Add support for 
Motorola/Emerson MVME5100.

simple-bus may be applicable here (in addition to a specific
compatible).
   
   The HAWK ASIC is a difficult beast. I still cannot get a positive
   identification as to what it is (Motorola/Freescale part number
   unknown, not even the part number on the chip on the board 
helps).
   The best I can come up with is that it is a tsi108 without
   the ethenets.
   So device_type will be tsi-bridge and compatible will be
   tsi108-bridge.
  
  Don't use device_type.  compatible should include hawk in the name
  (especially if you're not sure what's really in it), and/or the part
  number on the chip.  If you're convinced it's fully compatible with
  tsi108-bridge you can add that as a second compatible value, though
  given the uncertainty it's probably better to just teach Linux to look
  for the new compatible.
  
  If devices on the bus can be used without any special bus setup or
  knowledge, then you can add a compatible of simple-bus to the end.
  
Why not just look for a chrp,iic node directly?
   
   I was following the model used in other places, like chrp/setup.c.
  
  Not all examples are good examples. :-)
  
 +   if ((np = of_find_compatible_node(NULL, pci, 
 mpc10x-pci))) 
   {

Why insist on the device_type?
   
   Following the model in the linkstation (kurobox) platform support. 
  
  Drop the device_type check.
  
 +static void
 +mvme5100_restart(char *cmd)
 +{
 +   volatile ulong  i = 1000;
 +
 +
 +   local_irq_disable();
 +   _nmask_and_or_msr(0, MSR_IP);

Does mtmsr(mfmsr() | MSR_IP) not work?
   
   Don't know. Is from the original code by Matt Porter.
  
  It actually appears that there are no callers remaining that use the
  and portion of the functionality.  In fact there are no callers that
  use it for anything other than setting MSR_IP. :-P
  
 +   out_8((u_char *) BOARD_MODRST_REG, 0x01);
 +
 +   while (i--  0);

Do not use a loop to implement a delay.
   
   Taken from the original code. But at this point the board
   is going to reset and reboot via firmware, as /sbin/reboot
   or /sbin/halt has been invoked.
  
  Still, it's just a bad idea.  What's wrong with udelay()?
  
  Or just use an infinite loop.  How much value is there really in 
timing
  out here?
  
 +static void __init
 +mvme5100_set_bat(void)
 +{
 +
 +
 +   mb();
 +   mtspr(SPRN_DBAT1U, 0xf0001ffe);
 +   mtspr(SPRN_DBAT1L, 0xf02a);
 +   mb();
 +   setbat(1, 0xfe00, 0xfe00, 0x0200, 
   PAGE_KERNEL_NCG);
 +}

It is no longer allowed to squat on random virtual address space 
like
this.  If you really need a BAT you'll have to allocate the 
virtual
address properly.
   
   Yes. I found that this was an anathema when researching the port in
   2010 but I couldn't find any practical solution at the time.
   The code is called early to ensure that the hawk registers are 
available.
   sysdev/cpm_common.c does the same thing.
  
   What is the correct solution?
  
  ioremap() has special code to function early (using ioremap_bot).
  
  If you still need to use a BAT that early, reserve the space with
  asm/fixmap.h or by adding a function to the early ioremap code to just
  reserve the space.  Or better, improve the ioremap code to be capable 
of
  creating a BAT (or equivalent) when requested.
 
 It is really interesting. Given that the UART implementation on the
 HAWK is such that legacy_serial will not set up an early console it
 is very likely that the address translation set up by the bat is not
 required.
 I can 

Re: [PATCH 0/4] Unify CPU hotplug lock interface

2013-08-29 Thread Rafael J. Wysocki
On Thursday, August 29, 2013 11:15:10 AM Toshi Kani wrote:
 On Sun, 2013-08-18 at 03:02 +0200, Rafael J. Wysocki wrote:
  On Saturday, August 17, 2013 01:46:55 PM Toshi Kani wrote:
   lock_device_hotplug() was recently introduced to serialize CPU  Memory
   online/offline and hotplug operations, along with sysfs online interface
   restructure (commit 4f3549d7).  With this new locking scheme,
   cpu_hotplug_driver_lock() is redundant and is no longer necessary.
   
   This patchset makes sure that lock_device_hotplug() covers all CPU online/
   offline interfaces, and then removes cpu_hotplug_driver_lock().
   
   The patchset is based on Linus's tree, 3.11.0-rc5.
  
  Nice series, thanks a lot for taking care of this!
 
 Hi Rafael,
 
 Per the recent your changes in lock_device_hotplug(), do you think it
 makes sense to integrate this patchset into your tree?  I am also
 considering to add one more patch to use lock_device_hotplug_sysfs() in
 cpu_probe_store().  I will rebase to your tree and send them today if it
 makes sense to you.

Yes, it does to me.

Thanks,
Rafael


   ---
   Toshi Kani (4):
 hotplug, x86: Fix online state in cpu0 debug interface
 hotplug, x86: Add hotplug lock to missing places
 hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86
 hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock()
   
   ---
arch/powerpc/kernel/smp.c  | 12 --
arch/powerpc/platforms/pseries/dlpar.c | 40 
   +-
arch/x86/Kconfig   |  4 
arch/x86/kernel/smpboot.c  | 21 --
arch/x86/kernel/topology.c | 11 ++
drivers/base/cpu.c | 26 --
include/linux/cpu.h| 13 ---
7 files changed, 37 insertions(+), 90 deletions(-)
   --
   To unsubscribe from this list: send the line unsubscribe linux-kernel in
   the body of a message to majord...@vger.kernel.org
   More majordomo info at  http://vger.kernel.org/majordomo-info.html
   Please read the FAQ at  http://www.tux.org/lkml/
 
 
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 0/4] Unify CPU hotplug lock interface

2013-08-29 Thread Toshi Kani
On Fri, 2013-08-30 at 02:06 +0200, Rafael J. Wysocki wrote:
 On Thursday, August 29, 2013 11:15:10 AM Toshi Kani wrote:
  On Sun, 2013-08-18 at 03:02 +0200, Rafael J. Wysocki wrote:
   On Saturday, August 17, 2013 01:46:55 PM Toshi Kani wrote:
lock_device_hotplug() was recently introduced to serialize CPU  Memory
online/offline and hotplug operations, along with sysfs online interface
restructure (commit 4f3549d7).  With this new locking scheme,
cpu_hotplug_driver_lock() is redundant and is no longer necessary.

This patchset makes sure that lock_device_hotplug() covers all CPU 
online/
offline interfaces, and then removes cpu_hotplug_driver_lock().

The patchset is based on Linus's tree, 3.11.0-rc5.
   
   Nice series, thanks a lot for taking care of this!
  
  Hi Rafael,
  
  Per the recent your changes in lock_device_hotplug(), do you think it
  makes sense to integrate this patchset into your tree?  I am also
  considering to add one more patch to use lock_device_hotplug_sysfs() in
  cpu_probe_store().  I will rebase to your tree and send them today if it
  makes sense to you.
 
 Yes, it does to me.

Great!  I will send them shortly.

Thanks,
-Toshi

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 1/4] hotplug, x86: Fix online state in cpu0 debug interface

2013-08-29 Thread Toshi Kani
_debug_hotplug_cpu() is a debug interface that puts cpu0 offline during
boot-up when CONFIG_DEBUG_HOTPLUG_CPU0 is set.  After cpu0 is put offline
in this interface, however, /sys/devices/system/cpu/cpu0/online still
shows 1 (online).

This patch fixes _debug_hotplug_cpu() to update dev-offline when CPU
online/offline operation succeeded.

Signed-off-by: Toshi Kani toshi.k...@hp.com
Acked-by: Rafael J. Wysocki rafael.j.wyso...@intel.com
---
 arch/x86/kernel/topology.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 6e60b5f..5823bbd 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -72,16 +72,19 @@ int __ref _debug_hotplug_cpu(int cpu, int action)
ret = cpu_down(cpu);
if (!ret) {
pr_info(CPU %u is now offline\n, cpu);
+   dev-offline = true;
kobject_uevent(dev-kobj, KOBJ_OFFLINE);
} else
pr_debug(Can't offline CPU%d.\n, cpu);
break;
case 1:
ret = cpu_up(cpu);
-   if (!ret)
+   if (!ret) {
+   dev-offline = false;
kobject_uevent(dev-kobj, KOBJ_ONLINE);
-   else
+   } else {
pr_debug(Can't online CPU%d.\n, cpu);
+   }
break;
default:
ret = -EINVAL;
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 3/4] hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86

2013-08-29 Thread Toshi Kani
Commit d7c53c9e enabled ARCH_CPU_PROBE_RELEASE on x86 in order to
serialize CPU online/offline operations.  Although it is the config
option to enable CPU hotplug test interfaces, probe  release, it is
also the option to enable cpu_hotplug_driver_lock() as well.  Therefore,
this option had to be enabled on x86 with dummy arch_cpu_probe() and
arch_cpu_release().

Since then, lock_device_hotplug() was introduced to serialize CPU
online/offline  hotplug operations.  Therefore, this config option
is no longer required for the serialization.  This patch disables
this config option on x86 and revert the changes made by commit
d7c53c9e.

Signed-off-by: Toshi Kani toshi.k...@hp.com
Acked-by: Rafael J. Wysocki rafael.j.wyso...@intel.com
---
 arch/x86/Kconfig  |4 
 arch/x86/kernel/smpboot.c |   21 -
 2 files changed, 25 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf9..c87e49a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -255,10 +255,6 @@ config ARCH_HWEIGHT_CFLAGS
default -fcall-saved-ecx -fcall-saved-edx if X86_32
default -fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx 
-fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 
-fcall-saved-r11 if X86_64
 
-config ARCH_CPU_PROBE_RELEASE
-   def_bool y
-   depends on HOTPLUG_CPU
-
 config ARCH_SUPPORTS_UPROBES
def_bool y
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index aecc98a..5b24a9d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -82,27 +82,6 @@
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * We need this for trampoline_base protection from concurrent accesses when
- * off- and onlining cores wildly.
- */
-static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
-
-void cpu_hotplug_driver_lock(void)
-{
-   mutex_lock(x86_cpu_hotplug_driver_mutex);
-}
-
-void cpu_hotplug_driver_unlock(void)
-{
-   mutex_unlock(x86_cpu_hotplug_driver_mutex);
-}
-
-ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
-ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
-#endif
-
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 EXPORT_SYMBOL(smp_num_siblings);
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 4/4] hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock()

2013-08-29 Thread Toshi Kani
cpu_hotplug_driver_lock() serializes CPU online/offline operations
when ARCH_CPU_PROBE_RELEASE is set.  This lock interface is no longer
necessary with the following reason:

 - lock_device_hotplug() now protects CPU online/offline operations,
   including the probe  release interfaces enabled by
   ARCH_CPU_PROBE_RELEASE.  The use of cpu_hotplug_driver_lock() is
   redundant.
 - cpu_hotplug_driver_lock() is only valid when ARCH_CPU_PROBE_RELEASE
   is defined, which is misleading and is only enabled on powerpc.

This patch removes the cpu_hotplug_driver_lock() interface.  As
a result, ARCH_CPU_PROBE_RELEASE only enables / disables the cpu
probe  release interface as intended.  There is no functional change
in this patch.

Signed-off-by: Toshi Kani toshi.k...@hp.com
Acked-by: Rafael J. Wysocki rafael.j.wyso...@intel.com
Reviewed-by: Nathan Fontenot nf...@linux.vnet.ibm.com
---
Performed build test only on powerpc.
---
 arch/powerpc/kernel/smp.c  |   12 --
 arch/powerpc/platforms/pseries/dlpar.c |   40 
 arch/x86/kernel/topology.c |2 --
 drivers/base/cpu.c |   10 +---
 include/linux/cpu.h|   13 --
 5 files changed, 16 insertions(+), 61 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 38b0ba6..1667269 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -763,18 +763,6 @@ void __cpu_die(unsigned int cpu)
	smp_ops->cpu_die(cpu);
 }
 
-static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex);
-
-void cpu_hotplug_driver_lock()
-{
-   mutex_lock(powerpc_cpu_hotplug_driver_mutex);
-}
-
-void cpu_hotplug_driver_unlock()
-{
-   mutex_unlock(powerpc_cpu_hotplug_driver_mutex);
-}
-
 void cpu_die(void)
 {
if (ppc_md.cpu_die)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c 
b/arch/powerpc/platforms/pseries/dlpar.c
index a1a7b9a..e39325d 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -387,18 +387,13 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t 
count)
char *cpu_name;
int rc;
 
-   cpu_hotplug_driver_lock();
rc = strict_strtoul(buf, 0, drc_index);
-   if (rc) {
-   rc = -EINVAL;
-   goto out;
-   }
+   if (rc)
+   return -EINVAL;
 
dn = dlpar_configure_connector(drc_index);
-   if (!dn) {
-   rc = -EINVAL;
-   goto out;
-   }
+   if (!dn)
+   return -EINVAL;
 
/* configure-connector reports cpus as living in the base
 * directory of the device tree.  CPUs actually live in the
@@ -407,8 +402,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t 
count)
	cpu_name = kasprintf(GFP_KERNEL, "/cpus%s", dn->full_name);
if (!cpu_name) {
dlpar_free_cc_nodes(dn);
-   rc = -ENOMEM;
-   goto out;
+   return -ENOMEM;
}
 
	kfree(dn->full_name);
@@ -417,22 +411,21 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t 
count)
rc = dlpar_acquire_drc(drc_index);
if (rc) {
dlpar_free_cc_nodes(dn);
-   rc = -EINVAL;
-   goto out;
+   return -EINVAL;
}
 
rc = dlpar_attach_node(dn);
if (rc) {
dlpar_release_drc(drc_index);
dlpar_free_cc_nodes(dn);
-   goto out;
+   return rc;
}
 
rc = dlpar_online_cpu(dn);
-out:
-   cpu_hotplug_driver_unlock();
+   if (rc)
+   return rc;
 
-   return rc ? rc : count;
+   return count;
 }
 
 static int dlpar_offline_cpu(struct device_node *dn)
@@ -505,30 +498,27 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t 
count)
return -EINVAL;
}
 
-   cpu_hotplug_driver_lock();
rc = dlpar_offline_cpu(dn);
if (rc) {
of_node_put(dn);
-   rc = -EINVAL;
-   goto out;
+   return -EINVAL;
}
 
rc = dlpar_release_drc(*drc_index);
if (rc) {
of_node_put(dn);
-   goto out;
+   return rc;
}
 
rc = dlpar_detach_node(dn);
if (rc) {
dlpar_acquire_drc(*drc_index);
-   goto out;
+   return rc;
}
 
of_node_put(dn);
-out:
-   cpu_hotplug_driver_unlock();
-   return rc ? rc : count;
+
+   return count;
 }
 
 static int __init pseries_dlpar_init(void)
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index a3f35eb..649b010 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -66,7 +66,6 @@ int __ref _debug_hotplug_cpu(int cpu, int action)
return -EINVAL;
 
lock_device_hotplug();
-   cpu_hotplug_driver_lock();
 
switch 

[PATCH v2 0/4] Unify CPU hotplug lock interface

2013-08-29 Thread Toshi Kani
lock_device_hotplug() was recently introduced to serialize CPU & Memory
online/offline and hotplug operations, along with sysfs online interface
restructure (commit 4f3549d7).  With this new locking scheme,
cpu_hotplug_driver_lock() is redundant and is no longer necessary.

This patchset makes sure that lock_device_hotplug() covers all CPU online/
offline interfaces, and then removes cpu_hotplug_driver_lock().

v2:
 - Rebased to the pm tree, bleeding-edge.
 - Changed patch 2/4 to use lock_device_hotplug_sysfs().

---
Toshi Kani (4):
  hotplug, x86: Fix online state in cpu0 debug interface
  hotplug, x86: Add hotplug lock to missing places
  hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86
  hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock()

---
 arch/powerpc/kernel/smp.c  | 12 --
 arch/powerpc/platforms/pseries/dlpar.c | 40 +-
 arch/x86/Kconfig   |  4 
 arch/x86/kernel/smpboot.c  | 21 --
 arch/x86/kernel/topology.c | 11 ++
 drivers/base/cpu.c | 34 +++--
 include/linux/cpu.h| 13 ---
 7 files changed, 45 insertions(+), 90 deletions(-)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2 0/4] Unify CPU hotplug lock interface

2013-08-29 Thread Yasuaki Ishimatsu
(2013/08/30 9:22), Toshi Kani wrote:
 lock_device_hotplug() was recently introduced to serialize CPU & Memory
 online/offline and hotplug operations, along with sysfs online interface
 restructure (commit 4f3549d7).  With this new locking scheme,
 cpu_hotplug_driver_lock() is redundant and is no longer necessary.
 
 This patchset makes sure that lock_device_hotplug() covers all CPU online/
 offline interfaces, and then removes cpu_hotplug_driver_lock().
 
 v2:
   - Rebased to the pm tree, bleeding-edge.
   - Changed patch 2/4 to use lock_device_hotplug_sysfs().
 
 ---
 Toshi Kani (4):
hotplug, x86: Fix online state in cpu0 debug interface
hotplug, x86: Add hotplug lock to missing places
hotplug, x86: Disable ARCH_CPU_PROBE_RELEASE on x86
hotplug, powerpc, x86: Remove cpu_hotplug_driver_lock()
 
 ---
The patch-set looks good to me.

Acked-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

Thanks,
Yasuaki Ishimatsu


   arch/powerpc/kernel/smp.c  | 12 --
   arch/powerpc/platforms/pseries/dlpar.c | 40 
 +-
   arch/x86/Kconfig   |  4 
   arch/x86/kernel/smpboot.c  | 21 --
   arch/x86/kernel/topology.c | 11 ++
   drivers/base/cpu.c | 34 +++--
   include/linux/cpu.h| 13 ---
   7 files changed, 45 insertions(+), 90 deletions(-)
 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V2 1/6] perf: New conditional branch filter criteria in branch stack sampling

2013-08-29 Thread Anshuman Khandual
POWER8 PMU based BHRB supports filtering for conditional branches.
This patch introduces new branch filter PERF_SAMPLE_BRANCH_COND which
will extend the existing perf ABI. Other architectures can provide
this functionality with either HW filtering support (if present) or
with SW filtering of instructions.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Reviewed-by: Stephane Eranian eran...@google.com
---
 include/uapi/linux/perf_event.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 0b1df41..5da52b6 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -160,8 +160,9 @@ enum perf_branch_sample_type {
	PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
	PERF_SAMPLE_BRANCH_IN_TX    = 1U << 8, /* in transaction */
	PERF_SAMPLE_BRANCH_NO_TX    = 1U << 9, /* not in transaction */
+	PERF_SAMPLE_BRANCH_COND     = 1U << 10, /* conditional branches */
 
-	PERF_SAMPLE_BRANCH_MAX      = 1U << 10, /* non-ABI */
+	PERF_SAMPLE_BRANCH_MAX      = 1U << 11, /* non-ABI */
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V2 2/6] powerpc, perf: Enable conditional branch filter for POWER8

2013-08-29 Thread Anshuman Khandual
Enables conditional branch filter support for POWER8
utilizing MMCRA register based filter and also invalidates
a BHRB branch filter combination involving conditional
branches.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 arch/powerpc/perf/power8-pmu.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 2ee4a70..6e28587 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -580,11 +580,21 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
		return -1;
 
+	/* Invalid branch filter combination - HW does not support */
+	if ((branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) &&
+	    (branch_sample_type & PERF_SAMPLE_BRANCH_COND))
+		return -1;
+
	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
		pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
		return pmu_bhrb_filter;
	}
 
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) {
+		pmu_bhrb_filter |= POWER8_MMCRA_IFM3;
+		return pmu_bhrb_filter;
+	}
+
/* Every thing else is unsupported */
return -1;
 }
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V2 4/6] x86, perf: Add conditional branch filtering support

2013-08-29 Thread Anshuman Khandual
This patch adds conditional branch filtering support,
enabling it for PERF_SAMPLE_BRANCH_COND in perf branch
stack sampling framework by utilizing an available
software filter X86_BR_JCC.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Reviewed-by: Stephane Eranian eran...@google.com
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c 
b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d5be06a..9723773 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -371,6 +371,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event 
*event)
	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;
 
+	if (br_type & PERF_SAMPLE_BRANCH_COND)
+		mask |= X86_BR_JCC;
+
/*
 * stash actual user request into reg, it may
 * be used by fixup code for some CPU
@@ -665,6 +668,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
 */
[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+   [PERF_SAMPLE_BRANCH_COND] = LBR_JCC,
 };
 
 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
@@ -676,6 +680,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
[PERF_SAMPLE_BRANCH_ANY_CALL]   = LBR_REL_CALL | LBR_IND_CALL
| LBR_FAR,
[PERF_SAMPLE_BRANCH_IND_CALL]   = LBR_IND_CALL,
+   [PERF_SAMPLE_BRANCH_COND]   = LBR_JCC,
 };
 
 /* core */
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V2 5/6] perf, documentation: Description for conditional branch filter

2013-08-29 Thread Anshuman Khandual
Adding documentation support for conditional branch filter.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Reviewed-by: Stephane Eranian eran...@google.com
---
 tools/perf/Documentation/perf-record.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index e297b74..59ca8d0 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -163,12 +163,13 @@ following filters are defined:
 - any_call: any function call or system call
 - any_ret: any function return or system call return
 - ind_call: any indirect branch
+- cond: conditional branches
 - u:  only when the branch target is at the user level
 - k: only when the branch target is in the kernel
 - hv: only when the target is at the hypervisor level
 
 +
-The option requires at least one branch type among any, any_call, any_ret, 
ind_call.
+The option requires at least one branch type among any, any_call, any_ret, 
ind_call, cond.
 The privilege levels may be omitted, in which case, the privilege levels of 
the associated
 event are applied to the branch filter. Both kernel (k) and hypervisor (hv) 
privilege
 levels are subject to permissions.  When sampling on multiple events, branch 
stack sampling
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V2 3/6] perf, tool: Conditional branch filter 'cond' added to perf record

2013-08-29 Thread Anshuman Khandual
Adding perf record support for new branch stack filter criteria
PERF_SAMPLE_BRANCH_COND.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 tools/perf/builtin-record.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ecca62e..802d11d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -625,6 +625,7 @@ static const struct branch_mode branch_modes[] = {
BRANCH_OPT(any_call, PERF_SAMPLE_BRANCH_ANY_CALL),
BRANCH_OPT(any_ret, PERF_SAMPLE_BRANCH_ANY_RETURN),
BRANCH_OPT(ind_call, PERF_SAMPLE_BRANCH_IND_CALL),
+   BRANCH_OPT(cond, PERF_SAMPLE_BRANCH_COND),
BRANCH_END
 };
 
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V2 6/6] powerpc, perf: Enable SW filtering in branch stack sampling framework

2013-08-29 Thread Anshuman Khandual
This patch enables SW based post processing of BHRB captured branches
to be able to meet more user defined branch filtration criteria in perf
branch stack sampling framework. This changes increase the number of
filters and their valid combinations on powerpc64 platform with BHRB
support. Summary of code changes described below.

(1) struct cpu_hw_events

Introduced two new variables and modified one to track various filters.

a) bhrb_hw_filter   Tracks PMU based HW branch filter flags.
Computed from PMU dependent call back.
b) bhrb_sw_filter   Tracks SW based instruction filter flags
Computed from PPC64 generic SW filter.
c) filter_mask  Tracks overall filter flags for PPC64

(2) Creating HW event with BHRB request

Kernel would try to figure out supported HW filters through a PMU call
back ppmu-bhrb_filter_map(). Here it would only invalidate unsupported
HW filter combinations. In future we could process one element from the
combination in HW and one in SW. Meanwhile cpuhw-filter_mask would be
tracking the overall supported branch filter requests on the PMU.

Kernel would also process the user request against available SW filters
for PPC64. Then we would process filter_mask to verify whether all the
user requested branch filters have been taken care of either in HW or in
SW.

(3) BHRB SW filter processing

During the BHRB data capture inside the PMU interrupt context, each
of the captured perf_branch_entry.from would be checked for compliance
with applicable SW branch filters. If the entry does not confirm to the
filter requirements, it would be discarded from the final perf branch
stack buffer.

(4) Instruction classification for proposed SW filters

Here are the list of category of instructions which have been classified
under the proposed SW filters.

(a) PERF_SAMPLE_BRANCH_ANY_RETURN

(i) [Un]conditional branch to LR without setting the LR
(1) blr
(2) bclr
(3) btlr
(4) bflr
(5) bdnzlr
(6) bdnztlr
(7) bdnzflr
(8) bdzlr
(9) bdztlr
(10) bdzflr
(11) bltlr
(12) blelr
(13) beqlr
(14) bgelr
(15) bgtlr
(16) bnllr
(17) bnelr
(18) bnglr
(19) bsolr
(20) bnslr
(21) biclr
(22) bnilr
(23) bunlr
(24) bnulr

(b) PERF_SAMPLE_BRANCH_IND_CALL

(i) [Un]conditional branch to CTR with setting the link
(1) bctrl
(2) bcctrl
(3) btctrl
(4) bfctrl
(5) bltctrl
(6) blectrl
(7) beqctrl
(8) bgectrl
(9) bgtctrl
(10) bnlctrl
(11) bnectrl
(12) bngctrl
(13) bsoctrl
(14) bnsctrl
(15) bicctrl
(16) bnictrl
(17) bunctrl
(18) bnuctrl

(ii) [Un]conditional branch to LR setting the link
(0) bclrl
(1) blrl
(2) btlrl
(3) bflrl
(4) bdnzlrl
(5) bdnztlrl
(6) bdnzflrl
(7) bdzlrl
(8) bdztlrl
(9) bdzflrl
(10) bltlrl
(11) blelrl
(12) beqlrl
(13) bgelrl
(14) bgtlrl
(15) bnllrl
(16) bnelrl
(17) bnglrl
(18) bsolrl
(19) bnslrl
(20) biclrl
(21) bnilrl
(22) bunlrl
(23) bnulrl

(iii) [Un]conditional branch to TAR setting the link
(1) btarl
(2) bctarl

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/perf_event_server.h |   2 +-
 arch/powerpc/perf/core-book3s.c  | 200 

[PATCH V2 0/6] perf: New conditional branch filter

2013-08-29 Thread Anshuman Khandual
This patchset is the re-spin of the original branch stack sampling
patchset which introduced new PERF_SAMPLE_BRANCH_COND filter. This patchset
also enables SW based branch filtering support for PPC64 platforms which have
branch stack sampling support. With this new enablement, the branch filter 
support
for PPC64 platforms have been extended to include all these combinations 
discussed
below with a sample test application program.


(1) perf record -e branch-misses:u -b ./cprog
# Overhead  Command  Source Shared Object  Source Symbol  Target Shared 
Object  Target Symbol
#   ...    .  
  .
#
 4.42%cprog  cprog [k] sw_4_2 cprog 
[k] lr_addr  
 4.41%cprog  cprog [k] symbol2cprog 
[k] hw_1_2   
 4.41%cprog  cprog [k] ctr_addr   cprog 
[k] sw_4_1   
 4.41%cprog  cprog [k] lr_addrcprog 
[k] sw_4_2   
 4.41%cprog  cprog [k] sw_4_2 cprog 
[k] callme   
 4.41%cprog  cprog [k] symbol1cprog 
[k] hw_1_1   
 4.41%cprog  cprog [k] success_3_1_3  cprog 
[k] sw_3_1   
 2.43%cprog  cprog [k] sw_4_1 cprog 
[k] ctr_addr 
 2.43%cprog  cprog [k] hw_1_2 cprog 
[k] symbol2  
 2.43%cprog  cprog [k] callme cprog 
[k] hw_1_2   
 2.43%cprog  cprog [k] address1   cprog 
[k] back1
 2.43%cprog  cprog [k] back1  cprog 
[k] callme   
 2.43%cprog  cprog [k] hw_2_1 cprog 
[k] address1 
 2.43%cprog  cprog [k] sw_3_1_1   cprog 
[k] sw_3_1   
 2.43%cprog  cprog [k] sw_3_1_2   cprog 
[k] sw_3_1   
 2.43%cprog  cprog [k] sw_3_1_3   cprog 
[k] sw_3_1   
 2.43%cprog  cprog [k] sw_3_1 cprog 
[k] sw_3_1_1 
 2.43%cprog  cprog [k] sw_3_1 cprog 
[k] sw_3_1_2 
 2.43%cprog  cprog [k] sw_3_1 cprog 
[k] sw_3_1_3 
 2.43%cprog  cprog [k] callme cprog 
[k] sw_3_1   
 2.43%cprog  cprog [k] callme cprog 
[k] sw_4_2   
 2.43%cprog  cprog [k] hw_1_1 cprog 
[k] symbol1  
 2.43%cprog  cprog [k] callme cprog 
[k] hw_1_1   
 2.42%cprog  cprog [k] sw_3_1 cprog 
[k] callme   
 1.99%cprog  cprog [k] success_3_1_1  cprog 
[k] sw_3_1   
 1.99%cprog  cprog [k] sw_3_1 cprog 
[k] success_3_1_1
 1.99%cprog  cprog [k] address2   cprog 
[k] back2
 1.99%cprog  cprog [k] hw_2_2 cprog 
[k] address2 
 1.99%cprog  cprog [k] back2  cprog 
[k] callme   
 1.99%cprog  cprog [k] callme cprog 
[k] main 
 1.99%cprog  cprog [k] sw_3_1 cprog 
[k] success_3_1_3
 1.99%cprog  cprog [k] hw_1_1 cprog 
[k] callme   
 1.99%cprog  cprog [k] sw_3_2 cprog 
[k] callme   
 1.99%cprog  cprog [k] callme cprog 
[k] sw_3_2   
 1.99%cprog  cprog [k] success_3_1_2  cprog 
[k] sw_3_1   
 1.99%cprog  cprog [k] sw_3_1 cprog 
[k] success_3_1_2
 1.99%cprog  cprog [k] hw_1_2 cprog 
[k] callme   
 1.99%cprog  cprog [k] sw_4_1 cprog 
[k] callme   
 0.02%cprog  [unknown] [k] 0xf7ba2328