[PATCH 02/11] powerpc: Add ICSWX instruction

2015-04-07 Thread Dan Streetman
Add the ICSWX and ICSWEPX asm opcodes.  Add definitions for the
Coprocessor Request structures needed to issue icswx calls to
coprocessors.  Add an icswx() function that performs the ICSWX asm
instruction using the provided Coprocessor Command Word value and
Coprocessor Request Block structure.

This is required for communication with the NX-842 coprocessor on
a PowerNV system.
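
A minimal sketch (not part of this patch) of how a caller might drive the
new helper; the signature icswx(__be32 ccw, struct coprocessor_request_block
*crb) and the return-value handling are assumptions based on the description
above:

  #include <asm/icswx.h>

  /* hedged sketch: issue one coprocessor request.  The caller is assumed to
   * have filled in the CRB (CSB address, DDEs) beforehand; the CRB must be
   * CRB_ALIGN (256-byte) aligned.
   */
  static int my_submit_request(__be32 ccw, struct coprocessor_request_block *crb)
  {
          int ret = icswx(ccw, crb);      /* executes the ICSWX instruction */

          if (ret)
                  return ret;             /* e.g. coprocessor busy or rejected */

          /* completion is signalled by the coprocessor setting CSB_V
           * in the request's Coprocessor Status Block.
           */
          return 0;
  }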

Signed-off-by: Dan Streetman 
---
 arch/powerpc/include/asm/icswx.h  | 184 ++
 arch/powerpc/include/asm/ppc-opcode.h |  13 +++
 2 files changed, 197 insertions(+)
 create mode 100644 arch/powerpc/include/asm/icswx.h

diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
new file mode 100644
index 000..a70ae93
--- /dev/null
+++ b/arch/powerpc/include/asm/icswx.h
@@ -0,0 +1,184 @@
+/*
+ * ICSWX api
+ *
+ * Copyright (C) 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This provides the Initiate Coprocessor Store Word Indexed (ICSWX)
+ * instruction.  This instruction is used to communicate with PowerPC
+ * coprocessors.  This also provides definitions of the structures used
+ * to communicate with the coprocessor.
+ *
+ * The RFC02130: Coprocessor Architecture document is the reference for
+ * everything in this file unless otherwise noted.
+ */
+#ifndef _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+#define _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+
+#include <asm/ppc-opcode.h> /* for PPC_ICSWX */
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE  (0x3fff)
+#define CCB_ADDRESS(0xfff8)
+#define CCB_CM (0x0007)
+#define CCB_CM0(0x0004)
+#define CCB_CM12   (0x0003)
+
+#define CCB_CM0_ALL_COMPLETIONS(0x0)
+#define CCB_CM0_LAST_IN_CHAIN  (0x4)
+#define CCB_CM12_STORE (0x0)
+#define CCB_CM12_INTERRUPT (0x1)
+
+#define CCB_SIZE   (0x10)
+#define CCB_ALIGN  CCB_SIZE
+
+struct coprocessor_completion_block {
+   __be64 value;
+   __be64 address;
+} __packed __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V  (0x80)
+#define CSB_F  (0x04)
+#define CSB_CH (0x03)
+#define CSB_CE_INCOMPLETE  (0x80)
+#define CSB_CE_TERMINATION (0x40)
+#define CSB_CE_TPBC(0x20)
+
+#define CSB_CC_SUCCESS (0)
+#define CSB_CC_INVALID_ALIGN   (1)
+#define CSB_CC_OPERAND_OVERLAP (2)
+#define CSB_CC_DATA_LENGTH (3)
+#define CSB_CC_TRANSLATION (5)
+#define CSB_CC_PROTECTION  (6)
+#define CSB_CC_RD_EXTERNAL (7)
+#define CSB_CC_INVALID_OPERAND (8)
+#define CSB_CC_PRIVILEGE   (9)
+#define CSB_CC_INTERNAL(10)
+#define CSB_CC_WR_EXTERNAL (12)
+#define CSB_CC_NOSPC   (13)
+#define CSB_CC_EXCESSIVE_DDE   (14)
+#define CSB_CC_WR_TRANSLATION  (15)
+#define CSB_CC_WR_PROTECTION   (16)
+#define CSB_CC_UNKNOWN_CODE(17)
+#define CSB_CC_ABORT   (18)
+#define CSB_CC_TRANSPORT   (20)
+#define CSB_CC_SEGMENTED_DDL   (31)
+#define CSB_CC_PROGRESS_POINT  (32)
+#define CSB_CC_DDE_OVERFLOW(33)
+#define CSB_CC_SESSION (34)
+#define CSB_CC_PROVISION   (36)
+#define CSB_CC_CHAIN   (37)
+#define CSB_CC_SEQUENCE(38)
+#define CSB_CC_HW  (39)
+
+#define CSB_SIZE   (0x10)
+#define CSB_ALIGN  CSB_SIZE
+
+struct coprocessor_status_block {
+   u8 flags;
+   u8 cs;
+   u8 cc;
+   u8 ce;
+   __be32 count;
+   __be64 address;
+} __packed __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P  (0x8000)
+
+#define DDE_SIZE   (16)
+#define DDE_ALIGN  DDE_SIZE
+
+struct data_descriptor_entry {
+   __be16 flags;
+   u8 count;
+   u8 index;
+   __be32 length;
+   __be64 address;
+} __packed __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE   (128)
+#define CRB_ALIGN  (256) /* Errata: requires 256 alignment */
+
+/* Coprocessor Status Block field
+ *   ADDRESS   address of CSB
+ *   C         CCB is valid
+ *   AT        0 = addrs are virtual, 1 = addrs are phys
+ *   M         enable perf monitor
+ */
+#define CRB_CSB_ADDRESS(0xfff0)
+#define CRB_CSB_C  (0x0008)
+#define CRB_CSB_AT (0x0002)
+#define CRB_CSB_M  (0x0001)
+
+struct coprocessor_request_block {
+   __be32 ccw;
+   __be32 flags;
+   __be64 csb_addr;
+
+   struc

[PATCH 04/11] drivers/crypto/nx: move nx-842.c to nx-842-pseries.c

2015-04-07 Thread Dan Streetman
Move the entire NX-842 driver for the pSeries platform from the file
nx-842.c to nx-842-pseries.c.  This is required by later patches that
add NX-842 support for the PowerNV platform.

This patch does not alter the content of the pSeries NX-842 driver at
all; it only changes the filename.

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/Makefile |2 +-
 drivers/crypto/nx/nx-842-pseries.c | 1603 
 drivers/crypto/nx/nx-842.c | 1603 
 3 files changed, 1604 insertions(+), 1604 deletions(-)
 create mode 100644 drivers/crypto/nx/nx-842-pseries.c
 delete mode 100644 drivers/crypto/nx/nx-842.c

diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index bb770ea..8669ffa 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -11,4 +11,4 @@ nx-crypto-objs := nx.o \
  nx-sha512.o
 
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
-nx-compress-objs := nx-842.o
+nx-compress-objs := nx-842-pseries.o
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
new file mode 100644
index 000..887196e
--- /dev/null
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -0,0 +1,1603 @@
+/*
+ * Driver for IBM Power 842 compression accelerator
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Authors: Robert Jennings 
+ *  Seth Jennings 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "nx_csbcpb.h" /* struct nx_csbcpb */
+
+#define MODULE_NAME "nx-compress"
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Jennings ");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
+
+#define SHIFT_4K 12
+#define SHIFT_64K 16
+#define SIZE_4K (1UL << SHIFT_4K)
+#define SIZE_64K (1UL << SHIFT_64K)
+
+/* IO buffer must be 128 byte aligned */
+#define IO_BUFFER_ALIGN 128
+
+struct nx842_header {
+   int blocks_nr; /* number of compressed blocks */
+   int offset; /* offset of the first block (from beginning of header) */
+   int sizes[0]; /* size of compressed blocks */
+};
+
+static inline int nx842_header_size(const struct nx842_header *hdr)
+{
+   return sizeof(struct nx842_header) +
+   hdr->blocks_nr * sizeof(hdr->sizes[0]);
+}
+
+/* Macros for fields within nx_csbcpb */
+/* Check the valid bit within the csbcpb valid field */
+#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
+
+/* CE macros operate on the completion_extension field bits in the csbcpb.
+ * CE0 0=full completion, 1=partial completion
+ * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
+ * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
+#define NX842_CSBCPB_CE0(x)(x & BIT_MASK(7))
+#define NX842_CSBCPB_CE1(x)(x & BIT_MASK(6))
+#define NX842_CSBCPB_CE2(x)(x & BIT_MASK(5))
+
+/* The NX unit accepts data only on 4K page boundaries */
+#define NX842_HW_PAGE_SHIFTSHIFT_4K
+#define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
+#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1))
+
+enum nx842_status {
+   UNAVAILABLE,
+   AVAILABLE
+};
+
+struct ibm_nx842_counters {
+   atomic64_t comp_complete;
+   atomic64_t comp_failed;
+   atomic64_t decomp_complete;
+   atomic64_t decomp_failed;
+   atomic64_t swdecomp;
+   atomic64_t comp_times[32];
+   atomic64_t decomp_times[32];
+};
+
+static struct nx842_devdata {
+   struct vio_dev *vdev;
+   struct device *dev;
+   struct ibm_nx842_counters *counters;
+   unsigned int max_sg_len;
+   unsigned int max_sync_size;
+   unsigned int max_sync_sg;
+   enum nx842_status status;
+} __rcu *devdata;
+static DEFINE_SPINLOCK(devdata_mutex);
+
+#define NX842_COUNTER_INC(_x) \
+static inline void nx842_inc_##_x( \
+   const struct nx842_devdata *dev) { \
+   if (dev) \
+   atomic64_inc(&dev->counters->_x); \
+}
+NX842_COUNTER_INC(comp_complete);
+NX842_COUNTER_INC(comp_failed);
+NX842_COUNTER_INC(decomp_complete);
+NX842_COUNTER_INC(decomp_failed);
+NX842_COUNTER_INC(swdecomp);
+
+#define NX842_HIST_SLOTS 16
+
+static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int t

[PATCH 05/11] drivers/crypto/nx: add NX-842 platform frontend driver

2015-04-07 Thread Dan Streetman
Add an NX-842 frontend that allows using either the pSeries or PowerNV
platform driver for the NX-842 hardware.  Update the MAINTAINERS file to
include the new filenames.
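
A minimal sketch of how a caller uses the frontend, mirroring the
crypto/842.c usage below; the function name and error handling are
illustrative only:

  #include <linux/slab.h>
  #include <linux/nx842.h>

  static int my_compress(const u8 *src, unsigned int slen,
                         u8 *dst, unsigned int *dlen)
  {
          void *wmem = kmalloc(NX842_MEM_COMPRESS, GFP_KERNEL);
          int err;

          if (!wmem)
                  return -ENOMEM;

          /* the frontend routes this to whichever platform driver is loaded */
          err = nx842_compress(src, slen, dst, dlen, wmem);

          kfree(wmem);
          return err;
  }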

Signed-off-by: Dan Streetman 
---
 MAINTAINERS|   2 +-
 crypto/842.c   |   2 +-
 drivers/crypto/Kconfig |   6 +-
 drivers/crypto/nx/Kconfig  |  33 ++---
 drivers/crypto/nx/Makefile |   4 +-
 drivers/crypto/nx/nx-842-pseries.c |  51 ++---
 drivers/crypto/nx/nx-842.c | 144 +
 drivers/crypto/nx/nx-842.h |  32 +
 include/linux/nx842.h  |   6 +-
 9 files changed, 235 insertions(+), 45 deletions(-)
 create mode 100644 drivers/crypto/nx/nx-842.c
 create mode 100644 drivers/crypto/nx/nx-842.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3dc973a..5a8d46d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4834,7 +4834,7 @@ F:drivers/crypto/nx/
 IBM Power 842 compression accelerator
 M: Dan Streetman 
 S: Supported
-F: drivers/crypto/nx/nx-842.c
+F: drivers/crypto/nx/nx-842*
 F: include/linux/nx842.h
 F: include/linux/sw842.h
 F: lib/842/
diff --git a/crypto/842.c b/crypto/842.c
index b48f4f1..d21cedb 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -52,7 +52,7 @@ static int nx842_init(struct crypto_tfm *tfm)
struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
int wmemsize;
 
-   wmemsize = max_t(int, nx842_get_workmem_size(), LZO1X_MEM_COMPRESS);
+   wmemsize = max_t(int, NX842_MEM_COMPRESS, LZO1X_MEM_COMPRESS);
ctx->nx842_wmem = kmalloc(wmemsize, GFP_NOFS);
if (!ctx->nx842_wmem)
return -ENOMEM;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 2fb0fdf..6d8b11f 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -312,11 +312,11 @@ config CRYPTO_DEV_S5P
  algorithms execution.
 
 config CRYPTO_DEV_NX
-   bool "Support for IBM Power7+ in-Nest cryptographic acceleration"
-   depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN
+   bool "Support for IBM Power in-Nest cryptographic acceleration"
+   depends on PPC64
default n
help
- Support for Power7+ in-Nest cryptographic acceleration.
+ Support for Power in-Nest cryptographic acceleration.
 
 if CRYPTO_DEV_NX
source "drivers/crypto/nx/Kconfig"
diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
index f826166..e4396fc 100644
--- a/drivers/crypto/nx/Kconfig
+++ b/drivers/crypto/nx/Kconfig
@@ -1,6 +1,6 @@
 config CRYPTO_DEV_NX_ENCRYPT
-   tristate "Encryption acceleration support"
-   depends on PPC64 && IBMVIO
+   tristate "Encryption acceleration support on pSeries platform"
+   depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN
default y
select CRYPTO_AES
select CRYPTO_CBC
@@ -12,15 +12,30 @@ config CRYPTO_DEV_NX_ENCRYPT
select CRYPTO_SHA256
select CRYPTO_SHA512
help
- Support for Power7+ in-Nest encryption acceleration. This
- module supports acceleration for AES and SHA2 algorithms. If you
- choose 'M' here, this module will be called nx_crypto.
+ Support for Power in-Nest encryption acceleration. This
+ module supports acceleration for AES and SHA2 algorithms on
+ the pSeries platform.  If you choose 'M' here, this module
+ will be called nx_crypto.
 
 config CRYPTO_DEV_NX_COMPRESS
tristate "Compression acceleration support"
-   depends on PPC64 && IBMVIO
default y
help
- Support for Power7+ in-Nest compression acceleration. This
- module supports acceleration for AES and SHA2 algorithms. If you
- choose 'M' here, this module will be called nx_compress.
+ Support for Power in-Nest compression acceleration. This
+ module supports acceleration for compressing memory with the 842
+ algorithm.  One of the platform drivers must be selected also.
+ If you choose 'M' here, this module will be called nx_compress.
+
+if CRYPTO_DEV_NX_COMPRESS
+
+config CRYPTO_DEV_NX_PSERIES_COMPRESS
+   tristate "Compression acceleration support on pSeries platform"
+   depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN
+   default y
+   help
+ Support for Power in-Nest compression acceleration. This
+ module supports acceleration for compressing memory with the 842
+ algorithm.  This supports NX hardware on the pSeries platform.
+ If you choose 'M' here, this module will be called nx_compress_pseries.
+
+endif
diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index 8669ffa..bc7b7ea 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -11,4 +11,6 @@ nx-crypto-objs := nx.o \
  nx-sha512.o
 
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
-nx-compress-objs := 

[PATCH 01/11] powerpc: export of_get_ibm_chip_id function

2015-04-07 Thread Dan Streetman
Export the of_get_ibm_chip_id() function.  This will be used by the
PowerNV NX-842 driver.

Signed-off-by: Dan Streetman 
---
 arch/powerpc/kernel/prom.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b8e15c6..f9fb9a2 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -800,6 +800,7 @@ int of_get_ibm_chip_id(struct device_node *np)
}
return -1;
 }
+EXPORT_SYMBOL(of_get_ibm_chip_id);
 
 /**
  * cpu_to_chip_id - Return the cpus chip-id
-- 
2.1.0



[PATCH 09/11] crypto: remove LZO fallback from crypto 842

2015-04-07 Thread Dan Streetman
Update the crypto 842 driver to no longer fall back to LZO if the 842
hardware is unavailable.  Simplify the crypto 842 driver to remove all
headers indicating 842/lzo.

The crypto 842 driver should do 842-format compression and decompression
only.  It should not fall back to LZO compression/decompression.  The
user of the crypto 842 driver can fall back to another format if desired.

Signed-off-by: Dan Streetman 
---
 crypto/842.c   | 139 -
 crypto/Kconfig |   4 +-
 2 files changed, 29 insertions(+), 114 deletions(-)

diff --git a/crypto/842.c b/crypto/842.c
index d21cedb..d81c6c7 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -26,128 +26,46 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 
-static int nx842_uselzo;
-
-struct nx842_ctx {
-   void *nx842_wmem; /* working memory for 842/lzo */
-};
-
-enum nx842_crypto_type {
-   NX842_CRYPTO_TYPE_842,
-   NX842_CRYPTO_TYPE_LZO
+struct crypto842_ctx {
+   void *wmem; /* working memory for 842 */
 };
 
-#define NX842_SENTINEL 0xdeadbeef
-
-struct nx842_crypto_header {
-   unsigned int sentinel; /* debug */
-   enum nx842_crypto_type type;
-};
-
-static int nx842_init(struct crypto_tfm *tfm)
+static int crypto842_init(struct crypto_tfm *tfm)
 {
-   struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-   int wmemsize;
+   struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
 
-   wmemsize = max_t(int, NX842_MEM_COMPRESS, LZO1X_MEM_COMPRESS);
-   ctx->nx842_wmem = kmalloc(wmemsize, GFP_NOFS);
-   if (!ctx->nx842_wmem)
+   ctx->wmem = kmalloc(NX842_MEM_COMPRESS, GFP_NOFS);
+   if (!ctx->wmem)
return -ENOMEM;
 
return 0;
 }
 
-static void nx842_exit(struct crypto_tfm *tfm)
+static void crypto842_exit(struct crypto_tfm *tfm)
 {
-   struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
+   struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
 
-   kfree(ctx->nx842_wmem);
+   kfree(ctx->wmem);
 }
 
-static void nx842_reset_uselzo(unsigned long data)
-{
-   nx842_uselzo = 0;
-}
-
-static DEFINE_TIMER(failover_timer, nx842_reset_uselzo, 0, 0);
-
-static int nx842_crypto_compress(struct crypto_tfm *tfm, const u8 *src,
+static int crypto842_compress(struct crypto_tfm *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen)
 {
-   struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-   struct nx842_crypto_header *hdr;
-   unsigned int tmp_len = *dlen;
-   size_t lzodlen; /* needed for lzo */
-   int err;
-
-   *dlen = 0;
-   hdr = (struct nx842_crypto_header *)dst;
-   hdr->sentinel = NX842_SENTINEL; /* debug */
-   dst += sizeof(struct nx842_crypto_header);
-   tmp_len -= sizeof(struct nx842_crypto_header);
-   lzodlen = tmp_len;
-
-   if (likely(!nx842_uselzo)) {
-   err = nx842_compress(src, slen, dst, &tmp_len, ctx->nx842_wmem);
-
-   if (likely(!err)) {
-   hdr->type = NX842_CRYPTO_TYPE_842;
-   *dlen = tmp_len + sizeof(struct nx842_crypto_header);
-   return 0;
-   }
-
-   /* hardware failed */
-   nx842_uselzo = 1;
-
-   /* set timer to check for hardware again in 1 second */
-   mod_timer(&failover_timer, jiffies + msecs_to_jiffies(1000));
-   }
-
-   /* no hardware, use lzo */
-   err = lzo1x_1_compress(src, slen, dst, &lzodlen, ctx->nx842_wmem);
-   if (err != LZO_E_OK)
-   return -EINVAL;
-
-   hdr->type = NX842_CRYPTO_TYPE_LZO;
-   *dlen = lzodlen + sizeof(struct nx842_crypto_header);
-   return 0;
+   struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
+
+   return nx842_compress(src, slen, dst, dlen, ctx->wmem);
 }
 
-static int nx842_crypto_decompress(struct crypto_tfm *tfm, const u8 *src,
+static int crypto842_decompress(struct crypto_tfm *tfm, const u8 *src,
  unsigned int slen, u8 *dst, unsigned int *dlen)
 {
-   struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-   struct nx842_crypto_header *hdr;
-   unsigned int tmp_len = *dlen;
-   size_t lzodlen; /* needed for lzo */
-   int err;
-
-   *dlen = 0;
-   hdr = (struct nx842_crypto_header *)src;
-
-   if (unlikely(hdr->sentinel != NX842_SENTINEL))
-   return -EINVAL;
-
-   src += sizeof(struct nx842_crypto_header);
-   slen -= sizeof(struct nx842_crypto_header);
-
-   if (likely(hdr->type == NX842_CRYPTO_TYPE_842)) {
-   err = nx842_decompress(src, slen, dst, &tmp_len,
-   ctx->nx842_wmem);
-   if (err)
-   return -EINVAL;
-   *dlen = tmp_len;
-   } else if (hdr->type == NX842_CRYPTO_TYPE_LZO) {
-   lzodlen = tmp_len;
-   err = lzo1x_decompress_safe(src, slen, dst, &lzodlen);
-   if (err != LZO_E_OK

[PATCH 10/11] crypto: rewrite crypto 842 to use nx842 constraints

2015-04-07 Thread Dan Streetman
Major rewrite of the crypto 842 driver to use the "constraints" from the
NX-842 hardware driver, and split and/or shift input or output buffers to
fit the required alignment/length constraints.  Add a header to the compressed
buffers.  Update the MAINTAINERS 842 section with the crypto 842 filename.
Fall back to software 842 decompression if the NX-842 hardware fails.

Now that the NX-842 hardware driver provides information about its constraints,
this updates the main crypto 842 driver to adjust each incoming buffer to those
hw constraints.  This allows using the 842 compression hardware with buffers
of any alignment or length; previously, with only the pSeries NX-842 driver,
all (uncompressed) buffers needed to be page-sized and page-aligned.
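
A minimal sketch (names illustrative, not from this patch) of the kind of
length adjustment the rewritten driver must perform on an output buffer,
following the constraints semantics from the "add nx842 constraints" patch:

  #include <linux/nx842.h>

  /* shrink an output length until it satisfies the platform driver's
   * constraints before handing the buffer to the hardware.
   */
  static int my_fit_output_len(struct nx842_constraints *c, unsigned int *len)
  {
          if (*len > c->maximum)
                  *len = c->maximum;              /* clamp to the hw maximum */
          *len -= *len % c->multiple;             /* length must be a multiple */
          if (*len < c->minimum)
                  return -EINVAL;                 /* cannot be made to fit */
          return 0;
  }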

Signed-off-by: Dan Streetman 
---
 MAINTAINERS  |   1 +
 crypto/842.c | 414 ---
 2 files changed, 399 insertions(+), 16 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5a8d46d..850540d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4837,6 +4837,7 @@ S:Supported
 F: drivers/crypto/nx/nx-842*
 F: include/linux/nx842.h
 F: include/linux/sw842.h
+F: crypto/842.c
 F: lib/842/
 
 IBM Power Linux RAID adapter
diff --git a/crypto/842.c b/crypto/842.c
index d81c6c7..7f35c49 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -11,14 +11,22 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ * Copyright (C) IBM Corporation, 2011-2015
  *
- * Copyright (C) IBM Corporation, 2011
+ * Original Authors: Robert Jennings 
+ *   Seth Jennings 
  *
- * Authors: Robert Jennings 
- *  Seth Jennings 
+ * Rewrite: Dan Streetman 
+ *
+ * This is an interface to the NX-842 compression hardware in PowerPC
+ * processors (see drivers/crypto/nx/nx-842.c for details).  Most (all?)
+ * of the complexity of this driver is due to the fact that the NX-842
+ * compression hardware requires the input and output data buffers to be
+ * specifically aligned, to be a specific multiple in length, and within
+ * specific minimum and maximum lengths.  Those restrictions, provided by
+ * the nx-842 driver via its nx842_constraints, mean this driver must use
+ * bounce buffers and headers to correct misaligned in or out buffers,
+ * and to split input buffers that are too large.
  */
 
 #include 
@@ -27,10 +35,59 @@
 #include 
 #include 
 #include 
-#include 
+#include 
+
+/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
+ * template (see lib/842/842_decompress.c), so this magic number
+ * will never appear at the start of a raw 842 compressed buffer.
+ * That can be useful in the future, if buffer alignment and length is
+ * correct, to not require the use of any header, which will save some
+ * space in the resulting compressed buffer; then in decompress, if the
+ * input buffer does not contain this header magic, it's assumed to be
+ * a raw compressed buffer and should be passed directly to the NX-842
+ * hardware driver.
+ */
+#define CRYPTO_842_MAGIC   (0xf842)
+#define CRYPTO_842_GROUP_MAX   (0x19)  /* max 0-based index, real max is +1 */
+#define CRYPTO_842_HEADER_SIZE(h)  \
+   (sizeof(*(h)) + sizeof((h)->group) * (h)->groups)
+#define CRYPTO_842_HEADER_MAX_SIZE \
+   (sizeof(struct crypto842_header) +  \
+sizeof(struct crypto842_header_group) * CRYPTO_842_GROUP_MAX)
+
+/* try longer on comp because we can fallback to sw decomp if hw is busy */
+#define COMP_BUSY_TIMEOUT  (250) /* ms */
+#define DECOMP_BUSY_TIMEOUT(50) /* ms */
+
+struct crypto842_header_group {
+   u16 padding;/* unused bytes at start of group */
+   u32 length; /* length of group, not including padding */
+} __packed;
+
+struct crypto842_header {
+   u16 magic;  /* CRYPTO_842_MAGIC */
+   u16 ignore; /* decompressed end bytes to ignore */
+   u8 groups;  /* 0-based; add 1 for total */
+   struct crypto842_header_group group[1];
+} __packed;
+
+struct crypto842_param {
+   u8 *in;
+   long iremain;
+   u8 *out;
+   long oremain;
+   long ototal;
+};
 
 struct crypto842_ctx {
-   void *wmem; /* working memory for 842 */
+   void *wmem; /* working memory for 842 */
+   void *bounce;   /* bounce buffer to correct alignment */
+
+   /* header includes 1 group, so the total usable groups are
+* max + 1; meaning max is the highest valid 0-based index.
+*/
+   struct crypto842_header header;
+   struct crypto842_header_group group[CRYPTO_842_GROUP_MAX];
 };
 
 static int crypto842_in

[PATCH 08/11] drivers/crypto/nx: simplify pSeries nx842 driver

2015-04-07 Thread Dan Streetman
Simplify the pSeries NX-842 driver: do not expect incoming buffers to be
exactly page-sized; do not break up input buffers to compress smaller blocks;
do not use any internal headers in the compressed data blocks; remove the
software decompression implementation.

This changes the pSeries NX-842 driver to perform constraints-based compression
so that it only needs to compress one entire input block at a time.  This
removes the need for it to split input data blocks into multiple compressed
data sections in the output buffer, and removes the need for any extra header
info in the compressed data; all that is moved (in a later patch) into the
main crypto 842 driver.  Additionally, the 842 software decompression
implementation is no longer needed here, and the crypto 842 driver will use
the generic software 842 decompression function as a fallback if any hardware
842 driver fails.

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/nx-842-pseries.c | 779 -
 1 file changed, 153 insertions(+), 626 deletions(-)

diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 3773e36..0b7bad3 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -21,7 +21,6 @@
  *  Seth Jennings 
  */
 
-#include 
 #include 
 
 #include "nx-842.h"
@@ -32,11 +31,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Robert Jennings ");
 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
 
-#define SHIFT_4K 12
-#define SHIFT_64K 16
-#define SIZE_4K (1UL << SHIFT_4K)
-#define SIZE_64K (1UL << SHIFT_64K)
-
 /* IO buffer must be 128 byte aligned */
 #define IO_BUFFER_ALIGN 128
 
@@ -47,18 +41,52 @@ static struct nx842_constraints nx842_pseries_constraints = {
.maximum =  PAGE_SIZE, /* dynamic, max_sync_size */
 };
 
-struct nx842_header {
-   int blocks_nr; /* number of compressed blocks */
-   int offset; /* offset of the first block (from beginning of header) */
-   int sizes[0]; /* size of compressed blocks */
-};
-
-static inline int nx842_header_size(const struct nx842_header *hdr)
+static int check_constraints(unsigned long buf, unsigned int *len, bool in)
 {
-   return sizeof(struct nx842_header) +
-   hdr->blocks_nr * sizeof(hdr->sizes[0]);
+   if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) {
+   pr_debug("%s buffer 0x%lx not aligned to 0x%x\n",
+in ? "input" : "output", buf,
+nx842_pseries_constraints.alignment);
+   return -EINVAL;
+   }
+   if (*len % nx842_pseries_constraints.multiple) {
+   pr_debug("%s buffer len 0x%x not multiple of 0x%x\n",
+in ? "input" : "output", *len,
+nx842_pseries_constraints.multiple);
+   if (in)
+   return -EINVAL;
+   *len = round_down(*len, nx842_pseries_constraints.multiple);
+   }
+   if (*len < nx842_pseries_constraints.minimum) {
+   pr_debug("%s buffer len 0x%x under minimum 0x%x\n",
+in ? "input" : "output", *len,
+nx842_pseries_constraints.minimum);
+   return -EINVAL;
+   }
+   if (*len > nx842_pseries_constraints.maximum) {
+   pr_debug("%s buffer len 0x%x over maximum 0x%x\n",
+in ? "input" : "output", *len,
+nx842_pseries_constraints.maximum);
+   if (in)
+   return -EINVAL;
+   *len = nx842_pseries_constraints.maximum;
+   }
+   return 0;
 }
 
+/* I assume we need to align the CSB? */
+#define WORKMEM_ALIGN  (256)
+
+struct nx842_workmem {
+   /* scatterlist */
+   char slin[4096];
+   char slout[4096];
+   /* coprocessor status/parameter block */
+   struct nx_csbcpb csbcpb;
+
+   char padding[WORKMEM_ALIGN];
+} __packed __aligned(WORKMEM_ALIGN);
+
 /* Macros for fields within nx_csbcpb */
 /* Check the valid bit within the csbcpb valid field */
 #define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
@@ -72,8 +100,7 @@ static inline int nx842_header_size(const struct nx842_header *hdr)
 #define NX842_CSBCPB_CE2(x)(x & BIT_MASK(5))
 
 /* The NX unit accepts data only on 4K page boundaries */
-#define NX842_HW_PAGE_SHIFTSHIFT_4K
-#define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
+#define NX842_HW_PAGE_SIZE (4096)
 #define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1))
 
 enum nx842_status {
@@ -194,41 +221,6 @@ static int nx842_build_scatterlist(unsigned long buf, int 
len,
return 0;
 }
 
-/*
- * Working memory for software decompression
- */
-struct sw842_fifo {
-   union {
-   char f8[256][8];
-   char f4[512][4];
-   };
-   char f2[256][2];
-   unsigned char f84_full;
-   unsigned char f2_full;
-   unsigned

[PATCH 07/11] drivers/crypto/nx: add PowerNV platform NX-842 driver

2015-04-07 Thread Dan Streetman
Add driver for NX-842 hardware on the PowerNV platform.

This allows the use of the 842 compression hardware coprocessor on
the PowerNV platform.

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/Kconfig  |  10 +
 drivers/crypto/nx/Makefile |   2 +
 drivers/crypto/nx/nx-842-powernv.c | 623 +
 drivers/crypto/nx/nx-842-pseries.c |   9 -
 drivers/crypto/nx/nx-842.c |   4 +-
 drivers/crypto/nx/nx-842.h |  97 ++
 include/linux/nx842.h  |   6 +-
 7 files changed, 739 insertions(+), 12 deletions(-)
 create mode 100644 drivers/crypto/nx/nx-842-powernv.c

diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
index e4396fc..4bf400a 100644
--- a/drivers/crypto/nx/Kconfig
+++ b/drivers/crypto/nx/Kconfig
@@ -38,4 +38,14 @@ config CRYPTO_DEV_NX_PSERIES_COMPRESS
  algorithm.  This supports NX hardware on the pSeries platform.
  If you choose 'M' here, this module will be called nx_compress_pseries.
 
+config CRYPTO_DEV_NX_POWERNV_COMPRESS
+   tristate "Compression acceleration support on PowerNV platform"
+   depends on PPC_POWERNV
+   default y
+   help
+ Support for Power in-Nest compression acceleration. This
+ module supports acceleration for compressing memory with the 842
+ algorithm.  This supports NX hardware on the PowerNV platform.
+ If you choose 'M' here, this module will be called nx_compress_powernv.
+
 endif
diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index bc7b7ea..82221f2 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -12,5 +12,7 @@ nx-crypto-objs := nx.o \
 
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
 obj-$(CONFIG_CRYPTO_DEV_NX_PSERIES_COMPRESS) += nx-compress-pseries.o
+obj-$(CONFIG_CRYPTO_DEV_NX_POWERNV_COMPRESS) += nx-compress-powernv.o
 nx-compress-objs := nx-842.o
 nx-compress-pseries-objs := nx-842-pseries.o
+nx-compress-powernv-objs := nx-842-powernv.o
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
new file mode 100644
index 000..f1624a8
--- /dev/null
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -0,0 +1,623 @@
+/*
+ * Driver for IBM PowerNV 842 compression accelerator
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "nx-842.h"
+
+#include 
+
+#include 
+#include 
+
+#define MODULE_NAME NX842_POWERNV_MODULE_NAME
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman ");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors");
+
+#define WORKMEM_ALIGN  (CRB_ALIGN)
+#define CSB_WAIT_MAX   (5000) /* ms */
+
+struct nx842_workmem {
+   /* Below fields must be properly aligned */
+   struct coprocessor_request_block crb; /* CRB_ALIGN align */
+   struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
+   struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
+   /* Above fields must be properly aligned */
+
+   ktime_t start;
+
+   char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
+} __packed __aligned(WORKMEM_ALIGN);
+
+struct nx842_coproc {
+   unsigned int chip_id;
+   unsigned int ct;
+   unsigned int ci;
+   struct list_head list;
+};
+
+/* no cpu hotplug on powernv, so this list never changes after init */
+static LIST_HEAD(nx842_coprocs);
+static unsigned int nx842_ct;
+
+/**
+ * setup_indirect_dde - Setup an indirect DDE
+ *
+ * The DDE is set up with the DDE count, byte count, and address of
+ * first direct DDE in the list.
+ */
+static void setup_indirect_dde(struct data_descriptor_entry *dde,
+   struct data_descriptor_entry *ddl,
+   unsigned int dde_count, unsigned int byte_count)
+{
+   dde->flags = 0;
+   dde->count = dde_count;
+   dde->index = 0;
+   dde->length = cpu_to_be32(byte_count);
+   dde->address = cpu_to_be64(nx842_get_pa(ddl));
+}
+
+/**
+ * setup_direct_dde - Setup single DDE from buffer
+ *
+ * The DDE is set up with the buffer and length.  The buffer must be properly
+ * aligned.  The used length is returned.
+ * Returns:
+ *   NSuccessfully set up DDE with N bytes
+ */
+static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
+   unsigned long pa, unsigned int len)
+{
+   unsigned int l = min_t(unsigned int, len, LEN_ON

[PATCH 06/11] drivers/crypto/nx: add nx842 constraints

2015-04-07 Thread Dan Streetman
Add "constraints" for the NX-842 driver.  The constraints are used to
indicate what the current NX-842 platform driver is capable of.  The
constraints tell the NX-842 user what alignment, min and max length, and
length multiple each provided buffer should conform to.  These are
required because the 842 hardware requires buffers to meet specific
constraints that vary based on platform - for example, the pSeries
max length is much lower than the PowerNV max length.

These constraints are used by a later patch that improves the crypto 842
driver.
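
A minimal sketch of a caller querying the active platform driver's
constraints before handing it a buffer; the helper name and rejection policy
(bounce, pad, or fail) are illustrative:

  #include <linux/kernel.h>
  #include <linux/nx842.h>

  static int my_check_buffer(const void *buf, unsigned int len)
  {
          struct nx842_constraints c;
          int ret = nx842_constraints(&c);

          if (ret)
                  return ret;     /* no platform driver available */

          if (!IS_ALIGNED((unsigned long)buf, c.alignment) || len < c.minimum)
                  return -EINVAL; /* caller must bounce, pad, or split the buffer */

          return 0;
  }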

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/nx-842-pseries.c | 10 ++
 drivers/crypto/nx/nx-842.c | 38 ++
 drivers/crypto/nx/nx-842.h |  2 ++
 include/linux/nx842.h  |  9 +
 4 files changed, 59 insertions(+)

diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 728a148..baa2e52 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -40,6 +40,13 @@ MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
 /* IO buffer must be 128 byte aligned */
 #define IO_BUFFER_ALIGN 128
 
+static struct nx842_constraints nx842_pseries_constraints = {
+   .alignment =IO_BUFFER_ALIGN,
+   .multiple = DDE_BUFFER_LAST_MULT,
+   .minimum =  IO_BUFFER_ALIGN,
+   .maximum =  PAGE_SIZE, /* dynamic, max_sync_size */
+};
+
 struct nx842_header {
int blocks_nr; /* number of compressed blocks */
int offset; /* offset of the first block (from beginning of header) */
@@ -840,6 +847,8 @@ static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
goto out;
}
 
+   nx842_pseries_constraints.maximum = devdata->max_sync_size;
+
devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
maxsynccop->decomp_sg_limit);
if (devdata->max_sync_sg < 1) {
@@ -1113,6 +1122,7 @@ static struct attribute_group nx842_attribute_group = {
 
 static struct nx842_driver nx842_pseries_driver = {
.owner =THIS_MODULE,
+   .constraints =  &nx842_pseries_constraints,
.compress = nx842_pseries_compress,
.decompress =   nx842_pseries_decompress,
 };
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 815d277..279a38e 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -86,6 +86,44 @@ static void put_driver(struct nx842_driver *driver)
module_put(driver->owner);
 }
 
+/**
+ * nx842_constraints
+ *
+ * This provides the driver's constraints.  Different nx842 implementations
+ * may have varying requirements.  The constraints are:
+ *   @alignment:   All buffers should be aligned to this
+ *   @multiple:All buffer lengths should be a multiple of this
+ *   @minimum: Buffer lengths must not be less than this amount
+ *   @maximum: Buffer lengths must not be more than this amount
+ *
+ * The constraints apply to all buffers and lengths, both input and output,
+ * for both compression and decompression, except for the minimum which
+ * only applies to compression input and decompression output; the
+ * compressed data can be less than the minimum constraint.  It can be
+ * assumed that compressed data will always adhere to the multiple
+ * constraint.
+ *
+ * The driver may succeed even if these constraints are violated;
+ * however the driver can return failure or suffer reduced performance
+ * if any constraint is not met.
+ */
+int nx842_constraints(struct nx842_constraints *c)
+{
+   struct nx842_driver *driver = get_driver();
+   int ret = 0;
+
+   if (!driver)
+   return -ENODEV;
+
+   BUG_ON(!c);
+   memcpy(c, driver->constraints, sizeof(*c));
+
+   put_driver(driver);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(nx842_constraints);
+
 int nx842_compress(const unsigned char *in, unsigned int in_len,
unsigned char *out, unsigned int *out_len,
void *wrkmem)
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
index 2a5d4e1..c6ceb0f 100644
--- a/drivers/crypto/nx/nx-842.h
+++ b/drivers/crypto/nx/nx-842.h
@@ -12,6 +12,8 @@
 struct nx842_driver {
struct module *owner;
 
+   struct nx842_constraints *constraints;
+
int (*compress)(const unsigned char *in, unsigned int in_len,
unsigned char *out, unsigned int *out_len,
void *wrkmem);
diff --git a/include/linux/nx842.h b/include/linux/nx842.h
index 778e3ab..883b474 100644
--- a/include/linux/nx842.h
+++ b/include/linux/nx842.h
@@ -5,6 +5,15 @@
 
 #define NX842_MEM_COMPRESS __NX842_PSERIES_MEM_COMPRESS
 
+struct nx842_constraints {
+   int alignment;
+   int multiple;
+   int minimum;
+   int maximum;
+};
+
+int nx842_constraints(struc

[PATCH 11/11] crypto: add crypto compression selftest

2015-04-07 Thread Dan Streetman
Add a configurable module to perform self-tests on any crypto compression
driver.

This allows testing any crypto compression driver with any input buffer,
at varying alignments and lengths.  It calculates the average bytes per
second compression and decompression rates.  Any errors reported by the
compressor during compression or decompression will end the test and
be logged.
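
A minimal sketch of the kind of throughput arithmetic such a test performs;
the helper name and parameters are illustrative and not taken from the module:

  #include <linux/types.h>
  #include <linux/math64.h>

  /* convert a byte count and elapsed milliseconds into bytes per second */
  static u64 my_calc_bps(u64 bytes, u64 elapsed_ms)
  {
          if (!elapsed_ms)
                  return 0;
          return div64_u64(bytes * 1000, elapsed_ms);     /* ms -> per second */
  }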

Signed-off-by: Dan Streetman 
---
 crypto/Kconfig |   9 +
 crypto/Makefile|   1 +
 crypto/comp_selftest.c | 928 +
 3 files changed, 938 insertions(+)
 create mode 100644 crypto/comp_selftest.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index a7148ff..e56ecf2 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -189,6 +189,15 @@ config CRYPTO_TEST
help
  Quick & dirty crypto test module.
 
+config CRYPTO_COMP_SELFTEST
+   tristate "Compression Self-Testing module"
+   help
+ Configurable Compression Self-Testing using debugfs interface.
+ This allows you to compress and decompress buffers of variable
+ offsets, lengths, and data, using different compressors.  Also
+ the average bytes per second rate for compression/decompression
+ can be calculated.
+
 config CRYPTO_ABLK_HELPER
tristate
select CRYPTO_CRYPTD
diff --git a/crypto/Makefile b/crypto/Makefile
index ba19465..0bb1ac2 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_CRYPTO_RNG2) += krng.o
 obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o
 obj-$(CONFIG_CRYPTO_DRBG) += drbg.o
 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
+obj-$(CONFIG_CRYPTO_COMP_SELFTEST) += comp_selftest.o
 obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
 obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
 obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
diff --git a/crypto/comp_selftest.c b/crypto/comp_selftest.c
new file mode 100644
index 000..691a8ea
--- /dev/null
+++ b/crypto/comp_selftest.c
@@ -0,0 +1,928 @@
+/*
+ * Self-test for compression
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define MODULE_NAME "comp_selftest"
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman ");
+MODULE_DESCRIPTION("Crypto Compression Self-Test");
+
+static unsigned int test_kthreads_max = 64;
+module_param_named(threads_max, test_kthreads_max, uint, 0444);
+
+static unsigned int test_buffer_order = 2;
+module_param_named(buffer_order, test_buffer_order, uint, 0444);
+
+#define TEST_KTHREADS_DEFAULT  (4)
+
+#define TEST_REPEAT_DEFAULT(1)
+
+#define TEST_BPS_WINDOW_DEFAULT(1)
+
+#define TEST_BUFFER_SIZE   (PAGE_SIZE << test_buffer_order)
+
+#define TEST_CHECK_INTERVAL(msecs_to_jiffies(500))
+
+#define OFFSET_START_DEFAULT   (0)
+#define OFFSET_END_DEFAULT OFFSET_START_DEFAULT
+#define OFFSET_INTERVAL_DEFAULT(1)
+#define LENGTH_START_DEFAULT   (PAGE_SIZE)
+#define LENGTH_END_DEFAULT LENGTH_START_DEFAULT
+#define LENGTH_INTERVAL_DEFAULT(1)
+
+struct test_range {
+   u32 start, interval, end;
+};
+
+struct test_param {
+   u32 running;
+   u32 repeat;
+   u32 kthreads;
+   u32 bps_window; /* in seconds */
+   struct test_range offset[3];
+   struct test_range length[3];
+};
+
+struct test_kthread_param {
+   bool running;
+   struct task_struct *kthread;
+   struct crypto_comp *tfm;
+   u8 *buffer[3];
+   u32 offset[3];
+   u32 length[3];
+   atomic64_t bps[2];
+};
+
+static struct test_kthread_param *test_kthread_params;
+
+static struct task_struct *test_kthread;
+static int test_return;
+static u8 *test_buffer;
+
+static atomic64_t test_max_bps[2];
+
+#define TEST_TFM_NAME_MAX  (32)
+static char test_tfm[TEST_TFM_NAME_MAX];
+
+static struct test_param test_params, test_new_params;
+
+static DECLARE_RWSEM(test_lock);
+
+
+static unsigned long total_bps(int i)
+{
+   unsigned long total = 0;
+   int j;
+
+   for (j = 0; j < test_kthreads_max; j++)
+   total += atomic64_read(&test_kthread_params[j].bps[i]);
+
+   return total;
+}
+
+static void update_max_bps(int i)
+{
+   uint64_t prev, t;
+
+   t = total_bps(i);
+   prev = atomic64_read(&test_max_bps[i]);
+   while (t > prev) {
+   uint64_t a = atomic64_cmpxchg(&test_max_bps[i], prev, t);
+
+   if (prev == a)
+   break;
+

[PATCH 03/11] crypto: add software 842 decompression

2015-04-07 Thread Dan Streetman
Add an 842-format software decompression function.  Update the MAINTAINERS
842 section to include the new files.

This decompression function can decompress any standard-format 842
compressed data.  The 842 compressed format is explained in the header
comments.  This general-use decompression function is required by later
patches that update the crypto 842 driver to fall back to software 842
decompression if the NX-842 hardware fails and/or returns an error.
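
A minimal sketch of the fallback pattern this enables; my_hw_decompress() is
hypothetical and stands in for the NX-842 hardware driver call:

  #include <linux/sw842.h>

  static int my_decompress(const unsigned char *src, int slen,
                           unsigned char *dst, int *dlen)
  {
          int avail = *dlen;
          int err = my_hw_decompress(src, slen, dst, dlen);   /* hypothetical */

          if (err) {
                  *dlen = avail;          /* reset the available output space */
                  err = sw842_decompress(src, slen, dst, dlen);
          }
          return err;
  }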

Signed-off-by: Dan Streetman 
---
 MAINTAINERS  |   2 +
 include/linux/sw842.h|   7 +
 lib/842/842_decompress.c | 413 +++
 lib/842/Makefile |   1 +
 lib/Kconfig  |   3 +
 lib/Makefile |   1 +
 6 files changed, 427 insertions(+)
 create mode 100644 include/linux/sw842.h
 create mode 100644 lib/842/842_decompress.c
 create mode 100644 lib/842/Makefile

diff --git a/MAINTAINERS b/MAINTAINERS
index efbcb50..3dc973a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4836,6 +4836,8 @@ M:Dan Streetman 
 S: Supported
 F: drivers/crypto/nx/nx-842.c
 F: include/linux/nx842.h
+F: include/linux/sw842.h
+F: lib/842/
 
 IBM Power Linux RAID adapter
 M: Brian King 
diff --git a/include/linux/sw842.h b/include/linux/sw842.h
new file mode 100644
index 000..aa8d86e
--- /dev/null
+++ b/include/linux/sw842.h
@@ -0,0 +1,7 @@
+#ifndef __SW842_H__
+#define __SW842_H__
+
+int sw842_decompress(const unsigned char *src, int srclen,
+   unsigned char *dst, int *destlen);
+
+#endif
diff --git a/lib/842/842_decompress.c b/lib/842/842_decompress.c
new file mode 100644
index 000..9fc0ffc
--- /dev/null
+++ b/lib/842/842_decompress.c
@@ -0,0 +1,413 @@
+/*
+ * 842 Decompressor
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * The 842 compressed format is made up of multiple blocks, each of
+ * which have the format:
+ *
+ * <template>[arg1][arg2][arg3][arg4]
+ *
+ * where there are between 0 and 4 template args, depending on the specific
+ * template operation.  For normal operations, each arg is either a specific
+ * number of data bytes to add to the output stream, or an index pointing
+ * to a previously-written number of data bytes to copy to the output stream.
+ *
+ * The template code is a 5-bit value.  This code indicates what to
+ * do with the following data.  Template codes from 0 to 0x19 should
+ * use the template table, the static "ops" table in the code below.
+ * For each template (table row), there are between 1 and 4 actions;
+ * each action corresponds to an arg following the template code
+ * bits.  Each action is either a "data" type action, or a "index"
+ * type action, and each action results in 2, 4, or 8 bytes being
+ * written to the output stream.  Each template (i.e. all actions in
+ * the table row) will add up to 8 bytes being written to the output
+ * stream.  Any row with less than 4 actions is padded with noop
+ * actions, indicated by N0 (for which there is no corresponding arg
+ * in the compressed data stream).
+ *
+ * "Data" actions, indicated in the table by D2, D4, and D8, mean that
+ * the corresponding arg is 2, 4, or 8 bytes, respectively, in the
+ * compressed data stream should be copied directly to the output stream.
+ *
+ * "Index" actions, indicated in the table by I2, I4, and I8, mean
+ * the corresponding arg is an index parameter that points to,
+ * respectively, a 2, 4, or 8 byte value already in the output
+ * stream, that should be copied to the end of the output stream.
+ * Essentially, the index points to a position in a ring buffer that
+ * contains the last N bytes of output stream data.  The number of bits
+ * for each index's arg are: 8 bits for I2, 9 bits for I4, and 8 bits for
+ * I8.  Since each index points to a 2, 4, or 8 byte section, this means
+ * that I2 can reference 512 bytes ((2^8 bits = 256) * 2 bytes), I4 can
+ * reference 2048 bytes ((2^9 = 512) * 4 bytes), and I8 can reference
+ * 2048 bytes ((2^8 = 256) * 8 bytes).  Think of it as a dedicated ring
+ * buffer for each of I2, I4, and I8 that are updated for each byte
+ * written to the output stream.  In this implementation, the output stream
+ * is directly used for each index; there is no additional memory required.
+ * Note that the index is into a ring buffer, not a sliding window;
+ * for example, if there have been 260 bytes written to the output stream,
+ * an I2 index of 0 would index to byte 256 in the out

[PATCH 00/11] add 842 hw compression for PowerNV platform

2015-04-07 Thread Dan Streetman
IBM PowerPC processors starting at version P7+ contain an NX coprocessor that
provides various hw-accelerated functions, one of which is memory compression
to the IBM "842" compression format.  This NX-842 coprocessor is already
supported on the pSeries platform, by the nx-842.c driver and the crypto
compression interface at crypto/842.c.  This patch set adds support for NX-842
on the PowerNV (Non-Virtualized) platform.

The existing pSeries platform NX-842 driver could not be re-used for the
PowerNV platform driver, as there are fundamentally different interfaces;
on pSeries the system hypervisor (pHyp) provides the interface and manages
communication with the coprocessor, while on PowerNV the kernel talks directly
to the coprocessor using the ICSWX instruction.  The data structures used to
describe each compression or decompression request to the coprocessor are
also different between pHyp's interface and direct communication with ICSWX.
So, different drivers for pSeries and PowerNV are required.  Adding the new
PowerNV driver but keeping the interface to the drivers the same required
adding a new common frontend interface, to which only one of the platform
drivers will connect (based on what platform the kernel is currently running
on), and moving some functionality out of the existing pSeries driver into a
more common location.  Also, the crypto/842.c interface to the NX-842 hw
driver is modified to be able to handle any alignment or length input or
output buffer; currently with the pSeries driver only page-size and
page-aligned (uncompressed) buffers are possible.

The result is a crypto 842 interface that allows using any input and output
buffers (i.e. any alignment and length) to communicate with the NX-842
hardware on either the pSeries or PowerNV platforms, as well as a generic
842 software decompressor that the crypto 842 interface falls back to if the
NX-842 hardware fails and/or returns error during decompression.

Finally, this also adds a generic crypto compression selftest module, that
can verify correct compression/decompression cycles using variable alignment
and length buffers, multiple threads, and can calculate the throughput.

Dan Streetman (11):
  powerpc: export of_get_ibm_chip_id function
  powerpc: Add ICSWX instruction
  crypto: add software 842 decompression
  drivers/crypto/nx: move nx-842.c to nx-842-pseries.c
  drivers/crypto/nx: add NX-842 platform frontend driver
  drivers/crypto/nx: add nx842 constraints
  drivers/crypto/nx: add PowerNV platform NX-842 driver
  drivers/crypto/nx: simplify pSeries nx842 driver
  crypto: remove LZO fallback from crypto 842
  crypto: rewrite crypto 842 to use nx842 constraints
  crypto: add crypto compression selftest

 MAINTAINERS   |5 +-
 arch/powerpc/include/asm/icswx.h  |  184 
 arch/powerpc/include/asm/ppc-opcode.h |   13 +
 arch/powerpc/kernel/prom.c|1 +
 crypto/842.c  |  495 --
 crypto/Kconfig|   13 +-
 crypto/Makefile   |1 +
 crypto/comp_selftest.c|  928 +++
 drivers/crypto/Kconfig|6 +-
 drivers/crypto/nx/Kconfig |   43 +-
 drivers/crypto/nx/Makefile|4 +
 drivers/crypto/nx/nx-842-powernv.c|  623 +
 drivers/crypto/nx/nx-842-pseries.c| 1126 +++
 drivers/crypto/nx/nx-842.c| 1623 +++--
 drivers/crypto/nx/nx-842.h|  131 +++
 include/linux/nx842.h |   17 +-
 include/linux/sw842.h |7 +
 lib/842/842_decompress.c  |  413 +
 lib/842/Makefile  |1 +
 lib/Kconfig   |3 +
 lib/Makefile  |1 +
 21 files changed, 4001 insertions(+), 1637 deletions(-)
 create mode 100644 arch/powerpc/include/asm/icswx.h
 create mode 100644 crypto/comp_selftest.c
 create mode 100644 drivers/crypto/nx/nx-842-powernv.c
 create mode 100644 drivers/crypto/nx/nx-842-pseries.c
 create mode 100644 drivers/crypto/nx/nx-842.h
 create mode 100644 include/linux/sw842.h
 create mode 100644 lib/842/842_decompress.c
 create mode 100644 lib/842/Makefile

-- 
2.1.0



[PATCH 2/2] crypto: sahara - fix AES descriptor create

2015-04-07 Thread Steffen Trumtrar
The AES implementation still assumes that hw_desc[0] has a valid
key as long as no new key needs to be set; consequently it always
sets the AES key header for the first descriptor and puts data into
the second one (hw_desc[1]).

Change this to only update the key in the hardware, when a new key is
to be set and use the first descriptor for data otherwise.

Signed-off-by: Steffen Trumtrar 
---
 drivers/crypto/sahara.c | 32 ++--
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index d488d97fcee3..84397e0ad647 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -479,6 +479,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
struct scatterlist *sg;
int ret;
int i, j;
+   int idx = 0;
 
/* Copy new key if necessary */
if (ctx->flags & FLAGS_NEW_KEY) {
@@ -486,17 +487,20 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
ctx->flags &= ~FLAGS_NEW_KEY;
 
if (dev->flags & FLAGS_CBC) {
-   dev->hw_desc[0]->len1 = AES_BLOCK_SIZE;
-   dev->hw_desc[0]->p1 = dev->iv_phys_base;
+   dev->hw_desc[idx]->len1 = AES_BLOCK_SIZE;
+   dev->hw_desc[idx]->p1 = dev->iv_phys_base;
} else {
-   dev->hw_desc[0]->len1 = 0;
-   dev->hw_desc[0]->p1 = 0;
+   dev->hw_desc[idx]->len1 = 0;
+   dev->hw_desc[idx]->p1 = 0;
}
-   dev->hw_desc[0]->len2 = ctx->keylen;
-   dev->hw_desc[0]->p2 = dev->key_phys_base;
-   dev->hw_desc[0]->next = dev->hw_phys_desc[1];
+   dev->hw_desc[idx]->len2 = ctx->keylen;
+   dev->hw_desc[idx]->p2 = dev->key_phys_base;
+   dev->hw_desc[idx]->next = dev->hw_phys_desc[1];
+
+   dev->hw_desc[idx]->hdr = sahara_aes_key_hdr(dev);
+
+   idx++;
}
-   dev->hw_desc[0]->hdr = sahara_aes_key_hdr(dev);
 
dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total);
dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total);
@@ -520,7 +524,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
}
 
/* Create input links */
-   dev->hw_desc[1]->p1 = dev->hw_phys_link[0];
+   dev->hw_desc[idx]->p1 = dev->hw_phys_link[0];
sg = dev->in_sg;
for (i = 0; i < dev->nb_in_sg; i++) {
dev->hw_link[i]->len = sg->length;
@@ -534,7 +538,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
}
 
/* Create output links */
-   dev->hw_desc[1]->p2 = dev->hw_phys_link[i];
+   dev->hw_desc[idx]->p2 = dev->hw_phys_link[i];
sg = dev->out_sg;
for (j = i; j < dev->nb_out_sg + i; j++) {
dev->hw_link[j]->len = sg->length;
@@ -548,10 +552,10 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
}
 
/* Fill remaining fields of hw_desc[1] */
-   dev->hw_desc[1]->hdr = sahara_aes_data_link_hdr(dev);
-   dev->hw_desc[1]->len1 = dev->total;
-   dev->hw_desc[1]->len2 = dev->total;
-   dev->hw_desc[1]->next = 0;
+   dev->hw_desc[idx]->hdr = sahara_aes_data_link_hdr(dev);
+   dev->hw_desc[idx]->len1 = dev->total;
+   dev->hw_desc[idx]->len2 = dev->total;
+   dev->hw_desc[idx]->next = 0;
 
sahara_dump_descriptors(dev);
sahara_dump_links(dev);
-- 
2.1.4



[PATCH 1/2] crypto: sahara - use the backlog

2015-04-07 Thread Steffen Trumtrar
With commit

7e77bdebff5cb1e9876c561f69710b9ab8fa1f7e crypto: af_alg - fix backlog handling

in place, the backlog works under all circumstances where it previously
failed, at least for the sahara driver. Use it.

Signed-off-by: Steffen Trumtrar 
---
 drivers/crypto/sahara.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 290a7f0a681f..d488d97fcee3 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -1092,15 +1092,20 @@ static int sahara_queue_manage(void *data)
 {
struct sahara_dev *dev = (struct sahara_dev *)data;
struct crypto_async_request *async_req;
+   struct crypto_async_request *backlog;
int ret = 0;
 
do {
__set_current_state(TASK_INTERRUPTIBLE);
 
mutex_lock(&dev->queue_mutex);
+   backlog = crypto_get_backlog(&dev->queue);
async_req = crypto_dequeue_request(&dev->queue);
mutex_unlock(&dev->queue_mutex);
 
+   if (backlog)
+   backlog->complete(backlog, -EINPROGRESS);
+
if (async_req) {
if (crypto_tfm_alg_type(async_req->tfm) ==
CRYPTO_ALG_TYPE_AHASH) {
-- 
2.1.4



Re: [PATCH] crypto: qat - print ring name in debug output

2015-04-07 Thread Herbert Xu
On Fri, Apr 03, 2015 at 08:40:58AM -0700, Tadeusz Struk wrote:
> Ring name was allocated but never referenced.
> It was supposed to be printed out in debug output.
> 
> Signed-off-by: Tadeusz Struk 

Applied.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: [PATCH] crypto: Fix a typo in Kconfig

2015-04-07 Thread Herbert Xu
On Sat, Apr 04, 2015 at 12:20:30AM +0900, Masanari Iida wrote:
> This patch fixes a spelling typo in crypto/Kconfig.
> 
> Signed-off-by: Masanari Iida 

Applied.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: [PATCH] crypto: qat - fix double release_firmware on error path

2015-04-07 Thread Herbert Xu
On Fri, Apr 03, 2015 at 08:41:17AM -0700, Tadeusz Struk wrote:
> release_firmware was called twice on the error path, causing an Oops.
> 
> Reported-by: Ahsan Atta  
> Signed-off-by: Tadeusz Struk 

Applied.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


crypto: user - Fix crypto_alg_match race

2015-04-07 Thread Herbert Xu
The function crypto_alg_match returns an algorithm without taking
any references on it.  This means that the algorithm can be freed
at any time, therefore all users of crypto_alg_match are buggy.

This patch fixes this by taking a reference count on the algorithm
to prevent such races.

Signed-off-by: Herbert Xu 

diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index eab2497..41dfe76 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -62,10 +62,14 @@ static struct crypto_alg *crypto_alg_match(struct 
crypto_user_alg *p, int exact)
else if (!exact)
match = !strcmp(q->cra_name, p->cru_name);
 
-   if (match) {
-   alg = q;
-   break;
-   }
+   if (!match)
+   continue;
+
+   if (unlikely(!crypto_mod_get(q)))
+   continue;
+
+   alg = q;
+   break;
}
 
up_read(&crypto_alg_sem);
@@ -205,9 +209,10 @@ static int crypto_report(struct sk_buff *in_skb, struct 
nlmsghdr *in_nlh,
if (!alg)
return -ENOENT;
 
+   err = -ENOMEM;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
-   return -ENOMEM;
+   goto drop_alg;
 
info.in_skb = in_skb;
info.out_skb = skb;
@@ -215,6 +220,10 @@ static int crypto_report(struct sk_buff *in_skb, struct 
nlmsghdr *in_nlh,
info.nlmsg_flags = 0;
 
err = crypto_report_alg(alg, &info);
+
+drop_alg:
+   crypto_mod_put(alg);
+
if (err)
return err;
 
@@ -284,6 +293,7 @@ static int crypto_update_alg(struct sk_buff *skb, struct 
nlmsghdr *nlh,
 
up_write(&crypto_alg_sem);
 
+   crypto_mod_put(alg);
crypto_remove_final(&list);
 
return 0;
@@ -294,6 +304,7 @@ static int crypto_del_alg(struct sk_buff *skb, struct 
nlmsghdr *nlh,
 {
struct crypto_alg *alg;
struct crypto_user_alg *p = nlmsg_data(nlh);
+   int err;
 
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
@@ -310,13 +321,19 @@ static int crypto_del_alg(struct sk_buff *skb, struct 
nlmsghdr *nlh,
 * if we try to unregister. Unregistering such an algorithm without
 * removing the module is not possible, so we restrict to crypto
 * instances that are build from templates. */
+   err = -EINVAL;
if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
-   return -EINVAL;
+   goto drop_alg;
 
-   if (atomic_read(&alg->cra_refcnt) != 1)
-   return -EBUSY;
+   err = -EBUSY;
+   if (atomic_read(&alg->cra_refcnt) > 2)
+   goto drop_alg;
 
-   return crypto_unregister_instance((struct crypto_instance *)alg);
+   err = crypto_unregister_instance((struct crypto_instance *)alg);
+
+drop_alg:
+   crypto_mod_put(alg);
+   return err;
 }
 
 static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type,
@@ -395,8 +412,10 @@ static int crypto_add_alg(struct sk_buff *skb, struct 
nlmsghdr *nlh,
return -EINVAL;
 
alg = crypto_alg_match(p, exact);
-   if (alg)
+   if (alg) {
+   crypto_mod_put(alg);
return -EEXIST;
+   }
 
if (strlen(p->cru_driver_name))
name = p->cru_driver_name;
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
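The calling convention after this change is that crypto_alg_match() hands back
a counted reference which every caller must drop on every exit path. A minimal
sketch of a caller under the new rules (the function is illustrative;
crypto_alg_match() and crypto_mod_put() are internal to crypto/, shown here
only to illustrate the get/put pairing):

#include <linux/errno.h>
#include <linux/crypto.h>
#include <linux/cryptouser.h>
#include "internal.h"		/* crypto_mod_put(); sketch assumes crypto/ context */

/* Sketch only: illustrates the reference discipline the patch introduces. */
static int example_query(struct crypto_user_alg *p, int exact)
{
	struct crypto_alg *alg;
	int err;

	alg = crypto_alg_match(p, exact);	/* returns with a reference held */
	if (!alg)
		return -ENOENT;

	err = 0;
	/* ... use alg safely; it cannot go away while we hold the ref ... */

	crypto_mod_put(alg);			/* drop the reference on all paths */
	return err;
}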


[PATCH 10/10] crypto: atmel-aes: correct usage of dma_sync_* API

2015-04-07 Thread Leilei Zhao
The output buffer is read by the CPU, so the
API should be dma_sync_single_for_cpu, which
invalidates the cache lines so that the values
are reloaded from memory.

Signed-off-by: Leilei Zhao 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-aes.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 9bd3437..aa84623 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -627,7 +627,7 @@ static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev 
*dd)
dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
} else {
-   dma_sync_single_for_device(dd->dev, dd->dma_addr_out,
+   dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out,
dd->dma_size, DMA_FROM_DEVICE);
 
/* copy data */
-- 
1.7.9.5
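In general terms: before the CPU reads a streaming buffer that the device has
just written, ownership has to be handed back with dma_sync_single_for_cpu(),
which discards the stale cache lines. A minimal sketch of that direction
(device, handle and buffer names are placeholders):

#include <linux/dma-mapping.h>
#include <linux/string.h>

/* Sketch: CPU reading the result out of a DMA_FROM_DEVICE streaming buffer. */
static void read_back_result(struct device *dev, dma_addr_t handle,
			     const void *dma_buf, void *dst, size_t size)
{
	/* The device has finished writing; invalidate the CPU's stale cache
	 * lines so the data is fetched from memory again.
	 */
	dma_sync_single_for_cpu(dev, handle, size, DMA_FROM_DEVICE);

	memcpy(dst, dma_buf, size);	/* the CPU read is now coherent */
}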



[PATCH 08/10] crypto: atmel-aes: initialize spinlock in probe

2015-04-07 Thread Leilei Zhao
The kernel reports "BUG: spinlock lockup suspected on CPU#0"
when CONFIG_DEBUG_SPINLOCK is enabled in the kernel config and
the spinlock is used for the first time. It's caused by an
uninitialized spinlock, so just initialize it in probe.

Signed-off-by: Leilei Zhao 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-aes.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index ddea772..f0532ab 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -1341,6 +1341,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, aes_dd);
 
INIT_LIST_HEAD(&aes_dd->list);
+   spin_lock_init(&aes_dd->lock);
 
tasklet_init(&aes_dd->done_task, atmel_aes_done_task,
(unsigned long)aes_dd);
-- 
1.7.9.5
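The fix pattern is the same for all three affected drivers: initialise the
lock alongside the rest of the per-device state in probe, before any path can
take it. A minimal sketch (struct and function names are illustrative):

#include <linux/spinlock.h>

struct example_dd {
	spinlock_t lock;
	/* ... other per-device state ... */
};

static int example_probe(struct example_dd *dd)
{
	/* Must happen before the first spin_lock(); with CONFIG_DEBUG_SPINLOCK
	 * an uninitialised lock is reported as a suspected lockup.
	 */
	spin_lock_init(&dd->lock);

	return 0;
}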



[PATCH 07/10] crypto: atmel-tdes: initialize spinlock in probe

2015-04-07 Thread Leilei Zhao
The kernel reports "BUG: spinlock lockup suspected on CPU#0"
when CONFIG_DEBUG_SPINLOCK is enabled in the kernel config and
the spinlock is used for the first time. It's caused by an
uninitialized spinlock, so just initialize it in probe.

Signed-off-by: Leilei Zhao 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-tdes.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 258772d..3517e2a 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1370,6 +1370,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, tdes_dd);
 
INIT_LIST_HEAD(&tdes_dd->list);
+   spin_lock_init(&tdes_dd->lock);
 
tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task,
(unsigned long)tdes_dd);
-- 
1.7.9.5



[PATCH 09/10] crypto: atmel-aes: sync the buf used in DMA or CPU

2015-04-07 Thread Leilei Zhao
The input and output buffers are mapped for DMA transfer in the
Atmel AES driver, but they are also used by the CPU when the
requested crypt length is not bigger than the threshold value of 16.
The buffers end up in the CPU cache once the CPU has accessed them,
so when DMA uses the buffers again, cache lines can be written back
to memory while the DMA transfer is already in progress.

So use dma_sync_single_for_device and dma_sync_single_for_cpu around
the DMA transfers to keep the buffers coherent, so that the DMA
engine and the CPU always see the correct values. This fixes the
issue that the encrypted result periodically goes wrong when running
performance tests with OpenSSH.

Signed-off-by: Leilei Zhao 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-aes.c |   16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index f0532ab..9bd3437 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -315,10 +315,10 @@ static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd,
 
dd->dma_size = length;
 
-   if (!(dd->flags & AES_FLAGS_FAST)) {
-   dma_sync_single_for_device(dd->dev, dma_addr_in, length,
-  DMA_TO_DEVICE);
-   }
+   dma_sync_single_for_device(dd->dev, dma_addr_in, length,
+  DMA_TO_DEVICE);
+   dma_sync_single_for_device(dd->dev, dma_addr_out, length,
+  DMA_FROM_DEVICE);
 
if (dd->flags & AES_FLAGS_CFB8) {
dd->dma_lch_in.dma_conf.dst_addr_width =
@@ -391,6 +391,11 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev 
*dd)
 {
dd->flags &= ~AES_FLAGS_DMA;
 
+   dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in,
+   dd->dma_size, DMA_TO_DEVICE);
+   dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out,
+   dd->dma_size, DMA_FROM_DEVICE);
+
/* use cache buffers */
dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg);
if (!dd->nb_in_sg)
@@ -459,6 +464,9 @@ static int atmel_aes_crypt_dma_start(struct atmel_aes_dev 
*dd)
dd->flags |= AES_FLAGS_FAST;
 
} else {
+   dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in,
+   dd->dma_size, DMA_TO_DEVICE);
+
/* use cache buffers */
count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset,
dd->buf_in, dd->buflen, dd->total, 0);
-- 
1.7.9.5
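The rule being applied here is general: a long-lived streaming mapping that is
alternately touched by the CPU and by the DMA engine must have ownership
bounced with dma_sync_single_for_cpu()/dma_sync_single_for_device() at every
hand-over. A minimal sketch of one request through such a pair of buffers
(names and structure are placeholders, not the atmel-aes code):

#include <linux/dma-mapping.h>
#include <linux/string.h>

/* Sketch: one crypt request using persistent streaming buffers that are
 * filled/read by the CPU and consumed/produced by the DMA engine.
 */
static void run_one_request(struct device *dev,
			    void *buf_in, dma_addr_t dma_in,
			    void *buf_out, dma_addr_t dma_out,
			    const void *src, void *dst, size_t len)
{
	/* CPU owns the input buffer while filling it. */
	dma_sync_single_for_cpu(dev, dma_in, len, DMA_TO_DEVICE);
	memcpy(buf_in, src, len);

	/* Hand both buffers over to the device before starting the transfer. */
	dma_sync_single_for_device(dev, dma_in, len, DMA_TO_DEVICE);
	dma_sync_single_for_device(dev, dma_out, len, DMA_FROM_DEVICE);

	/* ... kick off the DMA transfer and wait for completion ... */

	/* Take the output buffer back before the CPU reads the result. */
	dma_sync_single_for_cpu(dev, dma_out, len, DMA_FROM_DEVICE);
	memcpy(dst, buf_out, len);
}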



[PATCH 04/10] crypto: atmel-sha: fix sg list management

2015-04-07 Thread Leilei Zhao
Having a zero-length sg doesn't mean it is the end of the sg list. This
case happens when calculating the HMAC of an IPsec packet.

Signed-off-by: Leilei Zhao 
Signed-off-by: Ludovic Desroches 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-sha.c |   16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index b471bbe..9fb8af1 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -163,8 +163,20 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx 
*ctx)
count = min(ctx->sg->length - ctx->offset, ctx->total);
count = min(count, ctx->buflen - ctx->bufcnt);
 
-   if (count <= 0)
-   break;
+   if (count <= 0) {
+   /*
+   * Check if count <= 0 because the buffer is full or
+   * because the sg length is 0. In the latest case,
+   * check if there is another sg in the list, a 0 length
+   * sg doesn't necessarily mean the end of the sg list.
+   */
+   if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
+   ctx->sg = sg_next(ctx->sg);
+   continue;
+   } else {
+   break;
+   }
+   }
 
scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
ctx->offset, count, 0);
-- 
1.7.9.5
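The underlying point is that an sg list has to be walked with sg_next() until
sg_is_last() says it is over; a zero-length entry in the middle is skipped,
not treated as a terminator. A minimal sketch of such a walk (illustrative
only, not the driver code):

#include <linux/scatterlist.h>

/* Sketch: total payload of an sg list that may contain zero-length
 * entries, e.g. when hashing an IPsec packet for its HMAC.
 */
static unsigned int sg_payload_len(struct scatterlist *sg)
{
	unsigned int len = 0;

	while (sg) {
		len += sg->length;	/* zero-length entries contribute nothing */
		if (sg_is_last(sg))
			break;
		sg = sg_next(sg);
	}

	return len;
}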



[PATCH 06/10] crypto: atmel-sha: correct the max burst size

2015-04-07 Thread Leilei Zhao
The maximum source and destination burst size is 16
according to the datasheet of the Atmel DMA controller,
and the value is also checked in the at_xdmac_csize
function of the Atmel DMA driver. Because of this
restriction, values beyond the maximum are not processed
by the DMA driver, so SHA384 and SHA512 do not work and
the program waits forever.

So change the max burst size to 16 in all cases in order
to make SHA384 and SHA512 work and to stay consistent
with the DMA driver and the datasheet.

Signed-off-by: Leilei Zhao 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-sha.c |   10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 6430f6a..99a632c 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -432,14 +432,8 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, 
dma_addr_t dma_addr1,
dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: 
%d\n",
ctx->digcnt[1], ctx->digcnt[0], length1, final);
 
-   if (ctx->flags & (SHA_FLAGS_SHA1 | SHA_FLAGS_SHA224 |
-   SHA_FLAGS_SHA256)) {
-   dd->dma_lch_in.dma_conf.src_maxburst = 16;
-   dd->dma_lch_in.dma_conf.dst_maxburst = 16;
-   } else {
-   dd->dma_lch_in.dma_conf.src_maxburst = 32;
-   dd->dma_lch_in.dma_conf.dst_maxburst = 32;
-   }
+   dd->dma_lch_in.dma_conf.src_maxburst = 16;
+   dd->dma_lch_in.dma_conf.dst_maxburst = 16;
 
dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
 
-- 
1.7.9.5
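For reference, the burst size is part of the slave configuration passed to the
DMA engine; a minimal sketch of pinning it at the controller's maximum of 16
(the channel and config variables are placeholders):

#include <linux/dmaengine.h>

/* Sketch: cap the burst size at 16, the largest value at_xdmac accepts. */
static int example_config_dma(struct dma_chan *chan,
			      struct dma_slave_config *cfg)
{
	cfg->src_maxburst = 16;
	cfg->dst_maxburst = 16;

	return dmaengine_slave_config(chan, cfg);
}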



[PATCH 05/10] crypto: atmel-sha: initialize spinlock in probe

2015-04-07 Thread Leilei Zhao
The kernel reports "BUG: spinlock lockup suspected on CPU#0"
when CONFIG_DEBUG_SPINLOCK is enabled in the kernel config and
the spinlock is used for the first time. It's caused by an
uninitialized spinlock, so just initialize it in probe.

Signed-off-by: Leilei Zhao 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-sha.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 9fb8af1..6430f6a 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -1367,6 +1367,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sha_dd);
 
INIT_LIST_HEAD(&sha_dd->list);
+   spin_lock_init(&sha_dd->lock);
 
tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
(unsigned long)sha_dd);
-- 
1.7.9.5



[PATCH 03/10] crypto: atmel-sha: correct the way data are split

2015-04-07 Thread Leilei Zhao
From: Ludovic Desroches 

When a hash is requested on data bigger than the buffer allocated by the
SHA driver, the way DMA transfers are performed is quite strange:
the buffer is filled at each update request and, once full, a DMA
transfer is done. On the next update request, another DMA transfer is
done immediately; only then do we wait for a full buffer (or the end of
the data) before performing the next DMA transfer. On SAMA5D4 this
sometimes leads to a case where the DMA transfer has finished but the
data ready irq never comes, and the hash is incorrect in that case.

With this patch, DMA transfers are only performed when the buffer is
full or when there is no more data, which removes the transfer whose
size equals the update size right after the full-buffer transmission.

Signed-off-by: Ludovic Desroches 
Signed-off-by: Leilei Zhao 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-sha.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index d092313..b471bbe 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -529,7 +529,7 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev 
*dd)
if (final)
atmel_sha_fill_padding(ctx, 0);
 
-   if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+   if (final || (ctx->bufcnt == ctx->buflen)) {
count = ctx->bufcnt;
ctx->bufcnt = 0;
return atmel_sha_xmit_dma_map(dd, ctx, count, final);
-- 
1.7.9.5



[PATCH 02/10] crypto: atmel-sha: add new version

2015-04-07 Thread Leilei Zhao
Add new version of atmel-sha available with SAMA5D4 devices.

Signed-off-by: Leilei Zhao 
Signed-off-by: Ludovic Desroches 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-sha.c |6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 34db04a..d092313 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -1266,6 +1266,12 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
 
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
+   case 0x420:
+   dd->caps.has_dma = 1;
+   dd->caps.has_dualbuff = 1;
+   dd->caps.has_sha224 = 1;
+   dd->caps.has_sha_384_512 = 1;
+   break;
case 0x410:
dd->caps.has_dma = 1;
dd->caps.has_dualbuff = 1;
-- 
1.7.9.5



[PATCH 01/10] crypto: atmel-aes: add new version

2015-04-07 Thread Leilei Zhao
Add new version of atmel-aes available with SAMA5D4 devices.

Signed-off-by: Leilei Zhao 
Signed-off-by: Ludovic Desroches 
Acked-by: Nicolas Ferre 
---
 drivers/crypto/atmel-aes.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 6597aac..ddea772 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -1246,6 +1246,11 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
+   case 0x200:
+   dd->caps.has_dualbuff = 1;
+   dd->caps.has_cfb64 = 1;
+   dd->caps.max_burst_size = 4;
+   break;
case 0x130:
dd->caps.has_dualbuff = 1;
dd->caps.has_cfb64 = 1;
-- 
1.7.9.5



[PATCH 00/10] crypto: at91: add support for SAMA5D4 and fix related bugs

2015-04-07 Thread Leilei Zhao
This series of patches adds a crypto driver for SAMA5D4 and
fixes some bugs in the Atmel crypto drivers:
 - Add new IP versions for AES and SHA.
 - The spinlock was not initialized before use.
 - Fix sg list management to avoid a crash if there is a zero-length sg in the list.
 - The max burst size in the DMA configuration was out of range.
 - The AES result periodically went wrong in performance tests with OpenSSH.
 - Fix a hangup issue in the driver's internal buffer handling.

The patches were made against the herbert/crypto-2.6.git repository and
tested on a SAMA5D4EK board, with regression tests also run on a SAMA5D3EK.

Leilei Zhao (9):
  crypto: atmel-aes: add new version
  crypto: atmel-sha: add new version
  crypto: atmel-sha: fix sg list management
  crypto: atmel-sha: initialize spinlock in probe
  crypto: atmel-sha: correct the max burst size
  crypto: atmel-tdes: initialize spinlock in probe
  crypto: atmel-aes: initialize spinlock in probe
  crypto: atmel-aes: sync the buf used in DMA or CPU
  crypto: atmel-aes: correct usage of dma_sync_* API

Ludovic Desroches (1):
  crypto: atmel-sha: correct the way data are split

 drivers/crypto/atmel-aes.c  |   24 +++-
 drivers/crypto/atmel-sha.c  |   35 ---
 drivers/crypto/atmel-tdes.c |1 +
 3 files changed, 44 insertions(+), 16 deletions(-)

-- 
1.7.9.5



[PATCH v3 15/16] crypto/x86: move SHA-224/256 SSSE3 implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/crypto/sha256_ssse3_glue.c | 184 +++-
 1 file changed, 36 insertions(+), 148 deletions(-)

diff --git a/arch/x86/crypto/sha256_ssse3_glue.c 
b/arch/x86/crypto/sha256_ssse3_glue.c
index 8fad72f4dfd2..bd4ae0da0a49 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -36,7 +36,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -55,174 +55,61 @@ asmlinkage void sha256_transform_rorx(const char *data, 
u32 *digest,
 
 static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
 
-
-static int sha256_ssse3_init(struct shash_desc *desc)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-
-   sctx->state[0] = SHA256_H0;
-   sctx->state[1] = SHA256_H1;
-   sctx->state[2] = SHA256_H2;
-   sctx->state[3] = SHA256_H3;
-   sctx->state[4] = SHA256_H4;
-   sctx->state[5] = SHA256_H5;
-   sctx->state[6] = SHA256_H6;
-   sctx->state[7] = SHA256_H7;
-   sctx->count = 0;
-
-   return 0;
-}
-
-static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
-  unsigned int len, unsigned int partial)
+static void sha256_ssse3_block_fn(int blocks, u8 const *src, u32 *state,
+ const u8 *head, void *p)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int done = 0;
-
-   sctx->count += len;
-
-   if (partial) {
-   done = SHA256_BLOCK_SIZE - partial;
-   memcpy(sctx->buf + partial, data, done);
-   sha256_transform_asm(sctx->buf, sctx->state, 1);
-   }
-
-   if (len - done >= SHA256_BLOCK_SIZE) {
-   const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
-
-   sha256_transform_asm(data + done, sctx->state, (u64) rounds);
-
-   done += rounds * SHA256_BLOCK_SIZE;
-   }
-
-   memcpy(sctx->buf, data + done, len - done);
-
-   return 0;
+   if (head)
+   sha256_transform_asm(head, state, 1);
+   if (blocks)
+   sha256_transform_asm(src, state, blocks);
 }
 
 static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
 unsigned int len)
 {
struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-   int res;
 
-   /* Handle the fast case right here */
-   if (partial + len < SHA256_BLOCK_SIZE) {
-   sctx->count += len;
-   memcpy(sctx->buf + partial, data, len);
-
-   return 0;
-   }
-
-   if (!irq_fpu_usable()) {
-   res = crypto_sha256_update(desc, data, len);
-   } else {
-   kernel_fpu_begin();
-   res = __sha256_ssse3_update(desc, data, len, partial);
-   kernel_fpu_end();
-   }
+   if (!irq_fpu_usable() ||
+   (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+   return crypto_sha256_update(desc, data, len);
 
-   return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int i, index, padlen;
-   __be32 *dst = (__be32 *)out;
-   __be64 bits;
-   static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
-   bits = cpu_to_be64(sctx->count << 3);
-
-   /* Pad out to 56 mod 64 and append length */
-   index = sctx->count % SHA256_BLOCK_SIZE;
-   padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
-
-   if (!irq_fpu_usable()) {
-   crypto_sha256_update(desc, padding, padlen);
-   crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
-   } else {
-   kernel_fpu_begin();
-   /* We need to fill a whole block for __sha256_ssse3_update() */
-   if (padlen <= 56) {
-   sctx->count += padlen;
-   memcpy(sctx->buf + index, padding, padlen);
-   } else {
-   __sha256_ssse3_update(desc, padding, padlen, index);
-   }
-   __sha256_ssse3_update(desc, (const u8 *)&bits,
-   sizeof(bits), 56);
-   kernel_fpu_end();
-   }
-
-   /* Store state in digest */
-   for (i = 0; i < 8; i++)
-   dst[i] = cpu_to_be32(sctx->state[i]);
-
-   /* Wipe context */
-   memset(sctx, 0, sizeof(*sctx));
+   kernel_fpu_begin();
+   sha256_base_do_update(desc, data, len, sha256_ssse3_block_fn, NULL);
+   kernel_fpu_end();
 
return 0;
 }
 
-static int sha256_ssse3_export(struct shash_desc *desc, void *out)
+static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
+

[PATCH v3 13/16] crypto/arm64: move SHA-224/256 ARMv8 implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/arm64/crypto/sha2-ce-core.S |  11 ++-
 arch/arm64/crypto/sha2-ce-glue.c | 209 ++-
 2 files changed, 38 insertions(+), 182 deletions(-)

diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..65ad56636fba 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -135,15 +135,18 @@ CPU_LE(   rev32   v19.16b, v19.16b)
 
/*
 * Final block: add padding and total bit count.
-* Skip if we have no total byte count in x4. In that case, the input
-* size was not a round multiple of the block size, and the padding is
-* handled by the C code.
+* Skip if the input size was not a round multiple of the block size,
+* the padding is handled by the C code in that case.
 */
cbz x4, 3f
+   ldr x5, [x2, #-8]   // sha256_state::count
+   tst x5, #0x3f   // round multiple of block size?
+   b.ne3f
+   str wzr, [x4]
moviv17.2d, #0
mov x8, #0x8000
moviv18.2d, #0
-   ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+   ror x7, x5, #29 // ror(lsl(x4, 3), 32)
fmovd16, x8
mov x4, #0
mov v19.d[0], xzr
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..91ac3682a730 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -20,195 +21,47 @@ MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using 
ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel ");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
-u8 *head, long bytes);
+asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ const u8 *head, void *p);
 
-static int sha224_init(struct shash_desc *desc)
+static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
+   unsigned int len)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-
-   *sctx = (struct sha256_state){
-   .state = {
-   SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
-   SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
-   }
-   };
-   return 0;
-}
-
-static int sha256_init(struct shash_desc *desc)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-
-   *sctx = (struct sha256_state){
-   .state = {
-   SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
-   SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
-   }
-   };
-   return 0;
-}
-
-static int sha2_update(struct shash_desc *desc, const u8 *data,
-  unsigned int len)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
-   sctx->count += len;
-
-   if ((partial + len) >= SHA256_BLOCK_SIZE) {
-   int blocks;
-
-   if (partial) {
-   int p = SHA256_BLOCK_SIZE - partial;
-
-   memcpy(sctx->buf + partial, data, p);
-   data += p;
-   len -= p;
-   }
-
-   blocks = len / SHA256_BLOCK_SIZE;
-   len %= SHA256_BLOCK_SIZE;
-
-   kernel_neon_begin_partial(28);
-   sha2_ce_transform(blocks, data, sctx->state,
- partial ? sctx->buf : NULL, 0);
-   kernel_neon_end();
-
-   data += blocks * SHA256_BLOCK_SIZE;
-   partial = 0;
-   }
-   if (len)
-   memcpy(sctx->buf + partial, data, len);
-   return 0;
-}
-
-static void sha2_final(struct shash_desc *desc)
-{
-   static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   __be64 bits = cpu_to_be64(sctx->count << 3);
-   u32 padlen = SHA256_BLOCK_SIZE
-- ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
-
-   sha2_update(desc, padding, padlen);
-   sha2_update(desc, (const u8 *)&bits, sizeof(bits));
-}
-
-static int sha224_final(struct shash_desc *desc, u8 *out)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   __be32 *dst = (__be32 *)out;
-   int i;
-
-   sha2_final(desc);
-
-   for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
-   put_unaligned_be32(sctx->state[i], dst++);
-
-   *sctx = (struct sha256_state){};
-   return 0;
-}
-

[PATCH v3 16/16] crypto/x86: move SHA-384/512 SSSE3 implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/crypto/sha512_ssse3_glue.c | 193 +++-
 1 file changed, 36 insertions(+), 157 deletions(-)

diff --git a/arch/x86/crypto/sha512_ssse3_glue.c 
b/arch/x86/crypto/sha512_ssse3_glue.c
index 0b6af26832bf..4daa27a5d347 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -34,7 +34,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -54,183 +54,61 @@ asmlinkage void sha512_transform_rorx(const char *data, 
u64 *digest,
 
 static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);
 
-
-static int sha512_ssse3_init(struct shash_desc *desc)
-{
-   struct sha512_state *sctx = shash_desc_ctx(desc);
-
-   sctx->state[0] = SHA512_H0;
-   sctx->state[1] = SHA512_H1;
-   sctx->state[2] = SHA512_H2;
-   sctx->state[3] = SHA512_H3;
-   sctx->state[4] = SHA512_H4;
-   sctx->state[5] = SHA512_H5;
-   sctx->state[6] = SHA512_H6;
-   sctx->state[7] = SHA512_H7;
-   sctx->count[0] = sctx->count[1] = 0;
-
-   return 0;
-}
-
-static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
-  unsigned int len, unsigned int partial)
+static void sha512_ssse3_block_fn(int blocks, u8 const *src, u64 *state,
+ const u8 *head, void *p)
 {
-   struct sha512_state *sctx = shash_desc_ctx(desc);
-   unsigned int done = 0;
-
-   sctx->count[0] += len;
-   if (sctx->count[0] < len)
-   sctx->count[1]++;
-
-   if (partial) {
-   done = SHA512_BLOCK_SIZE - partial;
-   memcpy(sctx->buf + partial, data, done);
-   sha512_transform_asm(sctx->buf, sctx->state, 1);
-   }
-
-   if (len - done >= SHA512_BLOCK_SIZE) {
-   const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
-
-   sha512_transform_asm(data + done, sctx->state, (u64) rounds);
-
-   done += rounds * SHA512_BLOCK_SIZE;
-   }
-
-   memcpy(sctx->buf, data + done, len - done);
-
-   return 0;
+   if (head)
+   sha512_transform_asm(head, state, 1);
+   if (blocks)
+   sha512_transform_asm(src, state, blocks);
 }
 
 static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
 unsigned int len)
 {
struct sha512_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
-   int res;
-
-   /* Handle the fast case right here */
-   if (partial + len < SHA512_BLOCK_SIZE) {
-   sctx->count[0] += len;
-   if (sctx->count[0] < len)
-   sctx->count[1]++;
-   memcpy(sctx->buf + partial, data, len);
-
-   return 0;
-   }
-
-   if (!irq_fpu_usable()) {
-   res = crypto_sha512_update(desc, data, len);
-   } else {
-   kernel_fpu_begin();
-   res = __sha512_ssse3_update(desc, data, len, partial);
-   kernel_fpu_end();
-   }
-
-   return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
-{
-   struct sha512_state *sctx = shash_desc_ctx(desc);
-   unsigned int i, index, padlen;
-   __be64 *dst = (__be64 *)out;
-   __be64 bits[2];
-   static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
-
-   /* save number of bits */
-   bits[1] = cpu_to_be64(sctx->count[0] << 3);
-   bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
-   /* Pad out to 112 mod 128 and append length */
-   index = sctx->count[0] & 0x7f;
-   padlen = (index < 112) ? (112 - index) : ((128+112) - index);
-
-   if (!irq_fpu_usable()) {
-   crypto_sha512_update(desc, padding, padlen);
-   crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
-   } else {
-   kernel_fpu_begin();
-   /* We need to fill a whole block for __sha512_ssse3_update() */
-   if (padlen <= 112) {
-   sctx->count[0] += padlen;
-   if (sctx->count[0] < padlen)
-   sctx->count[1]++;
-   memcpy(sctx->buf + index, padding, padlen);
-   } else {
-   __sha512_ssse3_update(desc, padding, padlen, index);
-   }
-   __sha512_ssse3_update(desc, (const u8 *)&bits,
-   sizeof(bits), 112);
-   kernel_fpu_end();
-   }
-
-   /* Store state in digest */
-   for (i = 0; i < 8; i++)
-   dst[i] = cpu_to_be64(sctx->state[i]);
-
-   /* Wipe context */
-   memset(sctx, 0, sizeof(*sctx));
-
-   return 0;
-}
-
-static int sha512_ssse3_export(struct shash_desc *desc, void *out)
-{
-  

[PATCH v3 12/16] crypto/arm64: move SHA-1 ARMv8 implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/arm64/crypto/sha1-ce-core.S |  11 ++--
 arch/arm64/crypto/sha1-ce-glue.c | 133 +++
 2 files changed, 31 insertions(+), 113 deletions(-)

diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..a2c3ad51286b 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -131,15 +131,18 @@ CPU_LE(   rev32   v11.16b, v11.16b)
 
/*
 * Final block: add padding and total bit count.
-* Skip if we have no total byte count in x4. In that case, the input
-* size was not a round multiple of the block size, and the padding is
-* handled by the C code.
+* Skip if the input size was not a round multiple of the block size,
+* the padding is handled by the C code in that case.
 */
cbz x4, 3f
+   ldr x5, [x2, #-8]   // sha1_state::count
+   tst x5, #0x3f   // round multiple of block size?
+   b.ne3f
+   str wzr, [x4]
moviv9.2d, #0
mov x8, #0x8000
moviv10.2d, #0
-   ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+   ror x7, x5, #29 // ror(lsl(x4, 3), 32)
fmovd8, x8
mov x4, #0
mov v11.d[0], xzr
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..141d5f3d7389 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -21,132 +22,46 @@ MODULE_AUTHOR("Ard Biesheuvel 
");
 MODULE_LICENSE("GPL v2");
 
 asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
- u8 *head, long bytes);
+ const u8 *head, void *p);
 
-static int sha1_init(struct shash_desc *desc)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-
-   *sctx = (struct sha1_state){
-   .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-   };
-   return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
-  unsigned int len)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
-   sctx->count += len;
-
-   if ((partial + len) >= SHA1_BLOCK_SIZE) {
-   int blocks;
-
-   if (partial) {
-   int p = SHA1_BLOCK_SIZE - partial;
-
-   memcpy(sctx->buffer + partial, data, p);
-   data += p;
-   len -= p;
-   }
-
-   blocks = len / SHA1_BLOCK_SIZE;
-   len %= SHA1_BLOCK_SIZE;
-
-   kernel_neon_begin_partial(16);
-   sha1_ce_transform(blocks, data, sctx->state,
- partial ? sctx->buffer : NULL, 0);
-   kernel_neon_end();
-
-   data += blocks * SHA1_BLOCK_SIZE;
-   partial = 0;
-   }
-   if (len)
-   memcpy(sctx->buffer + partial, data, len);
-   return 0;
-}
-
-static int sha1_final(struct shash_desc *desc, u8 *out)
-{
-   static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   __be64 bits = cpu_to_be64(sctx->count << 3);
-   __be32 *dst = (__be32 *)out;
-   int i;
-
-   u32 padlen = SHA1_BLOCK_SIZE
-- ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
-   sha1_update(desc, padding, padlen);
-   sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
-   for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-   put_unaligned_be32(sctx->state[i], dst++);
+   kernel_neon_begin_partial(16);
+   sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL);
+   kernel_neon_end();
 
-   *sctx = (struct sha1_state){};
return 0;
 }
 
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+unsigned int len, u8 *out)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   __be32 *dst = (__be32 *)out;
-   int blocks;
-   int i;
-
-   if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
-   sha1_update(desc, data, len);
-   return sha1_final(desc, out);
-   }
-
-   /*
-* Use a fast path if the input is a multiple of 64 bytes. In
-* this case, there is no need to copy data

[PATCH v3 06/16] crypto: sha512-generic: move to generic glue implementation

2015-04-07 Thread Ard Biesheuvel
This updates the generic SHA-512 implementation to use the
generic shared SHA-512 glue code.

It also implements a .finup hook crypto_sha512_finup() and exports
it to other modules.

Signed-off-by: Ard Biesheuvel 
---
 crypto/sha512_generic.c | 127 ++--
 include/crypto/sha.h|   3 ++
 2 files changed, 29 insertions(+), 101 deletions(-)

diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 1c3c3767e079..8cf0082d7084 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -130,125 +131,48 @@ sha512_transform(u64 *state, const u8 *input)
a = b = c = d = e = f = g = h = t1 = t2 = 0;
 }
 
-static int
-sha512_init(struct shash_desc *desc)
+static void sha512_generic_block_fn(int blocks, u8 const *src, u64 *state,
+   const u8 *head, void *p)
 {
-   struct sha512_state *sctx = shash_desc_ctx(desc);
-   sctx->state[0] = SHA512_H0;
-   sctx->state[1] = SHA512_H1;
-   sctx->state[2] = SHA512_H2;
-   sctx->state[3] = SHA512_H3;
-   sctx->state[4] = SHA512_H4;
-   sctx->state[5] = SHA512_H5;
-   sctx->state[6] = SHA512_H6;
-   sctx->state[7] = SHA512_H7;
-   sctx->count[0] = sctx->count[1] = 0;
+   if (head)
+   sha512_transform(state, head);
 
-   return 0;
-}
-
-static int
-sha384_init(struct shash_desc *desc)
-{
-   struct sha512_state *sctx = shash_desc_ctx(desc);
-   sctx->state[0] = SHA384_H0;
-   sctx->state[1] = SHA384_H1;
-   sctx->state[2] = SHA384_H2;
-   sctx->state[3] = SHA384_H3;
-   sctx->state[4] = SHA384_H4;
-   sctx->state[5] = SHA384_H5;
-   sctx->state[6] = SHA384_H6;
-   sctx->state[7] = SHA384_H7;
-   sctx->count[0] = sctx->count[1] = 0;
-
-   return 0;
+   while (blocks--) {
+   sha512_transform(state, src);
+   src += SHA512_BLOCK_SIZE;
+   }
 }
 
 int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
 {
-   struct sha512_state *sctx = shash_desc_ctx(desc);
-
-   unsigned int i, index, part_len;
-
-   /* Compute number of bytes mod 128 */
-   index = sctx->count[0] & 0x7f;
-
-   /* Update number of bytes */
-   if ((sctx->count[0] += len) < len)
-   sctx->count[1]++;
-
-part_len = 128 - index;
-
-   /* Transform as many times as possible. */
-   if (len >= part_len) {
-   memcpy(&sctx->buf[index], data, part_len);
-   sha512_transform(sctx->state, sctx->buf);
-
-   for (i = part_len; i + 127 < len; i+=128)
-   sha512_transform(sctx->state, &data[i]);
-
-   index = 0;
-   } else {
-   i = 0;
-   }
-
-   /* Buffer remaining input */
-   memcpy(&sctx->buf[index], &data[i], len - i);
-
-   return 0;
+   return sha512_base_do_update(desc, data, len, sha512_generic_block_fn,
+NULL);
 }
 EXPORT_SYMBOL(crypto_sha512_update);
 
-static int
-sha512_final(struct shash_desc *desc, u8 *hash)
+int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
+   unsigned int len, u8 *hash)
 {
-   struct sha512_state *sctx = shash_desc_ctx(desc);
-static u8 padding[128] = { 0x80, };
-   __be64 *dst = (__be64 *)hash;
-   __be64 bits[2];
-   unsigned int index, pad_len;
-   int i;
-
-   /* Save number of bits */
-   bits[1] = cpu_to_be64(sctx->count[0] << 3);
-   bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
-   /* Pad out to 112 mod 128. */
-   index = sctx->count[0] & 0x7f;
-   pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
-   crypto_sha512_update(desc, padding, pad_len);
-
-   /* Append length (before padding) */
-   crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits));
-
-   /* Store state in digest */
-   for (i = 0; i < 8; i++)
-   dst[i] = cpu_to_be64(sctx->state[i]);
-
-   /* Zeroize sensitive information. */
-   memset(sctx, 0, sizeof(struct sha512_state));
-
-   return 0;
+   if (len)
+   sha512_base_do_update(desc, data, len, sha512_generic_block_fn,
+ NULL);
+   sha512_base_do_finalize(desc, sha512_generic_block_fn, NULL);
+   return sha512_base_finish(desc, hash);
 }
+EXPORT_SYMBOL(crypto_sha512_finup);
 
-static int sha384_final(struct shash_desc *desc, u8 *hash)
+static int sha512_final(struct shash_desc *desc, u8 *hash)
 {
-   u8 D[64];
-
-   sha512_final(desc, D);
-
-   memcpy(hash, D, 48);
-   memzero_explicit(D, 64);
-
-   return 0;
+   return crypto_sha512_finup(desc, NULL, 0, hash);
 }
 
 static struct shash_alg sha512_algs[2] = { {
.digestsize =   SHA
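The shape every converted implementation takes with this series is the same:
supply a block function and let the sha512_base_* helpers handle buffering,
padding and the length words. Below is a minimal sketch using the helpers as
defined in this series; my_sha512_transform() is a stand-in for an arch- or
hardware-specific compression function, not an existing kernel symbol.

#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>

/* Placeholder for the arch- or hardware-specific single-block transform. */
static void my_sha512_transform(u64 *state, const u8 *block)
{
	/* compress one SHA512_BLOCK_SIZE block into state */
}

static void my_block_fn(int blocks, u8 const *src, u64 *state,
			const u8 *head, void *p)
{
	if (head)
		my_sha512_transform(state, head);
	while (blocks--) {
		my_sha512_transform(state, src);
		src += SHA512_BLOCK_SIZE;
	}
}

static int my_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
	return sha512_base_do_update(desc, data, len, my_block_fn, NULL);
}

static int my_finup(struct shash_desc *desc, const u8 *data,
		    unsigned int len, u8 *out)
{
	if (len)
		sha512_base_do_update(desc, data, len, my_block_fn, NULL);
	sha512_base_do_finalize(desc, my_block_fn, NULL);
	return sha512_base_finish(desc, out);
}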

[PATCH v3 14/16] crypto/x86: move SHA-1 SSSE3 implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/crypto/sha1_ssse3_glue.c | 136 +-
 1 file changed, 30 insertions(+), 106 deletions(-)

diff --git a/arch/x86/crypto/sha1_ssse3_glue.c 
b/arch/x86/crypto/sha1_ssse3_glue.c
index 6c20fe04a738..8678dc75fbf3 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -28,7 +28,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -49,127 +49,50 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const 
char *data,
 
 static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned 
int);
 
-
-static int sha1_ssse3_init(struct shash_desc *desc)
+static void sha1_ssse3_block_fn(int blocks, u8 const *src, u32 *state,
+   const u8 *head, void *p)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-
-   *sctx = (struct sha1_state){
-   .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-   };
-
-   return 0;
-}
-
-static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
-  unsigned int len, unsigned int partial)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int done = 0;
-
-   sctx->count += len;
-
-   if (partial) {
-   done = SHA1_BLOCK_SIZE - partial;
-   memcpy(sctx->buffer + partial, data, done);
-   sha1_transform_asm(sctx->state, sctx->buffer, 1);
-   }
-
-   if (len - done >= SHA1_BLOCK_SIZE) {
-   const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
-   sha1_transform_asm(sctx->state, data + done, rounds);
-   done += rounds * SHA1_BLOCK_SIZE;
-   }
-
-   memcpy(sctx->buffer, data + done, len - done);
-
-   return 0;
+   if (head)
+   sha1_transform_asm(state, head, 1);
+   if (blocks)
+   sha1_transform_asm(state, src, blocks);
 }
 
 static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
 unsigned int len)
 {
struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-   int res;
 
-   /* Handle the fast case right here */
-   if (partial + len < SHA1_BLOCK_SIZE) {
-   sctx->count += len;
-   memcpy(sctx->buffer + partial, data, len);
+   if (!irq_fpu_usable() ||
+   (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+   return crypto_sha1_update(desc, data, len);
 
-   return 0;
-   }
-
-   if (!irq_fpu_usable()) {
-   res = crypto_sha1_update(desc, data, len);
-   } else {
-   kernel_fpu_begin();
-   res = __sha1_ssse3_update(desc, data, len, partial);
-   kernel_fpu_end();
-   }
-
-   return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int i, index, padlen;
-   __be32 *dst = (__be32 *)out;
-   __be64 bits;
-   static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-   bits = cpu_to_be64(sctx->count << 3);
-
-   /* Pad out to 56 mod 64 and append length */
-   index = sctx->count % SHA1_BLOCK_SIZE;
-   padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-   if (!irq_fpu_usable()) {
-   crypto_sha1_update(desc, padding, padlen);
-   crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-   } else {
-   kernel_fpu_begin();
-   /* We need to fill a whole block for __sha1_ssse3_update() */
-   if (padlen <= 56) {
-   sctx->count += padlen;
-   memcpy(sctx->buffer + index, padding, padlen);
-   } else {
-   __sha1_ssse3_update(desc, padding, padlen, index);
-   }
-   __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
-   kernel_fpu_end();
-   }
-
-   /* Store state in digest */
-   for (i = 0; i < 5; i++)
-   dst[i] = cpu_to_be32(sctx->state[i]);
-
-   /* Wipe context */
-   memset(sctx, 0, sizeof(*sctx));
+   kernel_fpu_begin();
+   sha1_base_do_update(desc, data, len, sha1_ssse3_block_fn, NULL);
+   kernel_fpu_end();
 
return 0;
 }
 
-static int sha1_ssse3_export(struct shash_desc *desc, void *out)
+static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
+   if (!irq_fpu_usable())
+   return crypto_sha1_finup(desc, data, len, out);
 
-   memcpy(out, sctx, sizeof(*sctx));
+   kernel_fpu_begin();
+   if (len)
+   sha1_base

[PATCH v3 05/16] crypto: sha256-generic: move to generic glue implementation

2015-04-07 Thread Ard Biesheuvel
This updates the generic SHA-256 implementation to use the
new shared SHA-256 glue code.

It also implements a .finup hook crypto_sha256_finup() and exports
it to other modules.

Signed-off-by: Ard Biesheuvel 
---
 crypto/sha256_generic.c | 140 ++--
 include/crypto/sha.h|   3 ++
 2 files changed, 31 insertions(+), 112 deletions(-)

diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index b001ff5c2efc..794e31889ac9 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -214,136 +215,50 @@ static void sha256_transform(u32 *state, const u8 *input)
memzero_explicit(W, 64 * sizeof(u32));
 }
 
-static int sha224_init(struct shash_desc *desc)
+static void sha256_generic_block_fn(int blocks, u8 const *src, u32 *state,
+   const u8 *head, void *p)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   sctx->state[0] = SHA224_H0;
-   sctx->state[1] = SHA224_H1;
-   sctx->state[2] = SHA224_H2;
-   sctx->state[3] = SHA224_H3;
-   sctx->state[4] = SHA224_H4;
-   sctx->state[5] = SHA224_H5;
-   sctx->state[6] = SHA224_H6;
-   sctx->state[7] = SHA224_H7;
-   sctx->count = 0;
+   if (head)
+   sha256_transform(state, head);
 
-   return 0;
-}
-
-static int sha256_init(struct shash_desc *desc)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   sctx->state[0] = SHA256_H0;
-   sctx->state[1] = SHA256_H1;
-   sctx->state[2] = SHA256_H2;
-   sctx->state[3] = SHA256_H3;
-   sctx->state[4] = SHA256_H4;
-   sctx->state[5] = SHA256_H5;
-   sctx->state[6] = SHA256_H6;
-   sctx->state[7] = SHA256_H7;
-   sctx->count = 0;
-
-   return 0;
+   while (blocks--) {
+   sha256_transform(state, src);
+   src += SHA256_BLOCK_SIZE;
+   }
 }
 
 int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
  unsigned int len)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial, done;
-   const u8 *src;
-
-   partial = sctx->count & 0x3f;
-   sctx->count += len;
-   done = 0;
-   src = data;
-
-   if ((partial + len) > 63) {
-   if (partial) {
-   done = -partial;
-   memcpy(sctx->buf + partial, data, done + 64);
-   src = sctx->buf;
-   }
-
-   do {
-   sha256_transform(sctx->state, src);
-   done += 64;
-   src = data + done;
-   } while (done + 63 < len);
-
-   partial = 0;
-   }
-   memcpy(sctx->buf + partial, src, len - done);
-
-   return 0;
+   return sha256_base_do_update(desc, data, len, sha256_generic_block_fn,
+NULL);
 }
 EXPORT_SYMBOL(crypto_sha256_update);
 
-static int sha256_final(struct shash_desc *desc, u8 *out)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   __be32 *dst = (__be32 *)out;
-   __be64 bits;
-   unsigned int index, pad_len;
-   int i;
-   static const u8 padding[64] = { 0x80, };
-
-   /* Save number of bits */
-   bits = cpu_to_be64(sctx->count << 3);
-
-   /* Pad out to 56 mod 64. */
-   index = sctx->count & 0x3f;
-   pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
-   crypto_sha256_update(desc, padding, pad_len);
-
-   /* Append length (before padding) */
-   crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
-
-   /* Store state in digest */
-   for (i = 0; i < 8; i++)
-   dst[i] = cpu_to_be32(sctx->state[i]);
-
-   /* Zeroize sensitive information. */
-   memset(sctx, 0, sizeof(*sctx));
-
-   return 0;
-}
-
-static int sha224_final(struct shash_desc *desc, u8 *hash)
-{
-   u8 D[SHA256_DIGEST_SIZE];
-
-   sha256_final(desc, D);
-
-   memcpy(hash, D, SHA224_DIGEST_SIZE);
-   memzero_explicit(D, SHA256_DIGEST_SIZE);
-
-   return 0;
-}
-
-static int sha256_export(struct shash_desc *desc, void *out)
+int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
+   unsigned int len, u8 *hash)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-
-   memcpy(out, sctx, sizeof(*sctx));
-   return 0;
+   if (len)
+   sha256_base_do_update(desc, data, len, sha256_generic_block_fn,
+ NULL);
+   sha256_base_do_finalize(desc, sha256_generic_block_fn, NULL);
+   return sha256_base_finish(desc, hash);
 }
+EXPORT_SYMBOL(crypto_sha256_finup);
 
-static int sha256_import(struct shash_desc *desc, const void *in)
+static int sha256_final(struct shash_desc *desc, u8 *out)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);

[PATCH v3 10/16] crypto/arm: move SHA-224/256 ASM/NEON implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/arm/crypto/sha256_glue.c  | 174 -
 arch/arm/crypto/sha256_glue.h  |  17 +---
 arch/arm/crypto/sha256_neon_glue.c | 144 +-
 3 files changed, 81 insertions(+), 254 deletions(-)

diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
index ccef5e25bbcb..6f14a5a0a467 100644
--- a/arch/arm/crypto/sha256_glue.c
+++ b/arch/arm/crypto/sha256_glue.c
@@ -24,163 +24,56 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
+
 #include "sha256_glue.h"
 
 asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
- unsigned int num_blks);
-
+   unsigned int num_blks);
 
-int sha256_init(struct shash_desc *desc)
+static void sha256_arm_block_fn(int blocks, u8 const *src, u32 *state,
+   const u8 *head, void *p)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-
-   sctx->state[0] = SHA256_H0;
-   sctx->state[1] = SHA256_H1;
-   sctx->state[2] = SHA256_H2;
-   sctx->state[3] = SHA256_H3;
-   sctx->state[4] = SHA256_H4;
-   sctx->state[5] = SHA256_H5;
-   sctx->state[6] = SHA256_H6;
-   sctx->state[7] = SHA256_H7;
-   sctx->count = 0;
-
-   return 0;
+   if (head)
+   sha256_block_data_order(state, head, 1);
+   if (blocks)
+   sha256_block_data_order(state, src, blocks);
 }
 
-int sha224_init(struct shash_desc *desc)
+int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
+unsigned int len)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-
-   sctx->state[0] = SHA224_H0;
-   sctx->state[1] = SHA224_H1;
-   sctx->state[2] = SHA224_H2;
-   sctx->state[3] = SHA224_H3;
-   sctx->state[4] = SHA224_H4;
-   sctx->state[5] = SHA224_H5;
-   sctx->state[6] = SHA224_H6;
-   sctx->state[7] = SHA224_H7;
-   sctx->count = 0;
-
-   return 0;
+   return sha256_base_do_update(desc, data, len, sha256_arm_block_fn,
+NULL);
 }
+EXPORT_SYMBOL(crypto_sha256_arm_update);
 
-int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len,
-   unsigned int partial)
+int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
+   unsigned int len, u8 *hash)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int done = 0;
-
-   sctx->count += len;
-
-   if (partial) {
-   done = SHA256_BLOCK_SIZE - partial;
-   memcpy(sctx->buf + partial, data, done);
-   sha256_block_data_order(sctx->state, sctx->buf, 1);
-   }
-
-   if (len - done >= SHA256_BLOCK_SIZE) {
-   const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
-
-   sha256_block_data_order(sctx->state, data + done, rounds);
-   done += rounds * SHA256_BLOCK_SIZE;
-   }
-
-   memcpy(sctx->buf, data + done, len - done);
-
-   return 0;
+   if (len)
+   sha256_base_do_update(desc, data, len, sha256_arm_block_fn,
+ NULL);
+   sha256_base_do_finalize(desc, sha256_arm_block_fn, NULL);
+   return sha256_base_finish(desc, hash);
 }
+EXPORT_SYMBOL(crypto_sha256_arm_finup);
 
-int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
-   /* Handle the fast case right here */
-   if (partial + len < SHA256_BLOCK_SIZE) {
-   sctx->count += len;
-   memcpy(sctx->buf + partial, data, len);
-
-   return 0;
-   }
-
-   return __sha256_update(desc, data, len, partial);
-}
-
-/* Add padding and return the message digest. */
 static int sha256_final(struct shash_desc *desc, u8 *out)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int i, index, padlen;
-   __be32 *dst = (__be32 *)out;
-   __be64 bits;
-   static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
-   /* save number of bits */
-   bits = cpu_to_be64(sctx->count << 3);
-
-   /* Pad out to 56 mod 64 and append length */
-   index = sctx->count % SHA256_BLOCK_SIZE;
-   padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
-
-   /* We need to fill a whole block for __sha256_update */
-   if (padlen <= 56) {
-   sctx->count += padlen;
-   memcpy(sctx->buf + index, padding, padlen);
-   } else {
-   __sha256_update(desc, padding, padlen, index);
-   }
-   __sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56);
-
-   /* Store state in digest */
-   for (i = 0; i < 8; i++)
-   

[PATCH v3 07/16] crypto/arm: move SHA-1 ARM asm implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/arm/crypto/sha1-ce-glue.c   |   3 +-
 arch/arm/{include/asm => }/crypto/sha1.h |   3 +
 arch/arm/crypto/sha1_glue.c  | 116 ++-
 arch/arm/crypto/sha1_neon_glue.c |   2 +-
 4 files changed, 29 insertions(+), 95 deletions(-)
 rename arch/arm/{include/asm => }/crypto/sha1.h (67%)

diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index a9dd90df9fd7..e93b24c1af1f 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -13,12 +13,13 @@
 #include 
 #include 
 
-#include 
 #include 
 #include 
 #include 
 #include 
 
+#include "sha1.h"
+
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel ");
 MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h
similarity index 67%
rename from arch/arm/include/asm/crypto/sha1.h
rename to arch/arm/crypto/sha1.h
index 75e6a417416b..ffd8bd08b1a7 100644
--- a/arch/arm/include/asm/crypto/sha1.h
+++ b/arch/arm/crypto/sha1.h
@@ -7,4 +7,7 @@
 extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
   unsigned int len);
 
+extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+  unsigned int len, u8 *out);
+
 #endif
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index e31b0440c613..c5a9519d 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -22,125 +22,55 @@
 #include 
 #include 
 #include 
+#include 
 #include 
-#include 
 
+#include "sha1.h"
 
 asmlinkage void sha1_block_data_order(u32 *digest,
const unsigned char *data, unsigned int rounds);
 
-
-static int sha1_init(struct shash_desc *desc)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-
-   *sctx = (struct sha1_state){
-   .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-   };
-
-   return 0;
-}
-
-
-static int __sha1_update(struct sha1_state *sctx, const u8 *data,
-unsigned int len, unsigned int partial)
+static void sha1_arm_block_fn(int blocks, u8 const *src, u32 *state,
+ const u8 *head, void *p)
 {
-   unsigned int done = 0;
-
-   sctx->count += len;
-
-   if (partial) {
-   done = SHA1_BLOCK_SIZE - partial;
-   memcpy(sctx->buffer + partial, data, done);
-   sha1_block_data_order(sctx->state, sctx->buffer, 1);
-   }
-
-   if (len - done >= SHA1_BLOCK_SIZE) {
-   const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-   sha1_block_data_order(sctx->state, data + done, rounds);
-   done += rounds * SHA1_BLOCK_SIZE;
-   }
-
-   memcpy(sctx->buffer, data + done, len - done);
-   return 0;
+   if (head)
+   sha1_block_data_order(state, head, 1);
+   if (blocks)
+   sha1_block_data_order(state, src, blocks);
 }
 
-
 int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-   int res;
-
-   /* Handle the fast case right here */
-   if (partial + len < SHA1_BLOCK_SIZE) {
-   sctx->count += len;
-   memcpy(sctx->buffer + partial, data, len);
-   return 0;
-   }
-   res = __sha1_update(sctx, data, len, partial);
-   return res;
+   return sha1_base_do_update(desc, data, len, sha1_arm_block_fn, NULL);
 }
 EXPORT_SYMBOL_GPL(sha1_update_arm);
 
-
-/* Add padding and return the message digest. */
-static int sha1_final(struct shash_desc *desc, u8 *out)
+int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+  unsigned int len, u8 *out)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int i, index, padlen;
-   __be32 *dst = (__be32 *)out;
-   __be64 bits;
-   static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-   bits = cpu_to_be64(sctx->count << 3);
-
-   /* Pad out to 56 mod 64 and append length */
-   index = sctx->count % SHA1_BLOCK_SIZE;
-   padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-   /* We need to fill a whole block for __sha1_update() */
-   if (padlen <= 56) {
-   sctx->count += padlen;
-   memcpy(sctx->buffer + index, padding, padlen);
-   } else {
-   __sha1_update(sctx, padding, padlen, index);
-   }
-   __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
-
-   /* Store state in digest */
-   for (i = 0; i < 5; i++)
-   dst[i] = cpu_to_be32(sctx->state[i]);
-
-   /* Wipe context */
-   memset(sctx, 0, sizeof(*sctx));
-   return 0;
-}
-
+   if (len)
+   sha1_base_do_upda

[PATCH v3 03/16] crypto: sha512: implement base layer for SHA-512

2015-04-07 Thread Ard Biesheuvel
To reduce the number of copies of boilerplate code throughout
the tree, this patch implements generic glue for the SHA-512
algorithm. This allows a specific arch or hardware implementation
to only implement the special handling that it needs.

Signed-off-by: Ard Biesheuvel 
---
 include/crypto/sha512_base.h | 147 +++
 1 file changed, 147 insertions(+)
 create mode 100644 include/crypto/sha512_base.h

diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h
new file mode 100644
index ..44351f781dce
--- /dev/null
+++ b/include/crypto/sha512_base.h
@@ -0,0 +1,147 @@
+/*
+ * sha512_base.h - core logic for SHA-512 implementations
+ *
+ * Copyright (C) 2015 Linaro Ltd 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+typedef void (sha512_block_fn)(int blocks, u8 const *src, u64 *state,
+  const u8 *head, void *p);
+
+static inline int sha384_base_init(struct shash_desc *desc)
+{
+   struct sha512_state *sctx = shash_desc_ctx(desc);
+
+   sctx->state[0] = SHA384_H0;
+   sctx->state[1] = SHA384_H1;
+   sctx->state[2] = SHA384_H2;
+   sctx->state[3] = SHA384_H3;
+   sctx->state[4] = SHA384_H4;
+   sctx->state[5] = SHA384_H5;
+   sctx->state[6] = SHA384_H6;
+   sctx->state[7] = SHA384_H7;
+   sctx->count[0] = sctx->count[1] = 0;
+
+   return 0;
+}
+
+static inline int sha512_base_init(struct shash_desc *desc)
+{
+   struct sha512_state *sctx = shash_desc_ctx(desc);
+
+   sctx->state[0] = SHA512_H0;
+   sctx->state[1] = SHA512_H1;
+   sctx->state[2] = SHA512_H2;
+   sctx->state[3] = SHA512_H3;
+   sctx->state[4] = SHA512_H4;
+   sctx->state[5] = SHA512_H5;
+   sctx->state[6] = SHA512_H6;
+   sctx->state[7] = SHA512_H7;
+   sctx->count[0] = sctx->count[1] = 0;
+
+   return 0;
+}
+
+static inline int sha512_base_export(struct shash_desc *desc, void *out)
+{
+   struct sha512_state *sctx = shash_desc_ctx(desc);
+   struct sha512_state *dst = out;
+
+   *dst = *sctx;
+
+   return 0;
+}
+
+static inline int sha512_base_import(struct shash_desc *desc, const void *in)
+{
+   struct sha512_state *sctx = shash_desc_ctx(desc);
+   struct sha512_state const *src = in;
+
+   *sctx = *src;
+
+   return 0;
+}
+
+static inline int sha512_base_do_update(struct shash_desc *desc, const u8 *data,
+   unsigned int len,
+   sha512_block_fn *block_fn, void *p)
+{
+   struct sha512_state *sctx = shash_desc_ctx(desc);
+   unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+
+   sctx->count[0] += len;
+   if (sctx->count[0] < len)
+   sctx->count[1]++;
+
+   if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) {
+   int blocks;
+
+   if (partial) {
+   int p = SHA512_BLOCK_SIZE - partial;
+
+   memcpy(sctx->buf + partial, data, p);
+   data += p;
+   len -= p;
+   }
+
+   blocks = len / SHA512_BLOCK_SIZE;
+   len %= SHA512_BLOCK_SIZE;
+
+   block_fn(blocks, data, sctx->state,
+partial ? sctx->buf : NULL, p);
+   data += blocks * SHA512_BLOCK_SIZE;
+   partial = 0;
+   }
+   if (len)
+   memcpy(sctx->buf + partial, data, len);
+
+   return 0;
+}
+
+static inline int sha512_base_do_finalize(struct shash_desc *desc,
+  sha512_block_fn *block_fn, void *p)
+{
+   const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]);
+   struct sha512_state *sctx = shash_desc_ctx(desc);
+   __be64 *bits = (__be64 *)(sctx->buf + bit_offset);
+   unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+
+   sctx->buf[partial++] = 0x80;
+   if (partial > bit_offset) {
+   memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial);
+   partial = 0;
+
+   block_fn(1, sctx->buf, sctx->state, NULL, p);
+   }
+
+   memset(sctx->buf + partial, 0x0, bit_offset - partial);
+   bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+   bits[1] = cpu_to_be64(sctx->count[0] << 3);
+   block_fn(1, sctx->buf, sctx->state, NULL, p);
+
+   return 0;
+}
+
+static inline int sha512_base_finish(struct shash_desc *desc, u8 *out)
+{
+   unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+   struct sha512_state *sctx = shash_desc_ctx(desc);
+   __be64 *digest = (__be64 *)out;
+   int i;
+
+   for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64))
+  

[PATCH v3 08/16] crypto/arm: move SHA-1 NEON implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/arm/crypto/sha1_neon_glue.c | 137 +--
 1 file changed, 30 insertions(+), 107 deletions(-)

diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 5d9a1b4aac73..4280f657fb9d 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -25,7 +25,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 
@@ -34,136 +34,59 @@
 asmlinkage void sha1_transform_neon(void *state_h, const char *data,
unsigned int rounds);
 
-
-static int sha1_neon_init(struct shash_desc *desc)
+static void sha1_neon_block_fn(int blocks, u8 const *src, u32 *state,
+ const u8 *head, void *p)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-
-   *sctx = (struct sha1_state){
-   .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-   };
-
-   return 0;
-}
-
-static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
-  unsigned int len, unsigned int partial)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int done = 0;
-
-   sctx->count += len;
-
-   if (partial) {
-   done = SHA1_BLOCK_SIZE - partial;
-   memcpy(sctx->buffer + partial, data, done);
-   sha1_transform_neon(sctx->state, sctx->buffer, 1);
-   }
-
-   if (len - done >= SHA1_BLOCK_SIZE) {
-   const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
-   sha1_transform_neon(sctx->state, data + done, rounds);
-   done += rounds * SHA1_BLOCK_SIZE;
-   }
-
-   memcpy(sctx->buffer, data + done, len - done);
-
-   return 0;
+   if (head)
+   sha1_transform_neon(state, head, 1);
+   if (blocks)
+   sha1_transform_neon(state, src, blocks);
 }
 
 static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
-unsigned int len)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-   int res;
-
-   /* Handle the fast case right here */
-   if (partial + len < SHA1_BLOCK_SIZE) {
-   sctx->count += len;
-   memcpy(sctx->buffer + partial, data, len);
-
-   return 0;
-   }
-
-   if (!may_use_simd()) {
-   res = sha1_update_arm(desc, data, len);
-   } else {
-   kernel_neon_begin();
-   res = __sha1_neon_update(desc, data, len, partial);
-   kernel_neon_end();
-   }
-
-   return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_neon_final(struct shash_desc *desc, u8 *out)
+ unsigned int len)
 {
struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int i, index, padlen;
-   __be32 *dst = (__be32 *)out;
-   __be64 bits;
-   static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-   bits = cpu_to_be64(sctx->count << 3);
-
-   /* Pad out to 56 mod 64 and append length */
-   index = sctx->count % SHA1_BLOCK_SIZE;
-   padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-   if (!may_use_simd()) {
-   sha1_update_arm(desc, padding, padlen);
-   sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
-   } else {
-   kernel_neon_begin();
-   /* We need to fill a whole block for __sha1_neon_update() */
-   if (padlen <= 56) {
-   sctx->count += padlen;
-   memcpy(sctx->buffer + index, padding, padlen);
-   } else {
-   __sha1_neon_update(desc, padding, padlen, index);
-   }
-   __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
-   kernel_neon_end();
-   }
 
-   /* Store state in digest */
-   for (i = 0; i < 5; i++)
-   dst[i] = cpu_to_be32(sctx->state[i]);
+   if (!may_use_simd() ||
+   (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+   return sha1_update_arm(desc, data, len);
 
-   /* Wipe context */
-   memset(sctx, 0, sizeof(*sctx));
+   kernel_neon_begin();
+   sha1_base_do_update(desc, data, len, sha1_neon_block_fn, NULL);
+   kernel_neon_end();
 
return 0;
 }
 
-static int sha1_neon_export(struct shash_desc *desc, void *out)
+static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
+  unsigned int len, u8 *out)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
+   if (!may_use_simd())
+   return sha1_finup_arm(desc, data, len, out);
 
-   memcpy(out, sctx, sizeof(*sctx));
+   kernel_neon_begin();
+   if (len)
+   sha1_base_do_update(desc, data, l

[PATCH v3 09/16] crypto/arm: move SHA-1 ARMv8 implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/arm/crypto/Kconfig|   1 -
 arch/arm/crypto/sha1-ce-glue.c | 108 +++--
 2 files changed, 28 insertions(+), 81 deletions(-)

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 458729d2ce22..5ed98bc6f95d 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -31,7 +31,6 @@ config CRYPTO_SHA1_ARM_CE
tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA1_ARM
-   select CRYPTO_SHA1
select CRYPTO_HASH
help
  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index e93b24c1af1f..9d0e86e5647b 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -10,13 +10,13 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
 #include 
 #include 
 #include 
-#include 
 
 #include "sha1.h"
 
@@ -24,104 +24,52 @@ MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel ");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, 
- u8 *head);
+asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ const u8 *head, void *p);
 
-static int sha1_init(struct shash_desc *desc)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
 {
struct sha1_state *sctx = shash_desc_ctx(desc);
 
-   *sctx = (struct sha1_state){
-   .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-   };
-   return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
-  unsigned int len)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial;
-
-   if (!may_use_simd())
+   if (!may_use_simd() ||
+   (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
return sha1_update_arm(desc, data, len);
 
-   partial = sctx->count % SHA1_BLOCK_SIZE;
-   sctx->count += len;
-
-   if ((partial + len) >= SHA1_BLOCK_SIZE) {
-   int blocks;
+   kernel_neon_begin();
+   sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL);
+   kernel_neon_end();
 
-   if (partial) {
-   int p = SHA1_BLOCK_SIZE - partial;
-
-   memcpy(sctx->buffer + partial, data, p);
-   data += p;
-   len -= p;
-   }
-
-   blocks = len / SHA1_BLOCK_SIZE;
-   len %= SHA1_BLOCK_SIZE;
-
-   kernel_neon_begin();
-   sha1_ce_transform(blocks, data, sctx->state,
- partial ? sctx->buffer : NULL);
-   kernel_neon_end();
-
-   data += blocks * SHA1_BLOCK_SIZE;
-   partial = 0;
-   }
-   if (len)
-   memcpy(sctx->buffer + partial, data, len);
return 0;
 }
 
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+unsigned int len, u8 *out)
 {
-   static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   __be64 bits = cpu_to_be64(sctx->count << 3);
-   __be32 *dst = (__be32 *)out;
-   int i;
-
-   u32 padlen = SHA1_BLOCK_SIZE
-- ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
-   sha1_update(desc, padding, padlen);
-   sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
-   for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-   put_unaligned_be32(sctx->state[i], dst++);
-
-   *sctx = (struct sha1_state){};
-   return 0;
-}
+   if (!may_use_simd())
+   return sha1_finup_arm(desc, data, len, out);
 
-static int sha1_export(struct shash_desc *desc, void *out)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   struct sha1_state *dst = out;
+   kernel_neon_begin();
+   if (len)
+   sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL);
+   sha1_base_do_finalize(desc, sha1_ce_transform, NULL);
+   kernel_neon_end();
 
-   *dst = *sctx;
-   return 0;
+   return sha1_base_finish(desc, out);
 }
 
-static int sha1_import(struct shash_desc *desc, const void *in)
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   struct sha1_state const *src = in;
-
-   *sctx = *src;
-   return 0;
+   return sha1_ce_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg alg = {
-   .init   = sha1_init,
-   .update 

[PATCH v3 04/16] crypto: sha1-generic: move to generic glue implementation

2015-04-07 Thread Ard Biesheuvel
This updates the generic SHA-1 implementation to use the generic
shared SHA-1 glue code.

It also implements a .finup hook crypto_sha1_finup() and exports
it to other modules.

Signed-off-by: Ard Biesheuvel 
---
 crypto/sha1_generic.c | 108 +-
 include/crypto/sha.h  |   3 ++
 2 files changed, 31 insertions(+), 80 deletions(-)

diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index a3e50c37eb6f..322a2278d939 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -23,109 +23,57 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
-static int sha1_init(struct shash_desc *desc)
+static void sha1_generic_block_fn(int blocks, u8 const *src, u32 *state,
+ const u8 *head, void *p)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
+   u32 temp[SHA_WORKSPACE_WORDS];
 
-   *sctx = (struct sha1_state){
-   .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-   };
+   if (head)
+   sha_transform(state, head, temp);
 
-   return 0;
+   while (blocks--) {
+   sha_transform(state, src, temp);
+   src += SHA1_BLOCK_SIZE;
+   }
+   memzero_explicit(temp, sizeof(temp));
 }
 
 int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial, done;
-   const u8 *src;
-
-   partial = sctx->count % SHA1_BLOCK_SIZE;
-   sctx->count += len;
-   done = 0;
-   src = data;
-
-   if ((partial + len) >= SHA1_BLOCK_SIZE) {
-   u32 temp[SHA_WORKSPACE_WORDS];
-
-   if (partial) {
-   done = -partial;
-   memcpy(sctx->buffer + partial, data,
-  done + SHA1_BLOCK_SIZE);
-   src = sctx->buffer;
-   }
-
-   do {
-   sha_transform(sctx->state, src, temp);
-   done += SHA1_BLOCK_SIZE;
-   src = data + done;
-   } while (done + SHA1_BLOCK_SIZE <= len);
-
-   memzero_explicit(temp, sizeof(temp));
-   partial = 0;
-   }
-   memcpy(sctx->buffer + partial, src, len - done);
-
-   return 0;
+   return sha1_base_do_update(desc, data, len, sha1_generic_block_fn,
+  NULL);
 }
 EXPORT_SYMBOL(crypto_sha1_update);
 
-
-/* Add padding and return the message digest. */
-static int sha1_final(struct shash_desc *desc, u8 *out)
-{
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-   __be32 *dst = (__be32 *)out;
-   u32 i, index, padlen;
-   __be64 bits;
-   static const u8 padding[64] = { 0x80, };
-
-   bits = cpu_to_be64(sctx->count << 3);
-
-   /* Pad out to 56 mod 64 */
-   index = sctx->count & 0x3f;
-   padlen = (index < 56) ? (56 - index) : ((64+56) - index);
-   crypto_sha1_update(desc, padding, padlen);
-
-   /* Append length */
-   crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
-   /* Store state in digest */
-   for (i = 0; i < 5; i++)
-   dst[i] = cpu_to_be32(sctx->state[i]);
-
-   /* Wipe context */
-   memset(sctx, 0, sizeof *sctx);
-
-   return 0;
-}
-
-static int sha1_export(struct shash_desc *desc, void *out)
+int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-
-   memcpy(out, sctx, sizeof(*sctx));
-   return 0;
+   if (len)
+   sha1_base_do_update(desc, data, len, sha1_generic_block_fn,
+   NULL);
+   sha1_base_do_finalize(desc, sha1_generic_block_fn, NULL);
+   return sha1_base_finish(desc, out);
 }
+EXPORT_SYMBOL(crypto_sha1_finup);
 
-static int sha1_import(struct shash_desc *desc, const void *in)
+/* Add padding and return the message digest. */
+static int sha1_final(struct shash_desc *desc, u8 *out)
 {
-   struct sha1_state *sctx = shash_desc_ctx(desc);
-
-   memcpy(sctx, in, sizeof(*sctx));
-   return 0;
+   return crypto_sha1_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg alg = {
.digestsize =   SHA1_DIGEST_SIZE,
-   .init   =   sha1_init,
+   .init   =   sha1_base_init,
.update =   crypto_sha1_update,
.final  =   sha1_final,
-   .export =   sha1_export,
-   .import =   sha1_import,
+   .finup  =   crypto_sha1_finup,
+   .export =   sha1_base_export,
+   .import =   sha1_base_import,
.descsize   =   sizeof(struct sha1_state),
.statesize  =   sizeof(struct sha1_state),
.base   =   {
diff --git a

[PATCH v3 11/16] crypto/arm: move SHA-224/256 ARMv8 implementation to base layer

2015-04-07 Thread Ard Biesheuvel
Signed-off-by: Ard Biesheuvel 
---
 arch/arm/crypto/Kconfig|   2 +-
 arch/arm/crypto/sha2-ce-glue.c | 154 ++---
 2 files changed, 36 insertions(+), 120 deletions(-)

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 5ed98bc6f95d..a267529d9577 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE
 config CRYPTO_SHA2_ARM_CE
tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
-   select CRYPTO_SHA256
+   select CRYPTO_SHA256_ARM
select CRYPTO_HASH
help
  SHA-256 secure hash standard (DFIPS 180-2) implemented
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
index 0449eca3aab3..6110b937264c 100644
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -10,6 +10,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -18,145 +19,59 @@
 #include 
 #include 
 
+#include "sha256_glue.h"
+
 MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto 
Extensions");
 MODULE_AUTHOR("Ard Biesheuvel ");
 MODULE_LICENSE("GPL v2");
 
 asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
- u8 *head);
+ const u8 *head, void *p);
 
-static int sha224_init(struct shash_desc *desc)
+static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
 {
struct sha256_state *sctx = shash_desc_ctx(desc);
 
-   *sctx = (struct sha256_state){
-   .state = {
-   SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
-   SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
-   }
-   };
-   return 0;
-}
+   if (!may_use_simd() ||
+   (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+   return crypto_sha256_arm_update(desc, data, len);
 
-static int sha256_init(struct shash_desc *desc)
-{
-   struct sha256_state *sctx = shash_desc_ctx(desc);
+   kernel_neon_begin();
+   sha256_base_do_update(desc, data, len, sha2_ce_transform, NULL);
+   kernel_neon_end();
 
-   *sctx = (struct sha256_state){
-   .state = {
-   SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
-   SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
-   }
-   };
return 0;
 }
 
-static int sha2_update(struct shash_desc *desc, const u8 *data,
-  unsigned int len)
+static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
+unsigned int len, u8 *out)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   unsigned int partial;
-
if (!may_use_simd())
-   return crypto_sha256_update(desc, data, len);
-
-   partial = sctx->count % SHA256_BLOCK_SIZE;
-   sctx->count += len;
-
-   if ((partial + len) >= SHA256_BLOCK_SIZE) {
-   int blocks;
-
-   if (partial) {
-   int p = SHA256_BLOCK_SIZE - partial;
-
-   memcpy(sctx->buf + partial, data, p);
-   data += p;
-   len -= p;
-   }
-
-   blocks = len / SHA256_BLOCK_SIZE;
-   len %= SHA256_BLOCK_SIZE;
+   return crypto_sha256_arm_finup(desc, data, len, out);
 
-   kernel_neon_begin();
-   sha2_ce_transform(blocks, data, sctx->state,
- partial ? sctx->buf : NULL);
-   kernel_neon_end();
-
-   data += blocks * SHA256_BLOCK_SIZE;
-   partial = 0;
-   }
+   kernel_neon_begin();
if (len)
-   memcpy(sctx->buf + partial, data, len);
-   return 0;
-}
-
-static void sha2_final(struct shash_desc *desc)
-{
-   static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+   sha256_base_do_update(desc, data, len,
+sha2_ce_transform, NULL);
+   sha256_base_do_finalize(desc, sha2_ce_transform, NULL);
+   kernel_neon_end();
 
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   __be64 bits = cpu_to_be64(sctx->count << 3);
-   u32 padlen = SHA256_BLOCK_SIZE
-- ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
-
-   sha2_update(desc, padding, padlen);
-   sha2_update(desc, (const u8 *)&bits, sizeof(bits));
+   return sha256_base_finish(desc, out);
 }
 
-static int sha224_final(struct shash_desc *desc, u8 *out)
+static int sha2_ce_final(struct shash_desc *desc, u8 *out)
 {
-   struct sha256_state *sctx = shash_desc_ctx(desc);
-   __be32 *dst = (__be32 *)out;
-   int i;
-
-   sha2_final(desc);
-
-   for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
-   put_una

[PATCH v3 02/16] crypto: sha256: implement base layer for SHA-256

2015-04-07 Thread Ard Biesheuvel
To reduce the number of copies of boilerplate code throughout
the tree, this patch implements generic glue for the SHA-256
algorithm. This allows a specific arch or hardware implementation
to only implement the special handling that it needs.

Signed-off-by: Ard Biesheuvel 
---
 include/crypto/sha256_base.h | 144 +++
 1 file changed, 144 insertions(+)
 create mode 100644 include/crypto/sha256_base.h

diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h
new file mode 100644
index ..237d549b4093
--- /dev/null
+++ b/include/crypto/sha256_base.h
@@ -0,0 +1,144 @@
+/*
+ * sha256_base.h - core logic for SHA-256 implementations
+ *
+ * Copyright (C) 2015 Linaro Ltd 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+typedef void (sha256_block_fn)(int blocks, u8 const *src, u32 *state,
+  const u8 *head, void *p);
+
+static inline int sha224_base_init(struct shash_desc *desc)
+{
+   struct sha256_state *sctx = shash_desc_ctx(desc);
+
+   sctx->state[0] = SHA224_H0;
+   sctx->state[1] = SHA224_H1;
+   sctx->state[2] = SHA224_H2;
+   sctx->state[3] = SHA224_H3;
+   sctx->state[4] = SHA224_H4;
+   sctx->state[5] = SHA224_H5;
+   sctx->state[6] = SHA224_H6;
+   sctx->state[7] = SHA224_H7;
+   sctx->count = 0;
+
+   return 0;
+}
+
+static inline int sha256_base_init(struct shash_desc *desc)
+{
+   struct sha256_state *sctx = shash_desc_ctx(desc);
+
+   sctx->state[0] = SHA256_H0;
+   sctx->state[1] = SHA256_H1;
+   sctx->state[2] = SHA256_H2;
+   sctx->state[3] = SHA256_H3;
+   sctx->state[4] = SHA256_H4;
+   sctx->state[5] = SHA256_H5;
+   sctx->state[6] = SHA256_H6;
+   sctx->state[7] = SHA256_H7;
+   sctx->count = 0;
+
+   return 0;
+}
+
+static inline int sha256_base_export(struct shash_desc *desc, void *out)
+{
+   struct sha256_state *sctx = shash_desc_ctx(desc);
+   struct sha256_state *dst = out;
+
+   *dst = *sctx;
+
+   return 0;
+}
+
+static inline int sha256_base_import(struct shash_desc *desc, const void *in)
+{
+   struct sha256_state *sctx = shash_desc_ctx(desc);
+   struct sha256_state const *src = in;
+
+   *sctx = *src;
+
+   return 0;
+}
+
+static inline int sha256_base_do_update(struct shash_desc *desc, const u8 *data,
+   unsigned int len,
+   sha256_block_fn *block_fn, void *p)
+{
+   struct sha256_state *sctx = shash_desc_ctx(desc);
+   unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+   sctx->count += len;
+
+   if (unlikely((partial + len) >= SHA256_BLOCK_SIZE)) {
+   int blocks;
+
+   if (partial) {
+   int p = SHA256_BLOCK_SIZE - partial;
+
+   memcpy(sctx->buf + partial, data, p);
+   data += p;
+   len -= p;
+   }
+
+   blocks = len / SHA256_BLOCK_SIZE;
+   len %= SHA256_BLOCK_SIZE;
+
+   block_fn(blocks, data, sctx->state,
+partial ? sctx->buf : NULL, p);
+   data += blocks * SHA256_BLOCK_SIZE;
+   partial = 0;
+   }
+   if (len)
+   memcpy(sctx->buf + partial, data, len);
+
+   return 0;
+}
+
+static inline int sha256_base_do_finalize(struct shash_desc *desc,
+ sha256_block_fn *block_fn, void *p)
+{
+   const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64);
+   struct sha256_state *sctx = shash_desc_ctx(desc);
+   __be64 *bits = (__be64 *)(sctx->buf + bit_offset);
+   unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+   sctx->buf[partial++] = 0x80;
+   if (partial > bit_offset) {
+   memset(sctx->buf + partial, 0x0, SHA256_BLOCK_SIZE - partial);
+   partial = 0;
+
+   block_fn(1, sctx->buf, sctx->state, NULL, p);
+   }
+
+   memset(sctx->buf + partial, 0x0, bit_offset - partial);
+   *bits = cpu_to_be64(sctx->count << 3);
+   block_fn(1, sctx->buf, sctx->state, NULL, p);
+
+   return 0;
+}
+
+static inline int sha256_base_finish(struct shash_desc *desc, u8 *out)
+{
+   unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+   struct sha256_state *sctx = shash_desc_ctx(desc);
+   __be32 *digest = (__be32 *)out;
+   int i;
+
+   for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32))
+   put_unaligned_be32(sctx->state[i], digest++);
+
+   *sctx = (struct sha256_state){};
+   return 0;
+}
-- 
1.8.3.2


[PATCH v3 01/16] crypto: sha1: implement base layer for SHA-1

2015-04-07 Thread Ard Biesheuvel
To reduce the number of copies of boilerplate code throughout
the tree, this patch implements generic glue for the SHA-1
algorithm. This allows a specific arch or hardware implementation
to only implement the special handling that it needs.

Signed-off-by: Ard Biesheuvel 
---
 include/crypto/sha1_base.h | 123 +
 1 file changed, 123 insertions(+)
 create mode 100644 include/crypto/sha1_base.h

diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h
new file mode 100644
index ..919db0920203
--- /dev/null
+++ b/include/crypto/sha1_base.h
@@ -0,0 +1,123 @@
+/*
+ * sha1_base.h - core logic for SHA-1 implementations
+ *
+ * Copyright (C) 2015 Linaro Ltd 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+typedef void (sha1_block_fn)(int blocks, u8 const *src, u32 *state,
+const u8 *head, void *p);
+
+static inline int sha1_base_init(struct shash_desc *desc)
+{
+   struct sha1_state *sctx = shash_desc_ctx(desc);
+
+   sctx->state[0] = SHA1_H0;
+   sctx->state[1] = SHA1_H1;
+   sctx->state[2] = SHA1_H2;
+   sctx->state[3] = SHA1_H3;
+   sctx->state[4] = SHA1_H4;
+   sctx->count = 0;
+
+   return 0;
+}
+
+static inline int sha1_base_export(struct shash_desc *desc, void *out)
+{
+   struct sha1_state *sctx = shash_desc_ctx(desc);
+   struct sha1_state *dst = out;
+
+   *dst = *sctx;
+
+   return 0;
+}
+
+static inline int sha1_base_import(struct shash_desc *desc, const void *in)
+{
+   struct sha1_state *sctx = shash_desc_ctx(desc);
+   struct sha1_state const *src = in;
+
+   *sctx = *src;
+
+   return 0;
+}
+
+static inline int sha1_base_do_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, sha1_block_fn *block_fn,
+ void *p)
+{
+   struct sha1_state *sctx = shash_desc_ctx(desc);
+   unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+   sctx->count += len;
+
+   if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) {
+   int blocks;
+
+   if (partial) {
+   int p = SHA1_BLOCK_SIZE - partial;
+
+   memcpy(sctx->buffer + partial, data, p);
+   data += p;
+   len -= p;
+   }
+
+   blocks = len / SHA1_BLOCK_SIZE;
+   len %= SHA1_BLOCK_SIZE;
+
+   block_fn(blocks, data, sctx->state,
+partial ? sctx->buffer : NULL, p);
+   data += blocks * SHA1_BLOCK_SIZE;
+   partial = 0;
+   }
+   if (len)
+   memcpy(sctx->buffer + partial, data, len);
+
+   return 0;
+}
+
+static inline int sha1_base_do_finalize(struct shash_desc *desc,
+   sha1_block_fn *block_fn, void *p)
+{
+   const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
+   struct sha1_state *sctx = shash_desc_ctx(desc);
+   __be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
+   unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+   sctx->buffer[partial++] = 0x80;
+   if (partial > bit_offset) {
+   memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial);
+   partial = 0;
+
+   block_fn(1, sctx->buffer, sctx->state, NULL, p);
+   }
+
+   memset(sctx->buffer + partial, 0x0, bit_offset - partial);
+   *bits = cpu_to_be64(sctx->count << 3);
+   block_fn(1, sctx->buffer, sctx->state, NULL, p);
+
+   return 0;
+}
+
+static inline int sha1_base_finish(struct shash_desc *desc, u8 *out)
+{
+   struct sha1_state *sctx = shash_desc_ctx(desc);
+   __be32 *digest = (__be32 *)out;
+   int i;
+
+   for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+   put_unaligned_be32(sctx->state[i], digest++);
+
+   *sctx = (struct sha1_state){};
+   return 0;
+}
-- 
1.8.3.2



[PATCH v3 00/16] crypto: SHA glue code consolidation

2015-04-07 Thread Ard Biesheuvel
Hello all,

This is v3 of what is now a complete glue code consolidation series
for generic, x86, arm and arm64 implementations of SHA-1, SHA-224/256
and SHA-384/512.

The purpose is to have a single, canonical implementation of the core
logic that gets reused by all versions of the algorithm. Note that this
is not about saving space in the binary, but about ensuring that the same
code is used everywhere, reducing the maintenance burden.

The base layer implements all the update and finalization logic around
the block transforms, where the prototypes of the latter look something
like this:

typedef void (shaXXX_block_fn)(int blocks, u8 const *src, uXX *state,
 const u8 *head, void *p);

The block implementation should process the head block first, then
process the requested number of blocks starting at 'src'. The generic
pointer 'p' is passed down from the do_update/do_finalize() versions;
this is used for instance by the ARM64 implementations to indicate to
the core ASM implementation that it should finalize the digest, which
it will do only if the input was a round multiple of the block size.
The generic pointer is used here as a means of conveying that information
back and forth.
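
As an illustration only (this sketch is not part of any patch in the series),
a block function wired up to this prototype could look like the following;
my_sha1_asm() stands in for an arbitrary assembler routine and is purely
hypothetical, and the generic pointer 'p' is simply unused here:

static void my_sha1_block_fn(int blocks, u8 const *src, u32 *state,
                             const u8 *head, void *p)
{
        /* process the partially filled buffer handed down by the base layer */
        if (head)
                my_sha1_asm(state, head, 1);

        /* then process the requested number of full blocks starting at 'src' */
        if (blocks)
                my_sha1_asm(state, src, blocks);
}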

Note that the base function prototypes all return 'int', but they
always return 0. They should be invoked as tail calls where possible
to eliminate some of the function call overhead. If that is not possible,
the return values can be safely ignored.
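
To make the calling convention concrete, here is a sketch of how an arch glue
layer might drive the SHA-1 helpers, reusing the hypothetical
my_sha1_block_fn() from the sketch above; the real arch patches later in this
series follow the same shape:

int my_sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
        /* tail call: the helper's (always zero) return value is passed through */
        return sha1_base_do_update(desc, data, len, my_sha1_block_fn, NULL);
}

int my_sha1_finup(struct shash_desc *desc, const u8 *data, unsigned int len,
                  u8 *out)
{
        if (len)
                sha1_base_do_update(desc, data, len, my_sha1_block_fn, NULL);
        sha1_base_do_finalize(desc, my_sha1_block_fn, NULL);

        /* writes out the digest and wipes the descriptor state */
        return sha1_base_finish(desc, out);
}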

Changes since v2:
- Replace the base modules with header files containing static inlines that
  implement the core logic. This avoids introducing new modules or new
  inter-module dependencies, and gives the compiler the opportunity for
  optimization.
- Now includes new glue for the existing SHA-1 NEON module and Sami's new
  SHA-224/256 ASM+NEON module
- Use direct assignments instead of memcpy() to set the initial state (as is
  done in many of the call sites of the various init functions that are being
  converted by this series)

Changes since v1 (RFC):
- prefixed globally visible generic symbols with crypto_
- added SHA-1 base layer
- updated init code to only set the initial constants and clear the
  count; clearing the buffer is unnecessary [Markus]
- favor the small update path in crypto_sha_XXX_base_do_update() [Markus]
- update crypto_sha_XXX_do_finalize() to use memset() on the buffer directly
  rather than copying a statically allocated padding buffer into it
  [Markus]
- moved a bunch of existing arm and x86 implementations to use the new base
  layers

Note: looking at the generated asm (for arm64), I noticed that the memcpy/memset
invocations with compile time constant src and len arguments (which includes
the empty struct assignments) are eliminated completely, and replaced by
direct loads and stores. Hopefully this addresses the concern raised by Markus
regarding this.

Ard Biesheuvel (16):
  crypto: sha1: implement base layer for SHA-1
  crypto: sha256: implement base layer for SHA-256
  crypto: sha512: implement base layer for SHA-512
  crypto: sha1-generic: move to generic glue implementation
  crypto: sha256-generic: move to generic glue implementation
  crypto: sha512-generic: move to generic glue implementation
  crypto/arm: move SHA-1 ARM asm implementation to base layer
  crypto/arm: move SHA-1 NEON implementation to base layer
  crypto/arm: move SHA-1 ARMv8 implementation to base layer
  crypto/arm: move SHA-224/256 ASM/NEON implementation to base layer
  crypto/arm: move SHA-224/256 ARMv8 implementation to base layer
  crypto/arm64: move SHA-1 ARMv8 implementation to base layer
  crypto/arm64: move SHA-224/256 ARMv8 implementation to base layer
  crypto/x86: move SHA-1 SSSE3 implementation to base layer
  crypto/x86: move SHA-224/256 SSSE3 implementation to base layer
  crypto/x86: move SHA-384/512 SSSE3 implementation to base layer

 arch/arm/crypto/Kconfig  |   3 +-
 arch/arm/crypto/sha1-ce-glue.c   | 111 +---
 arch/arm/{include/asm => }/crypto/sha1.h |   3 +
 arch/arm/crypto/sha1_glue.c  | 116 -
 arch/arm/crypto/sha1_neon_glue.c | 139 +---
 arch/arm/crypto/sha2-ce-glue.c   | 154 ++-
 arch/arm/crypto/sha256_glue.c| 174 +
 arch/arm/crypto/sha256_glue.h|  17 +--
 arch/arm/crypto/sha256_neon_glue.c   | 144 +++--
 arch/arm64/crypto/sha1-ce-core.S |  11 +-
 arch/arm64/crypto/sha1-ce-glue.c | 133 
 arch/arm64/crypto/sha2-ce-core.S |  11 +-
 arch/arm64/crypto/sha2-ce-glue.c | 209 +--
 arch/x86/crypto/sha1_ssse3_glue.c| 136 +---
 arch/x86/crypto/sha256_ssse3_glue.c  | 184 ++-
 arch/x86/crypto/sha512_ssse3_glue.c  | 193 ++--
 crypto/sha1_generic.c| 108 +---
 cryp