Re: [PATCH 6/6] squashfs: Make SquashFS 4 use the new pcomp crypto interface

2009-03-17 Thread Geert Uytterhoeven
On Wed, 25 Feb 2009, Geert Uytterhoeven wrote:
 Modify SquashFS 4 to use the new pcomp crypto interface for decompression,
 instead of calling the underlying zlib library directly. This simplifies e.g.
 the addition of support for hardware decompression and different decompression
 algorithms.

This is an updated patch, to accommodate the recent changes in SquashFS.
---
From 46b8e0ab105e9b414d455c0a7205a7c79f0517e8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven geert.uytterhoe...@sonycom.com
Date: Thu, 18 Dec 2008 14:35:22 +0100
Subject: [PATCH] squashfs: Make SquashFS 4 use the new pcomp crypto interface

Modify SquashFS 4 to use the new pcomp crypto interface for decompression,
instead of calling the underlying zlib library directly. This simplifies e.g.
the addition of support for hardware decompression and different decompression
algorithms.

Signed-off-by: Geert Uytterhoeven geert.uytterhoe...@sonycom.com
Cc: Phillip Lougher phil...@lougher.demon.co.uk
---
 fs/squashfs/Kconfig  |3 +-
 fs/squashfs/block.c  |   70 -
 fs/squashfs/squashfs_fs_sb.h |2 +-
 fs/squashfs/super.c  |   44 +-
 4 files changed, 80 insertions(+), 39 deletions(-)

diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 25a00d1..18e33a6 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -1,7 +1,8 @@
 config SQUASHFS
tristate SquashFS 4.0 - Squashed file system support
depends on BLOCK
-   select ZLIB_INFLATE
+   select CRYPTO
+   select CRYPTO_ZLIB
help
  Saying Y here includes support for SquashFS 4.0 (a Compressed
  Read-Only File System).  Squashfs is a highly compressed read-only
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2a79603..6196821 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -32,7 +32,8 @@
 #include linux/mutex.h
 #include linux/string.h
 #include linux/buffer_head.h
-#include linux/zlib.h
+
+#include crypto/compress.h
 
 #include squashfs_fs.h
 #include squashfs_fs_sb.h
@@ -153,7 +154,9 @@ int squashfs_read_data(struct super_block *sb, void 
**buffer, u64 index,
}
 
if (compressed) {
-   int zlib_err = 0, zlib_init = 0;
+   int error = 0, decomp_init = 0;
+   struct comp_request req;
+   unsigned int produced = 0;
 
/*
 * Uncompress block.
@@ -161,12 +164,13 @@ int squashfs_read_data(struct super_block *sb, void 
**buffer, u64 index,
 
mutex_lock(msblk-read_data_mutex);
 
-   msblk-stream.avail_out = 0;
-   msblk-stream.avail_in = 0;
+   req.avail_out = 0;
+   req.avail_in = 0;
 
bytes = length;
+   length = 0;
do {
-   if (msblk-stream.avail_in == 0  k  b) {
+   if (req.avail_in == 0  k  b) {
avail = min(bytes, msblk-devblksize - offset);
bytes -= avail;
wait_on_buffer(bh[k]);
@@ -179,45 +183,53 @@ int squashfs_read_data(struct super_block *sb, void 
**buffer, u64 index,
continue;
}
 
-   msblk-stream.next_in = bh[k]-b_data + offset;
-   msblk-stream.avail_in = avail;
+   req.next_in = bh[k]-b_data + offset;
+   req.avail_in = avail;
offset = 0;
}
 
-   if (msblk-stream.avail_out == 0  page  pages) {
-   msblk-stream.next_out = buffer[page++];
-   msblk-stream.avail_out = PAGE_CACHE_SIZE;
+   if (req.avail_out == 0  page  pages) {
+   req.next_out = buffer[page++];
+   req.avail_out = PAGE_CACHE_SIZE;
}
 
-   if (!zlib_init) {
-   zlib_err = zlib_inflateInit(msblk-stream);
-   if (zlib_err != Z_OK) {
-   ERROR(zlib_inflateInit returned
-unexpected result 0x%x,
-srclength %d\n, zlib_err,
-   srclength);
+   if (!decomp_init) {
+   error = crypto_decompress_init(msblk-tfm);
+   if (error) {
+   ERROR(crypto_decompress_init 
+   returned %d, srclength %d\n,
+   error, srclength);
goto 

[PATCH/RFC] crypto: compress - Add comp_request.total_out (was: Re: [PATCH 6/6] squashfs: Make SquashFS 4 use the new pcomp crypto interface)

2009-03-17 Thread Geert Uytterhoeven
On Wed, 11 Mar 2009, Geert Uytterhoeven wrote:
 On Sun, 8 Mar 2009, Phillip Lougher wrote:
  Two API issues of concern (one major, one minor).  Both of these relate to 
  the
  way Squashfs drives the decompression code, where it repeatedly calls it
  supplying additional input/output buffers, rather than using a single-shot
  approach where it calls the decompression code once supplying all the
  necessary input and output buffer space.
  
  1. Minor issue -the lack of a stream.total_out field.  The current
  zlib_inflate code collects the total number of bytes decompressed over the
  multiple calls into the stream.total_out field.
  
 There is clearly no such field available in the crypto API, leading to the
  somewhat clumsy need to track it, i.e. it leads to the following additional
  code.
 
 If people feel the need for a total_out field, I can add it to struct
 comp_request.
 
 BTW, what about total_in, which is also provided by plain zlib's z_stream?
 Do people see a need for a similar field?

The patch below (on top of the updated one to convert SquashFS to pcomp) adds
comp_request.total_out, so you don't have to calculate and accumulate the
decompressed output sizes in SquashFS.

Notes:
  - This required the addition of a `struct comp_request *' parameter to
crypto_{,de}compress_init()
  - Still, there's one of the 

produced = req.avail_out;
...
produced -= req.avail_out;

left, as this is part of the logic to discover the end of decompression
(no bytes produced, no error returned).

Perhaps it's better to instead make crypto_{,de}compress_{update,final}()
return the (positive) number of output bytes (of the current step)?

Currently it returns zero (no error) or a negative error value.
That would allow to get rid of both `produced = ... / produced -= ...'
constructs, but the user would have to accumulate the total output size again
(which is not such a big deal, IMHO).

Thanks for your comments!

From e43f85baa75668be4cce340ae98a3b76e66a452a Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven geert.uytterhoe...@sonycom.com
Date: Mon, 16 Mar 2009 15:53:30 +0100
Subject: [PATCH] crypto: compress - Add comp_request.total_out

Signed-off-by: Geert Uytterhoeven geert.uytterhoe...@sonycom.com
---
 crypto/testmgr.c  |4 ++--
 crypto/zlib.c |   12 ++--
 fs/squashfs/block.c   |   10 +++---
 include/crypto/compress.h |   17 +++--
 4 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index b50c3c6..2b112ae 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -927,7 +927,7 @@ static int test_pcomp(struct crypto_pcomp *tfm,
return error;
}
 
-   error = crypto_compress_init(tfm);
+   error = crypto_compress_init(tfm, req);
if (error) {
pr_err(alg: pcomp: compression init failed on test 
   %d for %s: error=%d\n, i + 1, algo, error);
@@ -996,7 +996,7 @@ static int test_pcomp(struct crypto_pcomp *tfm,
return error;
}
 
-   error = crypto_decompress_init(tfm);
+   error = crypto_decompress_init(tfm, req);
if (error) {
pr_err(alg: pcomp: decompression init failed on test 
   %d for %s: error=%d\n, i + 1, algo, error);
diff --git a/crypto/zlib.c b/crypto/zlib.c
index 33609ba..93ec380 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -125,7 +125,8 @@ static int zlib_compress_setup(struct crypto_pcomp *tfm, 
void *params,
return 0;
 }
 
-static int zlib_compress_init(struct crypto_pcomp *tfm)
+static int zlib_compress_init(struct crypto_pcomp *tfm,
+ struct comp_request *req)
 {
int ret;
struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
@@ -135,6 +136,7 @@ static int zlib_compress_init(struct crypto_pcomp *tfm)
if (ret != Z_OK)
return -EINVAL;
 
+   req-total_out = 0;
return 0;
 }
 
@@ -173,6 +175,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
req-avail_in = stream-avail_in;
req-next_out = stream-next_out;
req-avail_out = stream-avail_out;
+   req-total_out = stream-total_out;
return 0;
 }
 
@@ -203,6 +206,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
req-avail_in = stream-avail_in;
req-next_out = stream-next_out;
req-avail_out = stream-avail_out;
+   req-total_out = stream-total_out;
return 0;
 }
 
@@ -239,7 +243,8 @@ static int zlib_decompress_setup(struct crypto_pcomp *tfm, 
void *params,
return 0;
 }
 
-static int zlib_decompress_init(struct crypto_pcomp *tfm)
+static int zlib_decompress_init(struct crypto_pcomp *tfm,
+   struct comp_request *req)
 {
int ret;

[WIP/RFC] crypto: add support for Orion5X crypto engine

2009-03-17 Thread Sebastian Andrzej Siewior
This is version two of the the driver. New things:
- aes-ecb passes selftests
- aes-cbc passes selftests

The driver still does memcpy() from/to sram. To solve this, a dma driver
would be required but first I wanted to compare the performance between
now and nothing/generic aes. However I managed to crash cryptsetup with
luksOpen. Got look into this...

Signed-off-by: Sebastian Andrzej Siewior sebast...@breakpoint.cc
---
 drivers/crypto/Kconfig  |9 +
 drivers/crypto/Makefile |1 +
 drivers/crypto/mav_crypto.c |  724 +++
 3 files changed, 734 insertions(+), 0 deletions(-)
 create mode 100644 drivers/crypto/mav_crypto.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 01afd75..514fe78 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -157,6 +157,15 @@ config S390_PRNG
  ANSI X9.17 standard. The PRNG is usable via the char device
  /dev/prandom.
 
+config CRYPTO_DEV_MARVELL_CRYPTO_ENGINE
+   tristate Marvell's Cryptographic Engine
+   depends on PLAT_ORION
+   select CRYPTO_ALGAPI
+   select CRYPTO_AES
+   help
+ This driver allows you to utilize the cryptographic engine which can be
+ found on certain SoC like QNAP's TS-209.
+
 config CRYPTO_DEV_HIFN_795X
tristate Driver HIFN 795x crypto accelerator chips
select CRYPTO_DES
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 9bf4a2b..9c7053c 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
 obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
 obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
+obj-$(CONFIG_CRYPTO_DEV_MARVELL_CRYPTO_ENGINE) += mav_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
diff --git a/drivers/crypto/mav_crypto.c b/drivers/crypto/mav_crypto.c
new file mode 100644
index 000..07152e7
--- /dev/null
+++ b/drivers/crypto/mav_crypto.c
@@ -0,0 +1,724 @@
+/*
+ * Support for Marvell's crypto engine which can be found on some Orion5X
+ * boards.
+ *
+ * Author: Sebastian Andrzej Siewior  sebastian at breakpoint dot cc 
+ * License: GPL
+ *
+ */
+#include linux/io.h
+#include linux/crypto.h
+#include linux/platform_device.h
+#include linux/interrupt.h
+#include linux/scatterlist.h
+#include linux/io.h
+#include linux/delay.h
+#include linux/kthread.h
+#include crypto/algapi.h
+#include crypto/aes.h
+
+enum engine_status {
+   engine_idle,
+   engine_busy,
+   engine_w_dequeue,
+};
+
+struct req_progress {
+   struct sg_mapping_iter src_sg_it;
+   struct sg_mapping_iter dst_sg_it;
+
+   /* src mostly */
+   int this_sg_b_left;
+   int src_start;
+   int crypt_len;
+   /* dst mostly */
+   int this_dst_sg_b_left;
+   int dst_start;
+   int total_req_bytes;
+};
+
+struct crypto_priv {
+   void __iomem *reg;
+   void __iomem *sram;
+   int irq;
+   struct task_struct *queue_th;
+
+   spinlock_t lock;
+   struct crypto_queue queue;
+   enum engine_status eng_st;
+   struct ablkcipher_request *cur_req;
+   struct req_progress p;
+};
+
+static struct crypto_priv *cpg;
+
+static void reg_write(void __iomem *mem, u32 val)
+{
+   __raw_writel(val, mem);
+}
+
+static u32 reg_read(void __iomem *mem)
+{
+   return __raw_readl(mem);
+}
+
+#define DIGEST_INITIAL_VAL_A   0xdd00
+#define DES_CMD_REG0xdd58
+
+#define SEC_ACCEL_CMD  0xde00
+#define SEC_CMD_EN_SEC_ACCL0   (1  0)
+#define SEC_CMD_EN_SEC_ACCL1   (1  1)
+#define SEC_CMD_DISABLE_SEC(1  2)
+
+#define SEC_ACCEL_DESC_P0  0xde04
+#define SEC_DESC_P0_PTR(x) (x)
+
+#define SEC_ACCEL_DESC_P1  0xde14
+#define SEC_DESC_P1_PTR(x) (x)
+
+#define SEC_ACCEL_CFG  0xde08
+#define SEC_CFG_STOP_DIG_ERR   (1  0)
+#define SEC_CFG_CH0_W_IDMA (1  7)
+#define SEC_CFG_CH1_W_IDMA (1  8)
+#define SEC_CFG_ACT_CH0_IDMA   (1  9)
+#define SEC_CFG_ACT_CH1_IDMA   (1  10)
+
+#define SEC_ACCEL_STATUS   0xde0c
+#define SEC_ST_ACT_0   (1  0)
+#define SEC_ST_ACT_1   (1  1)
+
+
+#define SEC_ACCEL_INT_STATUS   0xde20
+#define SEC_INT_AUTH_DONE  (1  0)
+#define SEC_INT_DES_E_DONE (1  1)
+#define SEC_INT_AES_E_DONE (1  2)
+#define SEC_INT_AES_D_DONE (1  3)
+#define SEC_INT_ENC_DONE   (1  4)
+#define SEC_INT_ACCEL0_DONE(1  5)
+#define SEC_INT_ACCEL1_DONE(1  6)
+#define SEC_INT_ACC0_IDMA_DONE (1  7)
+#define SEC_INT_ACC1_IDMA_DONE (1  8)
+
+#define SEC_ACCEL_INT_MASK 0xde24
+
+#define AES_KEY_LEN(8 * 4)
+
+struct sec_accel_config {
+
+   u32 config;
+#define CFG_OP_MAC_ONLY(0)
+#define CFG_OP_CRYPT_ONLY  (1)
+#define CFG_OP_MAC_CRYPT   (2)
+#define CFG_OP_CRYPT_MAC   (3)
+#define CFG_MACM_MD5   

Re: New kernel hifn795x driver does not play nice with luks

2009-03-17 Thread markus reichelt
 I have encountered a problem with the new HiFn driver in 2.6.27,
 cryptsetup and luks.

the mailinglist you posted to mainly deals with loop-aes these days,
the dm-crypt/luks/mainline loop-crypto guys for some obscure reason
opted to start a mailinglist of the same name at
linux-crypto@vger.kernel.org, so you are better off asking there
(blame them for confusing people).

CC added, though I guess you need to subscribe to their holy shrine,
anyway. And keep in mind that dm-crypt/yada is still beta-code, it's
nowhere near stable.

HTH


 I am able to unlock my luks devices fine, but attempting to mount
 the device will result in mount hanging forever (ps shows it as D+,
 presumably it's waiting for I/O from the card), forcing me to
 manually kill the process. If i then try to close the luks
 partition luksClose informs me that the drive is in use. The
 hifn_795x module will be locked as well in the process.
 
 However, as soon as I disable/remove the hifn_795x module prior to
 mounting the device, everything works as it should yet again.
 
 It seems the HiFn card has taken over as the preferred module to
 use for AES, according to /proc/crypto:
 
 --
 name : cbc(aes)
 driver   : cbc-aes-hifn0
 module   : hifn_795x
 priority : 300
 (...)
 --
 name : aes
 driver   : aes-asm
 module   : kernel
 priority : 200
 (...)
 --
 name : aes
 driver   : aes-generic
 module   : kernel
 priority : 100
 (...)
 --
 
 I have no idea what the problem might be, but I suppose the HiFn
 card might not like my luks partitions (aes, cbc-essiv), or some
 other incompatibility or other issues somewhere.
 
 Have anyone else experienced this behavior? Any possible solutions?
 
 Prior to the 2.6.27 kernel I was running OCF to handle my card, and
 everything was working splendid, however the performance of the
 HiFn RNG is many times higher with the built-in driver (from
 ~300kb/s to ~2.5MB/s), and since my main beef with this card is the
 RNG I would very much like to stay with the driver from 2.6.27.
 
 Is there any way to change the priority on these cipher modules so
 the kernel ones would be used in preference of the hifn_795x ones?
 I am perfectly happy doing luks and general AES in software, if
 only there was a way for me to set their priority by hand.
 
 
 
 Best Regards,
 Frode Moseng Monsson

-- 
left blank, right bald
loop-AES FAQ: http://mareichelt.de/pub/texts.loop-aes.php#faq
--
To unsubscribe from this list: send the line unsubscribe linux-crypto in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: New kernel hifn795x driver does not play nice with luks

2009-03-17 Thread Evgeniy Polyakov
On Tue, Mar 17, 2009 at 11:33:32PM +0100, markus reichelt (m...@mareichelt.de) 
wrote:
  I have encountered a problem with the new HiFn driver in 2.6.27,
  cryptsetup and luks.
 
 the mailinglist you posted to mainly deals with loop-aes these days,
 the dm-crypt/luks/mainline loop-crypto guys for some obscure reason
 opted to start a mailinglist of the same name at
 linux-crypto@vger.kernel.org, so you are better off asking there
 (blame them for confusing people).
 
 CC added, though I guess you need to subscribe to their holy shrine,
 anyway. And keep in mind that dm-crypt/yada is still beta-code, it's
 nowhere near stable.
 
What was that?
Also, m...@mareichelt.de email was not in the proper header, so it does not
appear in copy list.
 
  I am able to unlock my luks devices fine, but attempting to mount
  the device will result in mount hanging forever (ps shows it as D+,
  presumably it's waiting for I/O from the card), forcing me to
  manually kill the process. If i then try to close the luks
  partition luksClose informs me that the drive is in use. The
  hifn_795x module will be locked as well in the process.
  
  However, as soon as I disable/remove the hifn_795x module prior to
  mounting the device, everything works as it should yet again.

Please send a dmesg output when card is blocked.
Thank you.

-- 
Evgeniy Polyakov
--
To unsubscribe from this list: send the line unsubscribe linux-crypto in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH -v3 1/3] crypto: Fix tfm allocation in cryptd_alloc_ablkcipher

2009-03-17 Thread Huang Ying
Use crypto_alloc_base() instead of crypto_alloc_ablkcipher() to
allocate underlying tfm in cryptd_alloc_ablkcipher. Because
crypto_alloc_ablkcipher() prefer GENIV encapsulated crypto instead of
raw one, while cryptd_alloc_ablkcipher needed the raw one.

Signed-off-by: Huang Ying ying.hu...@intel.com

---
 crypto/cryptd.c |   14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -586,20 +586,24 @@ struct cryptd_ablkcipher *cryptd_alloc_a
  u32 type, u32 mask)
 {
char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
-   struct crypto_ablkcipher *tfm;
+   struct crypto_tfm *tfm;
 
if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
 cryptd(%s), alg_name) = CRYPTO_MAX_ALG_NAME)
return ERR_PTR(-EINVAL);
-   tfm = crypto_alloc_ablkcipher(cryptd_alg_name, type, mask);
+   type = ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+   type |= CRYPTO_ALG_TYPE_BLKCIPHER;
+   mask = ~CRYPTO_ALG_TYPE_MASK;
+   mask |= (CRYPTO_ALG_GENIV | CRYPTO_ALG_TYPE_BLKCIPHER_MASK);
+   tfm = crypto_alloc_base(cryptd_alg_name, type, mask);
if (IS_ERR(tfm))
return ERR_CAST(tfm);
-   if (crypto_ablkcipher_tfm(tfm)-__crt_alg-cra_module != THIS_MODULE) {
-   crypto_free_ablkcipher(tfm);
+   if (tfm-__crt_alg-cra_module != THIS_MODULE) {
+   crypto_free_tfm(tfm);
return ERR_PTR(-EINVAL);
}
 
-   return __cryptd_ablkcipher_cast(tfm);
+   return __cryptd_ablkcipher_cast(__crypto_ablkcipher_cast(tfm));
 }
 EXPORT_SYMBOL_GPL(cryptd_alloc_ablkcipher);
 



signature.asc
Description: This is a digitally signed message part


[PATCH -v3 2/3] crypto: Add fpu template, a wrapper for blkcipher touching FPU

2009-03-17 Thread Huang Ying
Blkcipher touching FPU need to be enclosed by kernel_fpu_begin() and
kernel_fpu_end(). If they are invoked in cipher algorithm
implementation, they will be invoked for each block, so that
performance will be hurt, because they are slow operations. This
patch implements fpu template, which makes these operations to be
invoked for each request.

v2: Make FPU mode invisible to end user

Signed-off-by: Huang Ying ying.hu...@intel.com

---
 crypto/Kconfig  |5 +
 crypto/Makefile |1 
 crypto/fpu.c|  166 
 3 files changed, 172 insertions(+)

--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -241,6 +241,11 @@ config CRYPTO_XTS
  key size 256, 384 or 512 bits. This implementation currently
  can't handle a sectorsize which is not a multiple of 16 bytes.
 
+config CRYPTO_FPU
+   tristate
+   select CRYPTO_BLKCIPHER
+   select CRYPTO_MANAGER
+
 comment Hash modes
 
 config CRYPTO_HMAC
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o
 obj-$(CONFIG_CRYPTO_CTS) += cts.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
 obj-$(CONFIG_CRYPTO_XTS) += xts.o
+obj-$(CONFIG_CRYPTO_FPU) += fpu.o
 obj-$(CONFIG_CRYPTO_CTR) += ctr.o
 obj-$(CONFIG_CRYPTO_GCM) += gcm.o
 obj-$(CONFIG_CRYPTO_CCM) += ccm.o
--- /dev/null
+++ b/crypto/fpu.c
@@ -0,0 +1,166 @@
+/*
+ * FPU: Wrapper for blkcipher touching fpu
+ *
+ * Copyright (c) Intel Corp.
+ *   Author: Huang Ying ying.hu...@intel.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include crypto/algapi.h
+#include linux/err.h
+#include linux/init.h
+#include linux/kernel.h
+#include linux/module.h
+#include asm/i387.h
+
+struct crypto_fpu_ctx {
+   struct crypto_blkcipher *child;
+};
+
+static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
+unsigned int keylen)
+{
+   struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
+   struct crypto_blkcipher *child = ctx-child;
+   int err;
+
+   crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+   crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) 
+  CRYPTO_TFM_REQ_MASK);
+   err = crypto_blkcipher_setkey(child, key, keylen);
+   crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) 
+CRYPTO_TFM_RES_MASK);
+   return err;
+}
+
+static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+   int err;
+   struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in-tfm);
+   struct crypto_blkcipher *child = ctx-child;
+   struct blkcipher_desc desc = {
+   .tfm = child,
+   .info = desc_in-info,
+   .flags = desc_in-flags,
+   };
+
+   kernel_fpu_begin();
+   err = crypto_blkcipher_crt(desc.tfm)-encrypt(desc, dst, src, nbytes);
+   kernel_fpu_end();
+   return err;
+}
+
+static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+   int err;
+   struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in-tfm);
+   struct crypto_blkcipher *child = ctx-child;
+   struct blkcipher_desc desc = {
+   .tfm = child,
+   .info = desc_in-info,
+   .flags = desc_in-flags,
+   };
+
+   kernel_fpu_begin();
+   err = crypto_blkcipher_crt(desc.tfm)-decrypt(desc, dst, src, nbytes);
+   kernel_fpu_end();
+   return err;
+}
+
+static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
+{
+   struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+   struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+   struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
+   struct crypto_blkcipher *cipher;
+
+   cipher = crypto_spawn_blkcipher(spawn);
+   if (IS_ERR(cipher))
+   return PTR_ERR(cipher);
+
+   ctx-child = cipher;
+   return 0;
+}
+
+static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
+{
+   struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
+   crypto_free_blkcipher(ctx-child);
+}
+
+static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
+{
+   struct crypto_instance *inst;
+   struct crypto_alg *alg;
+   int err;
+
+   err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+   if (err)
+   return ERR_PTR(err);
+
+   alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
+ CRYPTO_ALG_TYPE_MASK);
+ 

[PATCH -v3 3/3] crypto: Add AES-NI support for more modes

2009-03-17 Thread Huang Ying
Because kernel_fpu_begin() and kernel_fpu_end() operations are too
slow, the performance gain of general mode implementation + aes-aesni
is almost all compensated.

The AES-NI support for more modes are implemented as follow:

- Add a new AES algorithm implementation named __aes-aesni without
  kernel_fpu_begin/end()

- Use fpu(mode(AES)) to provide kernel_fpu_begin/end() invoking

- Add mode(AES) ablkcipher, which uses cryptd(fpu(mode(AES))) to
  defer cryption to cryptd context in soft_irq context.

Now the ctr, lrw, pcbc and xts support are added.

Performance testing based on dm-crypt shows that cryption time can be
reduced to 50% of general mode implementation + aes-aesni implementation.

v2: Add description of mode acceleration support in Kconfig

v3: Fix some bugs of CTR block size, LRW and XTS min/max key size.

Signed-off-by: Huang Ying ying.hu...@intel.com

---
 arch/x86/crypto/aesni-intel_glue.c |  267 -
 crypto/Kconfig |5 
 2 files changed, 271 insertions(+), 1 deletion(-)

--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -21,6 +21,22 @@
 #include asm/i387.h
 #include asm/aes.h
 
+#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
+#define HAS_CTR
+#endif
+
+#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
+#define HAS_LRW
+#endif
+
+#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
+#define HAS_PCBC
+#endif
+
+#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
+#define HAS_XTS
+#endif
+
 struct async_aes_ctx {
struct cryptd_ablkcipher *cryptd_tfm;
 };
@@ -137,6 +153,41 @@ static struct crypto_alg aesni_alg = {
}
 };
 
+static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+   struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+   aesni_enc(ctx, dst, src);
+}
+
+static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+   struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+   aesni_dec(ctx, dst, src);
+}
+
+static struct crypto_alg __aesni_alg = {
+   .cra_name   = __aes-aesni,
+   .cra_driver_name= __driver-aes-aesni,
+   .cra_priority   = 0,
+   .cra_flags  = CRYPTO_ALG_TYPE_CIPHER,
+   .cra_blocksize  = AES_BLOCK_SIZE,
+   .cra_ctxsize= sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+   .cra_alignmask  = 0,
+   .cra_module = THIS_MODULE,
+   .cra_list   = LIST_HEAD_INIT(__aesni_alg.cra_list),
+   .cra_u  = {
+   .cipher = {
+   .cia_min_keysize= AES_MIN_KEY_SIZE,
+   .cia_max_keysize= AES_MAX_KEY_SIZE,
+   .cia_setkey = aes_set_key,
+   .cia_encrypt= __aes_encrypt,
+   .cia_decrypt= __aes_decrypt
+   }
+   }
+};
+
 static int ecb_encrypt(struct blkcipher_desc *desc,
   struct scatterlist *dst, struct scatterlist *src,
   unsigned int nbytes)
@@ -277,8 +328,16 @@ static int ablk_set_key(struct crypto_ab
unsigned int key_len)
 {
struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+   struct crypto_ablkcipher *child = ctx-cryptd_tfm-base;
+   int err;
 
-   return crypto_ablkcipher_setkey(ctx-cryptd_tfm-base, key, key_len);
+   crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+   crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+CRYPTO_TFM_REQ_MASK);
+   err = crypto_ablkcipher_setkey(child, key, key_len);
+   crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+CRYPTO_TFM_RES_MASK);
+   return err;
 }
 
 static int ablk_encrypt(struct ablkcipher_request *req)
@@ -411,6 +470,163 @@ static struct crypto_alg ablk_cbc_alg = 
},
 };
 
+#ifdef HAS_CTR
+static int ablk_ctr_init(struct crypto_tfm *tfm)
+{
+   struct cryptd_ablkcipher *cryptd_tfm;
+
+   cryptd_tfm = cryptd_alloc_ablkcipher(fpu(ctr(__driver-aes-aesni)),
+0, 0);
+   if (IS_ERR(cryptd_tfm))
+   return PTR_ERR(cryptd_tfm);
+   ablk_init_common(tfm, cryptd_tfm);
+   return 0;
+}
+
+static struct crypto_alg ablk_ctr_alg = {
+   .cra_name   = ctr(aes),
+   .cra_driver_name= ctr-aes-aesni,
+   .cra_priority   = 400,
+   .cra_flags  = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+   .cra_blocksize  = 1,
+   .cra_ctxsize= sizeof(struct async_aes_ctx),
+   .cra_alignmask  = 0,
+   .cra_type   = crypto_ablkcipher_type,
+   .cra_module = THIS_MODULE,
+