Re: [PATCH 1/2] arm64 aes: fix encryption of unaligned data
On 26 July 2014 01:40, Mikulas Patocka mpato...@redhat.com wrote: cryptsetup fails on arm64 when using kernel encryption via AF_ALG socket. See https://bugzilla.redhat.com/show_bug.cgi?id=1122937 The bug is caused by incorrect handling of unaligned data in arch/arm64/crypto/aes-glue.c. Cryptsetup creates a buffer that is aligned on 8 bytes, but not on 16 bytes. It opens AF_ALG socket and uses the socket to encrypt data in the buffer. The arm64 crypto accelerator causes data corruption or crashes in the scatterwalk_pagedone. This patch fixes the bug by passing the residue bytes that were not processed as the last parameter to blkcipher_walk_done. Signed-off-by: Mikulas Patocka mpato...@redhat.com Acked-by: Ard Biesheuvel ard.biesheu...@linaro.org Thanks for the patch. This correctly fixes a thinko on my part regarding the guarantees offered by the blkcipher API. Unfortunately, this wasn't caught by the tcrypt test suite, so I will propose some patches later to address cases like this. BTW using kernel crypto with AF_ALG is fairly pointless when using crypto instructions instead of crypto accelerator peripherals, should we change anything on the kernel side so we don't expose these drivers? @Catalin: this is a bug fix for the code that was merged this cycle. I would recommend to merge this for 3.16, but if not, could you please add a cc stable? Or ack it and perhaps Herbert can take both? (There is a similar patch for ARM as well) Regards, Ard. 
Index: linux-3.16.0-0.rc6.git1.1.fc21.aarch64/arch/arm64/crypto/aes-glue.c === --- linux-3.16.0-0.rc6.git1.1.fc21.aarch64.orig/arch/arm64/crypto/aes-glue.c +++ linux-3.16.0-0.rc6.git1.1.fc21.aarch64/arch/arm64/crypto/aes-glue.c @@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_ aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_ aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key1.key_enc, rounds, blocks, (u8 *)ctx-key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -267,7 +267,7 @@ static int 
xts_decrypt(struct blkcipher_ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] arm64 aes: fix encryption of unaligned data
On 26 July 2014 15:13, Ard Biesheuvel ard.biesheu...@linaro.org wrote: On 26 July 2014 01:40, Mikulas Patocka mpato...@redhat.com wrote: cryptsetup fails on arm64 when using kernel encryption via AF_ALG socket. See https://bugzilla.redhat.com/show_bug.cgi?id=1122937 The bug is caused by incorrect handling of unaligned data in arch/arm64/crypto/aes-glue.c. Cryptsetup creates a buffer that is aligned on 8 bytes, but not on 16 bytes. It opens AF_ALG socket and uses the socket to encrypt data in the buffer. The arm64 crypto accelerator causes data corruption or crashes in the scatterwalk_pagedone. This patch fixes the bug by passing the residue bytes that were not processed as the last parameter to blkcipher_walk_done. Signed-off-by: Mikulas Patocka mpato...@redhat.com Acked-by: Ard Biesheuvel ard.biesheu...@linaro.org Thanks for the patch. This correctly fixes a thinko on my part regarding the guarantees offered by the blkcipher API. Unfortunately, this wasn't caught by the tcrypt test suite, so I will propose some patches later to address cases like this. BTW using kernel crypto with AF_ALG is fairly pointless when using crypto instructions instead of crypto accelerator peripherals, should we change anything on the kernel side so we don't expose these drivers? @Catalin: this is a bug fix for the code that was merged this cycle. I would recommend to merge this for 3.16, but if not, could you please add a cc stable? Or ack it and perhaps Herbert can take both? (There is a similar patch for ARM as well) ... only this patch fails to repair the ECB case. @Mikulas: could you do a v2 and include ECB encryption/decryption? Cheers, Ard. 
Index: linux-3.16.0-0.rc6.git1.1.fc21.aarch64/arch/arm64/crypto/aes-glue.c === --- linux-3.16.0-0.rc6.git1.1.fc21.aarch64.orig/arch/arm64/crypto/aes-glue.c +++ linux-3.16.0-0.rc6.git1.1.fc21.aarch64/arch/arm64/crypto/aes-glue.c @@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_ aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_ aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx-key1.key_enc, rounds, blocks, (u8 *)ctx-key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -267,7 +267,7 @@ static int 
xts_decrypt(struct blkcipher_ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info
Re: [PATCH 2/2] arm aes: fix encryption of unaligned data
On 26 July 2014 01:42, Mikulas Patocka mpato...@redhat.com wrote: Fix the same alignment bug as in arm64 - we need to pass residue unprocessed bytes as the last argument to blkcipher_walk_done. Signed-off-by: Mikulas Patocka mpato...@redhat.com Cc: sta...@vger.kernel.org # 3.13+ Acked-by: Ard Biesheuvel ard.biesheu...@linaro.org As for the previous patch, this correctly fixes a thinko on my part regarding the guarantees offered by the blkcipher API. @Russell: could you please indicate whether you prefer to take this yourself or ack it so we can ask Herbert to take both ARM and arm64 patches as a set. Regards, Ard. Index: linux-3.16.0-0.rc6.git1.1.fc21.aarch64/arch/arm/crypto/aesbs-glue.c === --- linux-3.16.0-0.rc6.git1.1.fc21.aarch64.orig/arch/arm/crypto/aesbs-glue.c +++ linux-3.16.0-0.rc6.git1.1.fc21.aarch64/arch/arm/crypto/aesbs-glue.c @@ -137,7 +137,7 @@ static int aesbs_cbc_encrypt(struct blkc dst += AES_BLOCK_SIZE; } while (--blocks); } - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -158,7 +158,7 @@ static int aesbs_cbc_decrypt(struct blkc bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, ctx-dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } while (walk.nbytes) { u32 blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -182,7 +182,7 @@ static int aesbs_cbc_decrypt(struct blkc dst += AES_BLOCK_SIZE; src += AES_BLOCK_SIZE; } while (--blocks); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -268,7 +268,7 @@ static int aesbs_xts_encrypt(struct blkc bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, ctx-enc, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } 
@@ -292,7 +292,7 @@ static int aesbs_xts_decrypt(struct blkc bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, ctx->dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, walk, 0); + err = blkcipher_walk_done(desc, walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: use chunks smaller than algo block size in chunk tests
This patch updates many of the chunked tcrypt test cases so that not all of the chunks are an exact multiple of the block size. This should help uncover cases where the residue passed to blkcipher_walk_done() is incorrect. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- Herbert, After this change, the ARM and arm64 bugs regarding blkcipher_walk_done() are identified by tcrypt mode=10 Regards, Ard. crypto/testmgr.h | 264 +++ 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 69d0dd8ef27e..63ca0776cfca 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -3097,8 +3097,8 @@ static struct cipher_testvec des_enc_tv_template[] = { \x5F\x62\xC7\x72\xD9\xFC\xCB\x9A, .rlen = 248, .also_non_np = 1, - .np = 2, - .tap= { 248 - 8, 8 }, + .np = 3, + .tap= { 248 - 10, 2, 8 }, }, }; @@ -3207,8 +3207,8 @@ static struct cipher_testvec des_dec_tv_template[] = { \xC6\x2F\xBB\x24\x8D\x19\x82\xEB, .rlen = 248, .also_non_np = 1, - .np = 2, - .tap= { 248 - 8, 8 }, + .np = 3, + .tap= { 248 - 10, 2, 8 }, }, }; @@ -,8 +,8 @@ static struct cipher_testvec des_cbc_enc_tv_template[] = { \xC6\x4A\xF3\x55\xC7\x29\x2E\x63, .rlen = 248, .also_non_np = 1, - .np = 2, - .tap= { 248 - 8, 8 }, + .np = 3, + .tap= { 248 - 10, 2, 8 }, }, }; @@ -3442,8 +3442,8 @@ static struct cipher_testvec des_cbc_dec_tv_template[] = { \xC6\x2F\xBB\x24\x8D\x19\x82\xEB, .rlen = 248, .also_non_np = 1, - .np = 2, - .tap= { 248 - 8, 8 }, + .np = 3, + .tap= { 248 - 10, 2, 8 }, }, }; @@ -3517,8 +3517,8 @@ static struct cipher_testvec des_ctr_enc_tv_template[] = { \x69\x74\xA1\x06\x46\x0F\x4E\x75, .rlen = 248, .also_non_np = 1, - .np = 2, - .tap= { 248 - 8, 8 }, + .np = 3, + .tap= { 248 - 10, 2, 8 }, }, { /* Generated with Crypto++ */ .key= \xC9\x83\xA6\xC9\xEC\x0F\x32\x55, .klen = 8, @@ -3663,8 +3663,8 @@ static struct cipher_testvec des_ctr_dec_tv_template[] = { \xC6\x2F\xBB\x24\x8D\x19\x82\xEB, .rlen = 248, .also_non_np = 1, - .np = 2, - .tap= { 248 - 8, 8 
}, + .np = 3, + .tap= { 248 - 10, 2, 8 }, }, { /* Generated with Crypto++ */ .key= \xC9\x83\xA6\xC9\xEC\x0F\x32\x55, .klen = 8, @@ -3899,8 +3899,8 @@ static struct cipher_testvec des3_ede_enc_tv_template[] = { \xD8\x45\xFF\x33\xBA\xBB\x2B\x63, .rlen = 496, .also_non_np = 1, - .np = 2, - .tap= { 496 - 16, 16 }, + .np = 3, + .tap= { 496 - 20, 4, 16 }, }, }; @@ -4064,8 +4064,8 @@ static struct cipher_testvec des3_ede_dec_tv_template[] = { \xB8\x03\xEA\x7D\xE1\x48\xD3\x47, .rlen = 496, .also_non_np = 1, - .np = 2, - .tap= { 496 - 16, 16 }, + .np = 3, + .tap= { 496 - 20, 4, 16 }, }, }; @@ -4244,8 +4244,8 @@ static struct cipher_testvec des3_ede_cbc_enc_tv_template[] = { \x95\x63\x73\xA2\x44\xAC\xF8\xA5, .rlen = 496, .also_non_np = 1, - .np = 2, - .tap= { 496 - 16, 16 }, + .np = 3, + .tap= { 496 - 20, 4, 16 }, }, }; @@ -4424,8 +4424,8 @@ static struct cipher_testvec des3_ede_cbc_dec_tv_template[] = { \xB8\x03\xEA\x7D\xE1\x48\xD3\x47, .rlen = 496, .also_non_np = 1, - .np = 2, - .tap= { 496 - 16, 16 }, + .np = 3, + .tap= { 496 - 20, 4, 16 }, }, }; @@ -4564,8 +4564,8 @@ static struct cipher_testvec des3_ede_ctr_enc_tv_template[] = { \x5C\xEE\xFC\xCF\xC4\x70\x00\x34, .rlen = 496, .also_non_np = 1, - .np = 2, - .tap= { 496 - 16, 16 }, + .np = 3, + .tap= { 496 - 20, 4, 16
Re: [PATCH] [v3] crypto: sha512: add ARM NEON implementation
On 30 June 2014 18:39, Jussi Kivilinna jussi.kivili...@iki.fi wrote: This patch adds ARM NEON assembly implementation of SHA-512 and SHA-384 algorithms. tcrypt benchmark results on Cortex-A8, sha512-generic vs sha512-neon-asm: block-size bytes/updateold-vs-new 16 16 2.99x 64 16 2.67x 64 64 3.00x 256 16 2.64x 256 64 3.06x 256 256 3.33x 102416 2.53x 1024256 3.39x 102410243.52x 204816 2.50x 2048256 3.41x 204810243.54x 204820483.57x 409616 2.49x 4096256 3.42x 409610243.56x 409640963.59x 819216 2.48x 8192256 3.42x 819210243.56x 819240963.60x 819281923.60x Acked-by: Ard Biesheuvel ard.biesheu...@linaro.org Tested-by: Ard Biesheuvel ard.biesheu...@linaro.org Signed-off-by: Jussi Kivilinna jussi.kivili...@iki.fi --- Changes in v2: - Use ENTRY/ENDPROC - Don't provide Thumb2 version v3: - Changelog moved below '---' Hi Jussi, What is the status of these patches? Have you sent them to Russell's patch tracker? -- Ard. --- arch/arm/crypto/Makefile|2 arch/arm/crypto/sha512-armv7-neon.S | 455 +++ arch/arm/crypto/sha512_neon_glue.c | 305 +++ crypto/Kconfig | 15 + 4 files changed, 777 insertions(+) create mode 100644 arch/arm/crypto/sha512-armv7-neon.S create mode 100644 arch/arm/crypto/sha512_neon_glue.c diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 374956d..b48fa34 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -6,11 +6,13 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o +sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/sha512-armv7-neon.S 
b/arch/arm/crypto/sha512-armv7-neon.S new file mode 100644 index 000..fe99472 --- /dev/null +++ b/arch/arm/crypto/sha512-armv7-neon.S @@ -0,0 +1,455 @@ +/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform + * + * Copyright © 2013-2014 Jussi Kivilinna jussi.kivili...@iki.fi + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include linux/linkage.h + + +.syntax unified +.code 32 +.fpu neon + +.text + +/* structure of SHA512_CONTEXT */ +#define hd_a 0 +#define hd_b ((hd_a) + 8) +#define hd_c ((hd_b) + 8) +#define hd_d ((hd_c) + 8) +#define hd_e ((hd_d) + 8) +#define hd_f ((hd_e) + 8) +#define hd_g ((hd_f) + 8) + +/* register macros */ +#define RK %r2 + +#define RA d0 +#define RB d1 +#define RC d2 +#define RD d3 +#define RE d4 +#define RF d5 +#define RG d6 +#define RH d7 + +#define RT0 d8 +#define RT1 d9 +#define RT2 d10 +#define RT3 d11 +#define RT4 d12 +#define RT5 d13 +#define RT6 d14 +#define RT7 d15 + +#define RT01q q4 +#define RT23q q5 +#define RT45q q6 +#define RT67q q7 + +#define RW0 d16 +#define RW1 d17 +#define RW2 d18 +#define RW3 d19 +#define RW4 d20 +#define RW5 d21 +#define RW6 d22 +#define RW7 d23 +#define RW8 d24 +#define RW9 d25 +#define RW10 d26 +#define RW11 d27 +#define RW12 d28 +#define RW13 d29 +#define RW14 d30 +#define RW15 d31 + +#define RW01q q8 +#define RW23q q9 +#define RW45q q10 +#define RW67q q11 +#define RW89q q12 +#define RW1011q q13 +#define RW1213q q14 +#define RW1415q q15 + +/*** + * ARM assembly implementation of sha512 transform + ***/ +#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \ + rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \ + /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ + vshr.u64 RT2, re, #14; \ + vshl.u64 RT3, re, #64 - 14
[PATCH 1/2] ARM: crypto: enable NEON SHA-1 for big endian
This tweaks the SHA-1 NEON code slightly so it works correctly under big endian, and removes the Kconfig condition preventing it from being selected if CONFIG_CPU_BIG_ENDIAN is set. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/sha1-armv7-neon.S | 8 crypto/Kconfig| 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S index 50013c0e2864..49179e0653e6 100644 --- a/arch/arm/crypto/sha1-armv7-neon.S +++ b/arch/arm/crypto/sha1-armv7-neon.S @@ -150,9 +150,9 @@ #define W_PRECALC_00_15() \ add RWK, sp, #(WK_offs(0)); \ \ - vld1.32 {tmp0, tmp1}, [RDATA]!; \ + vld1.8{tmp0, tmp1}, [RDATA]!; \ vrev32.8 W0, tmp0; /* big = little */ \ - vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vld1.8{tmp2, tmp3}, [RDATA]!; \ vadd.u32 tmp0, W0, curK; \ vrev32.8 W7, tmp1; /* big = little */ \ vrev32.8 W6, tmp2; /* big = little */ \ @@ -164,7 +164,7 @@ vst1.32 {tmp2, tmp3}, [RWK]; \ #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {tmp0, tmp1}, [RDATA]!; \ + vld1.8{tmp0, tmp1}, [RDATA]!; \ #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ add RWK, sp, #(WK_offs(0)); \ @@ -173,7 +173,7 @@ vrev32.8 W0, tmp0; /* big = little */ \ #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vld1.8{tmp2, tmp3}, [RDATA]!; \ #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ vadd.u32 tmp0, W0, curK; \ diff --git a/crypto/Kconfig b/crypto/Kconfig index 749b1e05c490..deef2a4b6559 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -542,7 +542,7 @@ config CRYPTO_SHA1_ARM config CRYPTO_SHA1_ARM_NEON tristate SHA1 digest algorithm (ARM NEON) - depends on ARM KERNEL_MODE_NEON !CPU_BIG_ENDIAN + depends on ARM KERNEL_MODE_NEON select CRYPTO_SHA1_ARM select CRYPTO_SHA1 select CRYPTO_HASH -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the 
body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 3.17 regression; alg: skcipher: Chunk test 1 failed on encryption at page 0 for ecb-aes-padlock
On 20 October 2014 09:14, Jamie Heilman ja...@audible.transient.net wrote: I get this new failure w/3.17.0 on my system with a VIA Esther processor: alg: skcipher: Chunk test 1 failed on encryption at page 0 for ecb-aes-padlock : 71 73 f7 db 24 93 21 6d 61 1e bb 63 42 79 db 64 0010: 6f 82 c0 ca a3 9b fa 0b d9 08 c7 4a 90 ae 8f 5f 0020: 5e 06 f0 5f 31 51 18 37 45 d7 ca 3a fd 6c 3f e1 0030: dd 8d 22 65 2b 00 50 ce ba 28 67 d7 ce 0e 0d ea 0040: 78 69 7f ae 8f 8b 69 37 75 e0 dc 96 e0 b7 f4 09 0050: cb 6d a2 fb da af 09 f8 81 82 27 fa 45 9c 29 a4 0060: 22 8b 78 69 5b 46 f9 39 1b cc f9 1d 09 eb bc 5c 0070: 41 72 51 97 1d 07 49 a0 1b 8e 65 4b b2 6a 12 03 0080: 6a 60 95 ac bd ac 1a 64 de 5a a5 f0 83 2f cb ca 0090: 22 74 a6 6c 9b 73 ce 3f e1 8b 22 17 59 0c 47 89 00a0: 33 a1 d6 47 03 19 4f a8 67 69 f0 5b f0 20 ad 06 00b0: 27 81 92 d8 c5 ba 98 12 be 24 b5 2f 75 02 c2 ad 00c0: 12 2f 07 32 ee 39 af 64 05 8f b3 d4 eb 1b 46 6e 00d0: d9 21 f9 c4 b7 c9 45 68 b4 a1 74 9f 82 47 eb cc 00e0: bd 0a 14 95 0f 8b a8 2f 4b 1b a7 bf 82 a6 43 0c 00f0: b9 39 4a a8 10 6f 50 7b 25 fb 26 81 e0 2f f0 96 0100: 8d 8b ac 92 0f f6 ed 64 63 29 4c 8e 18 13 c5 bf 0110: fc a0 d9 bf 7c 3a 0e 29 6f d1 6c 6f a5 da bf b1 0120: 30 ea 44 2d c3 8f 16 e1 66 fa a3 21 3e fc 13 ca 0130: f0 f6 f0 59 bd 8f 38 50 31 cb 69 3f 96 15 d6 f5 0140: ae ff f6 aa 41 85 4c 10 58 e3 f9 44 e6 28 da 9a 0150: dc 6a 80 34 73 97 1b c5 ca 26 16 77 0e 60 ab 89 0160: 0f 04 27 bd ce 3e 71 b4 a0 d7 22 7e db eb 24 70 0170: 42 71 51 78 70 b3 e0 3d 84 8e 8d 7b d0 6d ea 92 0180: 11 08 42 4f e5 ad 26 92 d2 00 ae a8 e3 4b 37 47 0190: 22 c1 95 c1 63 7f cb 03 f3 e3 d7 9d 60 c7 bc ea 01a0: 35 a2 fd 45 52 39 13 6f c1 53 f3 53 df 33 84 d7 01b0: d2 c8 37 b0 75 e3 41 46 b3 c7 83 2e 8a bb a4 e5 01c0: 7f 3c fd 8b eb ea 63 bd b7 46 e7 bf 09 9c 0d 0f 01d0: 33 84 aa 1c 8d 29 b4 ac 4f ad e6 89 I've bisected this to 3b9b8fe0ade1ee84ee4058261d2e39a1f283704b so ... perhaps intended in terms of uncovering problems. 
Seems to have identified something in my case at any rate. Attached is my full 3.17.0 dmesg, kernel config, and /proc/crypto contents (the only difference between 3.16 and 3.17 for the latter being the selftest value for ecb-aes-padlock which used to be passed with 3.16 and earlier.) Let me know if you need anything else. Interesting. I don't have access to the hardware, but I found something interesting in the driver related to the prefetch size (ecb_fetch_bytes) of ECB versus CBC (Note that the CBC selftest passes) So perhaps this might solve the bug, could you please test it? diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 633ba945e153..2834f0b23713 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -351,7 +351,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, padlock_reset_key(ctx-cword.encrypt); blkcipher_walk_init(walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, walk); + err = blkcipher_walk_virt_block(desc, walk, ecb_fetch_bytes); ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { @@ -380,7 +380,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, padlock_reset_key(ctx-cword.decrypt); blkcipher_walk_init(walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, walk); + err = blkcipher_walk_virt_block(desc, walk, ecb_fetch_bytes); ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { It will basically instruct the crypto layer not to pass fewer than 2 blocks at a time until there is really no other way, i.e., until the input is exhausted. -- Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 3.17 regression; alg: skcipher: Chunk test 1 failed on encryption at page 0 for ecb-aes-padlock
On 21 October 2014 01:43, Jamie Heilman ja...@audible.transient.net wrote: Ard Biesheuvel wrote: On 20 October 2014 09:14, Jamie Heilman ja...@audible.transient.net wrote: I get this new failure w/3.17.0 on my system with a VIA Esther processor: alg: skcipher: Chunk test 1 failed on encryption at page 0 for ecb-aes-padlock : 71 73 f7 db 24 93 21 6d 61 1e bb 63 42 79 db 64 0010: 6f 82 c0 ca a3 9b fa 0b d9 08 c7 4a 90 ae 8f 5f 0020: 5e 06 f0 5f 31 51 18 37 45 d7 ca 3a fd 6c 3f e1 0030: dd 8d 22 65 2b 00 50 ce ba 28 67 d7 ce 0e 0d ea 0040: 78 69 7f ae 8f 8b 69 37 75 e0 dc 96 e0 b7 f4 09 0050: cb 6d a2 fb da af 09 f8 81 82 27 fa 45 9c 29 a4 0060: 22 8b 78 69 5b 46 f9 39 1b cc f9 1d 09 eb bc 5c 0070: 41 72 51 97 1d 07 49 a0 1b 8e 65 4b b2 6a 12 03 0080: 6a 60 95 ac bd ac 1a 64 de 5a a5 f0 83 2f cb ca 0090: 22 74 a6 6c 9b 73 ce 3f e1 8b 22 17 59 0c 47 89 00a0: 33 a1 d6 47 03 19 4f a8 67 69 f0 5b f0 20 ad 06 00b0: 27 81 92 d8 c5 ba 98 12 be 24 b5 2f 75 02 c2 ad 00c0: 12 2f 07 32 ee 39 af 64 05 8f b3 d4 eb 1b 46 6e 00d0: d9 21 f9 c4 b7 c9 45 68 b4 a1 74 9f 82 47 eb cc 00e0: bd 0a 14 95 0f 8b a8 2f 4b 1b a7 bf 82 a6 43 0c 00f0: b9 39 4a a8 10 6f 50 7b 25 fb 26 81 e0 2f f0 96 0100: 8d 8b ac 92 0f f6 ed 64 63 29 4c 8e 18 13 c5 bf 0110: fc a0 d9 bf 7c 3a 0e 29 6f d1 6c 6f a5 da bf b1 0120: 30 ea 44 2d c3 8f 16 e1 66 fa a3 21 3e fc 13 ca 0130: f0 f6 f0 59 bd 8f 38 50 31 cb 69 3f 96 15 d6 f5 0140: ae ff f6 aa 41 85 4c 10 58 e3 f9 44 e6 28 da 9a 0150: dc 6a 80 34 73 97 1b c5 ca 26 16 77 0e 60 ab 89 0160: 0f 04 27 bd ce 3e 71 b4 a0 d7 22 7e db eb 24 70 0170: 42 71 51 78 70 b3 e0 3d 84 8e 8d 7b d0 6d ea 92 0180: 11 08 42 4f e5 ad 26 92 d2 00 ae a8 e3 4b 37 47 0190: 22 c1 95 c1 63 7f cb 03 f3 e3 d7 9d 60 c7 bc ea 01a0: 35 a2 fd 45 52 39 13 6f c1 53 f3 53 df 33 84 d7 01b0: d2 c8 37 b0 75 e3 41 46 b3 c7 83 2e 8a bb a4 e5 01c0: 7f 3c fd 8b eb ea 63 bd b7 46 e7 bf 09 9c 0d 0f 01d0: 33 84 aa 1c 8d 29 b4 ac 4f ad e6 89 I've bisected this to 3b9b8fe0ade1ee84ee4058261d2e39a1f283704b so ... 
perhaps intended in terms of uncovering problems. Seems to have identified something in my case at any rate. Attached is my full 3.17.0 dmesg, kernel config, and /proc/crypto contents (the only difference between 3.16 and 3.17 for the latter being the selftest value for ecb-aes-padlock which used to be passed with 3.16 and earlier.) Let me know if you need anything else. Interesting. I don't have access to the hardware, but I found something interesting in the driver related to the prefetch size (ecb_fetch_bytes) of ECB versus CBC (Note that the CBC selftest passes) So perhaps this might solve the bug, could you please test it? diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 633ba945e153..2834f0b23713 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -351,7 +351,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, padlock_reset_key(ctx-cword.encrypt); blkcipher_walk_init(walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, walk); + err = blkcipher_walk_virt_block(desc, walk, ecb_fetch_bytes); ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { @@ -380,7 +380,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, padlock_reset_key(ctx-cword.decrypt); blkcipher_walk_init(walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, walk); + err = blkcipher_walk_virt_block(desc, walk, ecb_fetch_bytes); ts_state = irq_ts_save(); while ((nbytes = walk.nbytes)) { It will basically instruct the crypto layer not to pass fewer than 2 blocks at a time until there is really no other way, i.e., until the input is exhausted. Nope. Test still fails w/exactly the same output as before. OK, thanks for trying. I am going to let those with access with the hardware take over now ... -- Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] arm64/crypto: use crypto instructions for generating AES key schedule
This patch implements the AES key schedule generation using ARMv8 Crypto Instructions. It replaces the table based C implementation in aes_generic.ko, which means we can drop the dependency on that module. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/Kconfig | 5 +- arch/arm64/crypto/aes-ce-ccm-glue.c | 4 +- arch/arm64/crypto/aes-ce-cipher.c | 112 +++- arch/arm64/crypto/aes-ce-setkey.h | 5 ++ arch/arm64/crypto/aes-glue.c| 18 -- 5 files changed, 133 insertions(+), 11 deletions(-) create mode 100644 arch/arm64/crypto/aes-ce-setkey.h diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 5562652c5316..a38b02ce5f9a 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -27,20 +27,19 @@ config CRYPTO_AES_ARM64_CE tristate AES core cipher using ARMv8 Crypto Extensions depends on ARM64 KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_AES config CRYPTO_AES_ARM64_CE_CCM tristate AES in CCM mode using ARMv8 Crypto Extensions depends on ARM64 KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_AES + select CRYPTO_AES_ARM64_CE select CRYPTO_AEAD config CRYPTO_AES_ARM64_CE_BLK tristate AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions depends on ARM64 KERNEL_MODE_NEON select CRYPTO_BLKCIPHER - select CRYPTO_AES + select CRYPTO_AES_ARM64_CE select CRYPTO_ABLK_HELPER config CRYPTO_AES_ARM64_NEON_BLK diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 9e6cdde9b43d..0ac73b838fa3 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -16,6 +16,8 @@ #include linux/crypto.h #include linux/module.h +#include aes-ce-setkey.h + static int num_rounds(struct crypto_aes_ctx *ctx) { /* @@ -48,7 +50,7 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm); int ret; - ret = crypto_aes_expand_key(ctx, in_key, key_len); + ret = ce_aes_expandkey(ctx, in_key, key_len); if 
(!ret) return 0; diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index 2075e1acae6b..4207c83389d3 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -14,6 +14,8 @@ #include linux/crypto.h #include linux/module.h +#include aes-ce-setkey.h + MODULE_DESCRIPTION(Synchronous AES cipher using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); @@ -124,6 +126,114 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_end(); } +/* + * aes_sub() - use the aese instruction to perform the AES sbox substitution + * on each byte in 'input' + */ +static u32 aes_sub(u32 input) +{ + u32 ret; + + __asm__(dupv1.4s, %w[in] ; + movi v0.16b, #0 ; + aese v0.16b, v1.16b ; + umov %w[out], v0.4s[0] ; + + : [out] =r(ret) + : [in]r(input) + : v0,v1); + + return ret; +} + +int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, +unsigned int key_len) +{ + /* +* The AES key schedule round constants +*/ + static u8 const rcon[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, + }; + + u32 kwords = key_len / sizeof(u32); + struct aes_block *key_enc, *key_dec; + int i, j; + + if (key_len != AES_KEYSIZE_128 + key_len != AES_KEYSIZE_192 + key_len != AES_KEYSIZE_256) + return -EINVAL; + + memcpy(ctx-key_enc, in_key, key_len); + ctx-key_length = key_len; + + kernel_neon_begin_partial(2); + for (i = 0; i sizeof(rcon); i++) { + u32 *rki = ctx-key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i = 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i = 6) + break; + rko[4] = aes_sub(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ 
rki[6]; + rko[7] = rko[6] ^ rki[7
Re: [PATCH] arm64/crypto: use crypto instructions for generating AES key schedule
On 22 October 2014 18:25, Catalin Marinas catalin.mari...@arm.com wrote: On Wed, Oct 22, 2014 at 08:15:32AM +0100, Ard Biesheuvel wrote: This patch implements the AES key schedule generation using ARMv8 Crypto Instructions. It replaces the table based C implementation in aes_generic.ko, which means we can drop the dependency on that module. I don't really understand the AES stuff but there's something else I noticed: if you build a kernel with the crypto stuff and run it on an implementation that does not have the crypto extensions, there does not seem to be any check for not using these algorithms (or falling back to the generic implementation). Does the kernel end up with undef faults? No, that is handled by https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=67bad2fdb754dbef14596c0b5d28b3a12c8dfe84 https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=3be1a5c4f75989cf457f13f38ff0913dff6d4996 and the modules use module_cpu_feature_match(), so they are only installed if the corresponding hwcap bit is set. -- Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] arm64/crypto: use crypto instructions to generate AES key schedule
This patch implements the AES key schedule generation using ARMv8 Crypto Instructions. It replaces the table based C implementation in aes_generic.ko, which means we can drop the dependency on that module. Tested-by: Steve Capper steve.cap...@linaro.org Acked-by: Steve Capper steve.cap...@linaro.org Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- v2: grammar/whitespace fixes, acked/tested-by added arch/arm64/crypto/Kconfig | 5 +- arch/arm64/crypto/aes-ce-ccm-glue.c | 4 +- arch/arm64/crypto/aes-ce-cipher.c | 112 +++- arch/arm64/crypto/aes-ce-setkey.h | 5 ++ arch/arm64/crypto/aes-glue.c| 18 -- 5 files changed, 133 insertions(+), 11 deletions(-) create mode 100644 arch/arm64/crypto/aes-ce-setkey.h diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 5562652c5316..a38b02ce5f9a 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -27,20 +27,19 @@ config CRYPTO_AES_ARM64_CE tristate AES core cipher using ARMv8 Crypto Extensions depends on ARM64 KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_AES config CRYPTO_AES_ARM64_CE_CCM tristate AES in CCM mode using ARMv8 Crypto Extensions depends on ARM64 KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_AES + select CRYPTO_AES_ARM64_CE select CRYPTO_AEAD config CRYPTO_AES_ARM64_CE_BLK tristate AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions depends on ARM64 KERNEL_MODE_NEON select CRYPTO_BLKCIPHER - select CRYPTO_AES + select CRYPTO_AES_ARM64_CE select CRYPTO_ABLK_HELPER config CRYPTO_AES_ARM64_NEON_BLK diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 9e6cdde9b43d..0ac73b838fa3 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -16,6 +16,8 @@ #include linux/crypto.h #include linux/module.h +#include aes-ce-setkey.h + static int num_rounds(struct crypto_aes_ctx *ctx) { /* @@ -48,7 +50,7 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, struct 
crypto_aes_ctx *ctx = crypto_aead_ctx(tfm); int ret; - ret = crypto_aes_expand_key(ctx, in_key, key_len); + ret = ce_aes_expandkey(ctx, in_key, key_len); if (!ret) return 0; diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index 2075e1acae6b..7f96e8af6a5a 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -14,6 +14,8 @@ #include linux/crypto.h #include linux/module.h +#include aes-ce-setkey.h + MODULE_DESCRIPTION(Synchronous AES cipher using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); @@ -124,6 +126,114 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_end(); } +/* + * aes_sub() - use the aese instruction to perform the AES sbox substitution + * on each byte in 'input' + */ +static u32 aes_sub(u32 input) +{ + u32 ret; + + __asm__(dupv1.4s, %w[in] ; + movi v0.16b, #0 ; + aese v0.16b, v1.16b ; + umov %w[out], v0.4s[0] ; + + : [out] =r(ret) + : [in]r(input) + : v0,v1); + + return ret; +} + +int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, +unsigned int key_len) +{ + /* +* The AES key schedule round constants +*/ + static u8 const rcon[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, + }; + + u32 kwords = key_len / sizeof(u32); + struct aes_block *key_enc, *key_dec; + int i, j; + + if (key_len != AES_KEYSIZE_128 + key_len != AES_KEYSIZE_192 + key_len != AES_KEYSIZE_256) + return -EINVAL; + + memcpy(ctx-key_enc, in_key, key_len); + ctx-key_length = key_len; + + kernel_neon_begin_partial(2); + for (i = 0; i sizeof(rcon); i++) { + u32 *rki = ctx-key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i = 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ 
rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i >= 6) + break; + rko[4] = aes_sub(rko[3]) ^ rki[4
Re: simd: Allow simd use in kernel threads with softirqs disabled
On 14 November 2014 16:43, Herbert Xu herb...@gondor.apana.org.au wrote: While working on the cryptd request reordering problem, I noticed an anomaly where kernel threads are normally allowed to use simd per may_use_simd, but as soon as you disable softirqs, they suddenly lose that ability for no good reason. The problem is that in_interrupt does not distinguish between softirq processing and simply having softirqs disabled. This patch creates a new helper in_serving_interrupt which makes that distinction. It then uses this in all current implementations of may_use_simd. Isn't that a much more widespread problem if in_interrupt() yields true while no interrupt is being served? Signed-off-by: Herbert Xu herb...@gondor.apana.org.au diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index a9a4229..6cdaa852 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -63,7 +63,7 @@ static inline bool interrupted_user_mode(void) */ bool irq_fpu_usable(void) { - return !in_interrupt() || + return !in_serving_interrupt() || interrupted_user_mode() || interrupted_kernel_fpu_idle(); } diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h index f57eb7b..74e0b05 100644 --- a/include/asm-generic/simd.h +++ b/include/asm-generic/simd.h @@ -10,5 +10,5 @@ */ static __must_check inline bool may_use_simd(void) { - return !in_interrupt(); + return !in_serving_interrupt(); } diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h index dbeec4d..18f3b46 100644 --- a/include/linux/preempt_mask.h +++ b/include/linux/preempt_mask.h @@ -65,6 +65,8 @@ #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) #define in_serving_softirq() (softirq_count() SOFTIRQ_OFFSET) +#define in_serving_interrupt() (irq_count() (HARDIRQ_MASK | \ + SOFTIRQ_OFFSET | NMI_MASK)) /* * Are we in NMI context? 
Thanks, -- Email: Herbert Xu herb...@gondor.apana.org.au Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] arm64: crypto: increase AES interleave to 4x
This patch increases the interleave factor for parallel AES modes to 4x. This improves performance on Cortex-A57 by ~35%. This is due to the 3-cycle latency of AES instructions on the A57's relatively deep pipeline (compared to Cortex-A53 where the AES instruction latency is only 2 cycles). At the same time, disable inline expansion of the core AES functions, as the performance benefit of this feature is negligible. Measured on AMD Seattle (using tcrypt.ko mode=500 sec=1): Baseline (2x interleave, inline expansion) -- testing speed of async cbc(aes) (cbc-aes-ce) decryption test 4 (128 bit key, 8192 byte blocks): 95545 operations in 1 seconds test 14 (256 bit key, 8192 byte blocks): 68496 operations in 1 seconds This patch (4x interleave, no inline expansion) --- testing speed of async cbc(aes) (cbc-aes-ce) decryption test 4 (128 bit key, 8192 byte blocks): 124735 operations in 1 seconds test 14 (256 bit key, 8192 byte blocks): 92328 operations in 1 seconds Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 5720608c50b1..abb79b3cfcfe 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -29,7 +29,7 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o aes-neon-blk-y := aes-glue-neon.o aes-neon.o -AFLAGS_aes-ce.o:= -DINTERLEAVE=2 -DINTERLEAVE_INLINE +AFLAGS_aes-ce.o:= -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] arm: crypto: Add NEON optimized SHA-256
Hello Sami, On 16 March 2015 at 16:48, Sami Tolvanen samitolva...@google.com wrote: Add Andy Polyakov's NEON optimized SHA-256 implementation. On Nexus 6, this implementation is ~2x faster than sha256-generic. Signed-off-by: Sami Tolvanen samitolva...@google.com Have you tested this code with the tcrypt.ko module? Some more comments below --- arch/arm/crypto/Makefile|2 arch/arm/crypto/sha256-armv7-neon.S | 819 arch/arm/crypto/sha256_neon_glue.c | 201 crypto/Kconfig | 12 4 files changed, 1034 insertions(+) diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b48fa34..316dba2 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -6,12 +6,14 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA256_ARM_NEON) += sha256-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o +sha256-arm-neon-y := sha256-armv7-neon.o sha256_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o quiet_cmd_perl = PERL$@ diff --git a/arch/arm/crypto/sha256-armv7-neon.S b/arch/arm/crypto/sha256-armv7-neon.S new file mode 100644 index 000..5ce04c2 --- /dev/null +++ b/arch/arm/crypto/sha256-armv7-neon.S @@ -0,0 +1,819 @@ +@ sha256-armv7-neon.S - ARM/NEON assembly implementation of SHA-256 transform +@ +@ +@ Written by Andy Polyakov ap...@openssl.org for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ + Did you talk to Andy about the license? I don't think this is permissible for the kernel as-is. 
+#include linux/linkage.h + +.text +.code 32 +.fpu neon + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +.word 0 +.align 5 + +.align 5 +ENTRY(sha256_transform_neon) + /* Input: +* %r0: SHA256_CONTEXT +* %r1: data +* %r2: nblks +*/ + sub r3,pc,#8@ sha256_transform_neon This is broken on thumb-2, use adr instead + add r2,r1,r2,lsl#6 @ len to point at the end of inp + + stmdb sp!,{r4-r12,lr} + + mov r12,sp + sub sp,sp,#16*4+16 @ alloca + sub r14,r3,#256+32 @ K256 + bic sp,sp,#15 @ align for 128-bit stores + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8q2,q2 + str r2,[sp,#72] + vrev32.8q3,q3 + str r12,[sp,#76]@ save original sp + vadd.i32q8,q8,q0 + vadd.i32q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr
[PATCH] arm64/crypto: issue aese/aesmc instructions in pairs
This changes the AES core transform implementations to issue aese/aesmc (and aesd/aesimc) in pairs. This enables a micro-architectural optimization in recent Cortex-A5x cores that improves performance by 50-90%. Measured performance in cycles per byte (Cortex-A57): CBC enc CBC dec CTR before3.641.341.32 after 1.950.850.93 Note that this results in a ~5% performance decrease for older cores. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- Will, This is the optimization you yourself mentioned to me about a year ago (or even longer perhaps?) Anyway, we have now been able to confirm it on a sample 'in the wild', (i.e., a Galaxy S6 phone) arch/arm64/crypto/aes-ce-ccm-core.S | 12 ++-- arch/arm64/crypto/aes-ce.S | 10 +++--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 432e4841cd81..a2a7fbcacc14 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final) 0: mov v4.16b, v3.16b 1: ld1 {v5.2d}, [x2], #16 /* load next round key */ aesev0.16b, v4.16b - aesev1.16b, v4.16b aesmc v0.16b, v0.16b + aesev1.16b, v4.16b aesmc v1.16b, v1.16b 2: ld1 {v3.2d}, [x2], #16 /* load next round key */ aesev0.16b, v5.16b - aesev1.16b, v5.16b aesmc v0.16b, v0.16b + aesev1.16b, v5.16b aesmc v1.16b, v1.16b 3: ld1 {v4.2d}, [x2], #16 /* load next round key */ subsw3, w3, #3 aesev0.16b, v3.16b - aesev1.16b, v3.16b aesmc v0.16b, v0.16b + aesev1.16b, v3.16b aesmc v1.16b, v1.16b bpl 1b aesev0.16b, v4.16b @@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final) ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ 2: /* inner loop: 3 rounds, 2x interleaved */ aesev0.16b, v4.16b - aesev1.16b, v4.16b aesmc v0.16b, v0.16b + aesev1.16b, v4.16b aesmc v1.16b, v1.16b 3: ld1 {v3.2d}, [x10], #16 /* load next round key */ aesev0.16b, v5.16b - aesev1.16b, v5.16b aesmc v0.16b, v0.16b + aesev1.16b, v5.16b aesmc v1.16b, v1.16b 4: ld1 {v4.2d}, [x10], 
#16 /* load next round key */ subsw7, w7, #3 aesev0.16b, v3.16b - aesev1.16b, v3.16b aesmc v0.16b, v0.16b + aesev1.16b, v3.16b aesmc v1.16b, v1.16b ld1 {v5.2d}, [x10], #16 /* load next round key */ bpl 2b diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 685a18f731eb..78f3cfe92c08 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -45,18 +45,14 @@ .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 aes\de \i0\().16b, \k\().16b - .ifnb \i1 - aes\de \i1\().16b, \k\().16b - .ifnb \i3 - aes\de \i2\().16b, \k\().16b - aes\de \i3\().16b, \k\().16b - .endif - .endif aes\mc \i0\().16b, \i0\().16b .ifnb \i1 + aes\de \i1\().16b, \k\().16b aes\mc \i1\().16b, \i1\().16b .ifnb \i3 + aes\de \i2\().16b, \k\().16b aes\mc \i2\().16b, \i2\().16b + aes\de \i3\().16b, \k\().16b aes\mc \i3\().16b, \i3\().16b .endif .endif -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto/arm: add support for GHASH using ARMv8 Crypto Extensions
This implements the GHASH hash algorithm (as used by the GCM AEAD chaining mode) using the AArch32 version of the 64x64 to 128 bit polynomial multiplication instruction (vmull.p64) that is part of the ARMv8 Crypto Extensions. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig | 9 ++ arch/arm/crypto/Makefile| 2 + arch/arm/crypto/ghash-ce-core.S | 93 arch/arm/crypto/ghash-ce-glue.c | 318 4 files changed, 422 insertions(+) create mode 100644 arch/arm/crypto/ghash-ce-core.S create mode 100644 arch/arm/crypto/ghash-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 63588bdf3b5d..5dc8a9e79744 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -110,4 +110,13 @@ config CRYPTO_AES_ARM_CE Use an implementation of AES in CBC, CTR and XTS modes that uses ARMv8 Crypto Extensions +config CRYPTO_GHASH_ARM_CE + tristate PMULL-accelerated GHASH using ARMv8 Crypto Extensions + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + help + Use an implementation of GHASH (used by the GCM AEAD chaining mode) + that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) + that is part of the ARMv8 Crypto Extensions + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 2514c420e8d3..9a273bd7dffd 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o @@ -19,6 +20,7 @@ sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o +ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o 
quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S new file mode 100644 index ..53bee60ff94b --- /dev/null +++ b/arch/arm/crypto/ghash-ce-core.S @@ -0,0 +1,93 @@ +/* + * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. + * + * Copyright (C) 2015 Linaro Ltd. ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include linux/linkage.h +#include asm/assembler.h + + SHASH .reqq0 + SHASH2 .reqq1 + T1 .reqq2 + T2 .reqq3 + MASK.reqq4 + XL .reqq5 + XM .reqq6 + XH .reqq7 + IN1 .reqq7 + + SHASH_L .reqd0 + SHASH_H .reqd1 + SHASH2_L.reqd2 + T1_L.reqd4 + MASK_L .reqd8 + XL_L.reqd10 + XL_H.reqd11 + XM_L.reqd12 + XM_H.reqd13 + XH_L.reqd14 + + .text + .fpucrypto-neon-fp-armv8 + + /* +* void pmull_ghash_update(int blocks, u64 dg[], const char *src, +* struct ghash_key const *k, const char *head) +*/ +ENTRY(pmull_ghash_update) + vld1.8 {SHASH}, [r3] + vld1.8 {XL}, [r1] + vmov.i8 MASK, #0xe1 + vext.8 SHASH2, SHASH, SHASH, #8 + vshl.u64MASK, MASK, #57 + veorSHASH2, SHASH2, SHASH + + /* do the head block first, if supplied */ + ldr ip, [sp] + teq ip, #0 + beq 0f + vld1.8 {T1}, [ip] + teq r0, #0 + b 1f + +0: vld1.8 {T1}, [r2]! + subsr0, r0, #1 + +1: /* multiply XL by SHASH in GF(2^128) */ + vrev64.8T1, T1 + + vext.8 T2, XL, XL, #8 + vext.8 IN1, T1, T1, #8 + veorT1, T1, T2 + veorXL, XL, IN1 + + vmull.p64 XH, SHASH_H, XL_H @ a1 * b1 + veorT1, T1, XL + vmull.p64 XL, SHASH_L, XL_L @ a0 * b0 + vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0) + + vext.8 T1, XL, XH, #8 + veorT2, XL, XH + veorXM, XM, T1 + veorXM, XM, T2 + vmull.p64 T2, XL_L, MASK_L
[PATCH 4/4] crypto/arm: AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions
This implements the ECB, CBC, CTR and XTS asynchronous block ciphers using the AArch32 versions of the ARMv8 Crypto Extensions for AES. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig | 9 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/aes-ce-core.S | 535 ++ arch/arm/crypto/aes-ce-glue.c | 520 4 files changed, 1066 insertions(+) create mode 100644 arch/arm/crypto/aes-ce-core.S create mode 100644 arch/arm/crypto/aes-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 9c1478e55a40..63588bdf3b5d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -101,4 +101,13 @@ config CRYPTO_AES_ARM_BS This implementation does not rely on any lookup tables so it is believed to be invulnerable to cache timing attacks. +config CRYPTO_AES_ARM_CE + tristate Accelerated AES using ARMv8 Crypto Extensions + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_ABLK_HELPER + help + Use an implementation of AES in CBC, CTR and XTS modes that uses + ARMv8 Crypto Extensions + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4ea9f96c2782..2514c420e8d3 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o @@ -17,6 +18,7 @@ sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o +aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S new file mode 100644 index ..0fb1b7b29c51 --- /dev/null +++ 
b/arch/arm/crypto/aes-ce-core.S @@ -0,0 +1,535 @@ +/* + * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include linux/linkage.h +#include asm/assembler.h + + .text + .fpucrypto-neon-fp-armv8 + .align 3 + + /* +* u32 ce_aes_sub(u32 input) - use the aese instruction to perform the +* AES sbox substitution on each byte in +* 'input' +*/ +ENTRY(ce_aes_sub) + vdup.32 q1, r0 + veorq0, q0, q0 + aese.8 q0, q1 + vmovr0, s0 + bx lr +ENDPROC(ce_aes_sub) + + /* +* void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns +*operation on round key *src +*/ +ENTRY(ce_aes_invert) + vld1.8 {q0}, [r1] + aesimc.8q0, q0 + vst1.8 {q0}, [r0] + bx lr +ENDPROC(ce_aes_invert) + + .macro enc_round, state, key + aese.8 \state, \key + aesmc.8 \state, \state + .endm + + .macro dec_round, state, key + aesd.8 \state, \key + aesimc.8\state, \state + .endm + + .macro enc_dround, key1, key2 + enc_round q0, \key1 + enc_round q0, \key2 + .endm + + .macro dec_dround, key1, key2 + dec_round q0, \key1 + dec_round q0, \key2 + .endm + + .macro enc_fround, key1, key2, key3 + enc_round q0, \key1 + aese.8 q0, \key2 + veorq0, q0, \key3 + .endm + + .macro dec_fround, key1, key2, key3 + dec_round q0, \key1 + aesd.8 q0, \key2 + veorq0, q0, \key3 + .endm + + .macro enc_dround_3x, key1, key2 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + enc_round q0, \key2 + enc_round q1, \key2 + enc_round q2, \key2 + .endm + + .macro dec_dround_3x, key1, key2 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + dec_round q0, \key2 + dec_round q1, \key2 + dec_round q2, \key2 + .endm + + .macro
[PATCH 2/4] crypto/arm: add support for SHA1 using ARMv8 Crypto Instructions
This implements the SHA1 secure hash algorithm using the AArch32 versions of the ARMv8 Crypto Extensions for SHA1. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 10 +++ arch/arm/crypto/Makefile | 2 + arch/arm/crypto/sha1-ce-core.S | 134 arch/arm/crypto/sha1-ce-glue.c | 150 + 4 files changed, 296 insertions(+) create mode 100644 arch/arm/crypto/sha1-ce-core.S create mode 100644 arch/arm/crypto/sha1-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 66fe82857e99..d7bc10beb8ac 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -27,6 +27,16 @@ config CRYPTO_SHA1_ARM_NEON using optimized ARM NEON assembly, when NEON instructions are available. +config CRYPTO_SHA1_ARM_CE + tristate SHA1 digest algorithm (ARM v8 Crypto Extensions) + depends on KERNEL_MODE_NEON + select CRYPTO_SHA1_ARM + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using special ARMv8 Crypto Extensions. 
+ config CRYPTO_SHA512_ARM_NEON tristate SHA384 and SHA512 digest algorithm (ARM NEON) depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b48fa341648d..d92d05ba646e 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -7,12 +7,14 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o +sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S new file mode 100644 index ..7542f10b2233 --- /dev/null +++ b/arch/arm/crypto/sha1-ce-core.S @@ -0,0 +1,134 @@ +/* + * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd. + * Author: Ard Biesheuvel ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include linux/linkage.h +#include asm/assembler.h + + .text + .fpucrypto-neon-fp-armv8 + + k0 .reqq0 + k1 .reqq1 + k2 .reqq2 + k3 .reqq3 + + ta0 .reqq4 + ta1 .reqq5 + tb0 .reqq5 + tb1 .reqq4 + + dga .reqq6 + dgb .reqq7 + dgbs.reqs28 + + dg0 .reqq12 + dg1a0 .reqq13 + dg1a1 .reqq14 + dg1b0 .reqq14 + dg1b1 .reqq13 + + .macro add_only, op, ev, rc, s0, dg1 + .ifnb \s0 + vadd.u32tb\ev, q\s0, \rc + .endif + sha1h.32dg1b\ev, dg0 + .ifb\dg1 + sha1\op\().32 dg0, dg1a\ev, ta\ev + .else + sha1\op\().32 dg0, \dg1, ta\ev + .endif + .endm + + .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 + sha1su0.32 q\s0, q\s1, q\s2 + add_only\op, \ev, \rc, \s1, \dg1 + sha1su1.32 q\s0, q\s3 + .endm + + .align 6 +.Lsha1_rcon: + .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 + .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 + .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc + .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 + + /* +* void sha1_ce_transform(int blocks, u8 const *src, u32 *state, +*u8 *head); +*/ +ENTRY(sha1_ce_transform) + /* load round constants */ + adr ip, .Lsha1_rcon + vld1.32 {k0-k1}, [ip]! + vld1.32 {k2-k3}, [ip] + + /* load state */ + vldmr2, {dga} + vldrdgbs, [r2, #16] + + /* load partial input (if supplied) */ + teq r3, #0 + beq 0f + vld1.8 {q8-q9}, [r3]! + vld1.8 {q10-q11}, [r3] + teq r0, #0 + b 1f + + /* load input */ +0: vld1.8
[PATCH 3/4] crypto/arm: add support for SHA-224/256 using ARMv8 Crypto Extensions
This implements the SHA-224/256 secure hash algorithm using the AArch32 versions of the ARMv8 Crypto Extensions for SHA2. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 9 ++ arch/arm/crypto/Makefile | 2 + arch/arm/crypto/sha2-ce-core.S | 134 +++ arch/arm/crypto/sha2-ce-glue.c | 203 + 4 files changed, 348 insertions(+) create mode 100644 arch/arm/crypto/sha2-ce-core.S create mode 100644 arch/arm/crypto/sha2-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index d7bc10beb8ac..9c1478e55a40 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -37,6 +37,15 @@ config CRYPTO_SHA1_ARM_CE SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented using special ARMv8 Crypto Extensions. +config CRYPTO_SHA2_ARM_CE + tristate SHA-224/256 digest algorithm (ARM v8 Crypto Extensions) + depends on KERNEL_MODE_NEON + select CRYPTO_SHA256 + select CRYPTO_HASH + help + SHA-256 secure hash standard (DFIPS 180-2) implemented + using special ARMv8 Crypto Extensions. 
+ config CRYPTO_SHA512_ARM_NEON tristate SHA384 and SHA512 digest algorithm (ARM NEON) depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index d92d05ba646e..4ea9f96c2782 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o @@ -15,6 +16,7 @@ sha1-arm-y:= sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o +sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S new file mode 100644 index ..39e01ee7d485 --- /dev/null +++ b/arch/arm/crypto/sha2-ce-core.S @@ -0,0 +1,134 @@ +/* + * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd. + * Author: Ard Biesheuvel ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include linux/linkage.h +#include asm/assembler.h + + .text + .fpucrypto-neon-fp-armv8 + + k0 .reqq7 + k1 .reqq8 + rk .reqr3 + + ta0 .reqq9 + ta1 .reqq10 + tb0 .reqq10 + tb1 .reqq9 + + dga .reqq11 + dgb .reqq12 + + dg0 .reqq13 + dg1 .reqq14 + dg2 .reqq15 + + .macro add_only, ev, s0 + vmovdg2, dg0 + .ifnb \s0 + vld1.32 {k\ev}, [rk]! 
+ .endif + sha256h.32 dg0, dg1, tb\ev + sha256h2.32 dg1, dg2, tb\ev + .ifnb \s0 + vadd.u32ta\ev, q\s0, k\ev + .endif + .endm + + .macro add_update, ev, s0, s1, s2, s3 + sha256su0.32q\s0, q\s1 + add_only\ev, \s1 + sha256su1.32q\s0, q\s2, q\s3 + .endm + + .align 6 +.Lsha256_rcon: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa
[PATCH 1/4] crypto/arm: move ARM specific Kconfig definitions to a dedicated file
This moves all Kconfig symbols defined in crypto/Kconfig that depend on CONFIG_ARM to a dedicated Kconfig file in arch/arm/crypto, which is where the code that implements those features resides as well. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/Kconfig| 3 ++ arch/arm/crypto/Kconfig | 85 + crypto/Kconfig | 75 --- 3 files changed, 88 insertions(+), 75 deletions(-) create mode 100644 arch/arm/crypto/Kconfig diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9f1f09a2bc9b..e60da5ab8aec 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2167,6 +2167,9 @@ source arch/arm/Kconfig.debug source security/Kconfig source crypto/Kconfig +if CRYPTO +source arch/arm/crypto/Kconfig +endif source lib/Kconfig diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig new file mode 100644 index ..66fe82857e99 --- /dev/null +++ b/arch/arm/crypto/Kconfig @@ -0,0 +1,85 @@ + +menuconfig ARM_CRYPTO + bool ARM Accelerated Cryptographic Algorithms + depends on ARM + help + Say Y here to choose from a selection of cryptographic algorithms + implemented using ARM specific CPU features or instructions. + +if ARM_CRYPTO + +config CRYPTO_SHA1_ARM + tristate SHA1 digest algorithm (ARM-asm) + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM assembler. + +config CRYPTO_SHA1_ARM_NEON + tristate SHA1 digest algorithm (ARM NEON) + depends on KERNEL_MODE_NEON + select CRYPTO_SHA1_ARM + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM NEON assembly, when NEON instructions are + available. + +config CRYPTO_SHA512_ARM_NEON + tristate SHA384 and SHA512 digest algorithm (ARM NEON) + depends on KERNEL_MODE_NEON + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-512 secure hash standard (DFIPS 180-2) implemented + using ARM NEON instructions, when available. 
+ + This version of SHA implements a 512 bit hash with 256 bits of + security against collision attacks. + + This code also includes SHA-384, a 384 bit hash with 192 bits + of security against collision attacks. + +config CRYPTO_AES_ARM + tristate AES cipher algorithms (ARM-asm) + depends on ARM + select CRYPTO_ALGAPI + select CRYPTO_AES + help + Use optimized AES assembler routines for ARM platforms. + + AES cipher algorithms (FIPS-197). AES uses the Rijndael + algorithm. + + Rijndael appears to be consistently a very good performer in + both hardware and software across a wide range of computing + environments regardless of its use in feedback or non-feedback + modes. Its key setup time is excellent, and its key agility is + good. Rijndael's very low memory requirements make it very well + suited for restricted-space environments, in which it also + demonstrates excellent performance. Rijndael's operations are + among the easiest to defend against power and timing attacks. + + The AES specifies three key sizes: 128, 192 and 256 bits + + See http://csrc.nist.gov/encryption/aes/ for more information. + +config CRYPTO_AES_ARM_BS + tristate Bit sliced AES using NEON instructions + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES_ARM + select CRYPTO_ABLK_HELPER + help + Use a faster and more secure NEON based implementation of AES in CBC, + CTR and XTS modes + + Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode + and for XTS mode encryption, CBC and XTS mode decryption speedup is + around 25%. (CBC encryption speed is not affected by this driver.) + This implementation does not rely on any lookup tables so it is + believed to be invulnerable to cache timing attacks. 
+ +endif diff --git a/crypto/Kconfig b/crypto/Kconfig index 50f4da44a304..c50900b467c8 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -555,26 +555,6 @@ config CRYPTO_SHA1_SPARC64 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented using sparc64 crypto instructions, when available. -config CRYPTO_SHA1_ARM - tristate SHA1 digest algorithm (ARM-asm) - depends on ARM - select CRYPTO_SHA1 - select CRYPTO_HASH - help - SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented - using optimized ARM assembler. - -config CRYPTO_SHA1_ARM_NEON - tristate SHA1 digest algorithm (ARM NEON) - depends on ARM KERNEL_MODE_NEON
[PATCH v2 2/5] crypto/arm: add support for SHA1 using ARMv8 Crypto Instructions
This implements the SHA1 secure hash algorithm using the AArch32 versions of the ARMv8 Crypto Extensions for SHA1. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 10 +++ arch/arm/crypto/Makefile | 2 + arch/arm/crypto/sha1-ce-core.S | 134 arch/arm/crypto/sha1-ce-glue.c | 150 + 4 files changed, 296 insertions(+) create mode 100644 arch/arm/crypto/sha1-ce-core.S create mode 100644 arch/arm/crypto/sha1-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 66fe82857e99..d7bc10beb8ac 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -27,6 +27,16 @@ config CRYPTO_SHA1_ARM_NEON using optimized ARM NEON assembly, when NEON instructions are available. +config CRYPTO_SHA1_ARM_CE + tristate SHA1 digest algorithm (ARM v8 Crypto Extensions) + depends on KERNEL_MODE_NEON + select CRYPTO_SHA1_ARM + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using special ARMv8 Crypto Extensions. 
+ config CRYPTO_SHA512_ARM_NEON tristate SHA384 and SHA512 digest algorithm (ARM NEON) depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b48fa341648d..d92d05ba646e 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -7,12 +7,14 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o +sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S new file mode 100644 index ..4aad520935d8 --- /dev/null +++ b/arch/arm/crypto/sha1-ce-core.S @@ -0,0 +1,134 @@ +/* + * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd. + * Author: Ard Biesheuvel ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include linux/linkage.h +#include asm/assembler.h + + .text + .fpucrypto-neon-fp-armv8 + + k0 .reqq0 + k1 .reqq1 + k2 .reqq2 + k3 .reqq3 + + ta0 .reqq4 + ta1 .reqq5 + tb0 .reqq5 + tb1 .reqq4 + + dga .reqq6 + dgb .reqq7 + dgbs.reqs28 + + dg0 .reqq12 + dg1a0 .reqq13 + dg1a1 .reqq14 + dg1b0 .reqq14 + dg1b1 .reqq13 + + .macro add_only, op, ev, rc, s0, dg1 + .ifnb \s0 + vadd.u32tb\ev, q\s0, \rc + .endif + sha1h.32dg1b\ev, dg0 + .ifb\dg1 + sha1\op\().32 dg0, dg1a\ev, ta\ev + .else + sha1\op\().32 dg0, \dg1, ta\ev + .endif + .endm + + .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 + sha1su0.32 q\s0, q\s1, q\s2 + add_only\op, \ev, \rc, \s1, \dg1 + sha1su1.32 q\s0, q\s3 + .endm + + .align 6 +.Lsha1_rcon: + .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 + .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 + .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc + .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 + + /* +* void sha1_ce_transform(int blocks, u8 const *src, u32 *state, +*u8 *head); +*/ +ENTRY(sha1_ce_transform) + /* load round constants */ + adr ip, .Lsha1_rcon + vld1.32 {k0-k1}, [ip, :128]! + vld1.32 {k2-k3}, [ip, :128] + + /* load state */ + vld1.32 {dga}, [r2] + vldrdgbs, [r2, #16] + + /* load partial input (if supplied) */ + teq r3, #0 + beq 0f + vld1.32 {q8-q9}, [r3]! + vld1.32 {q10-q11}, [r3] + teq r0, #0 + b 1f + + /* load input */ +0
[PATCH v2 3/5] crypto/arm: add support for SHA-224/256 using ARMv8 Crypto Extensions
This implements the SHA-224/256 secure hash algorithm using the AArch32 versions of the ARMv8 Crypto Extensions for SHA2. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 9 ++ arch/arm/crypto/Makefile | 2 + arch/arm/crypto/sha2-ce-core.S | 134 +++ arch/arm/crypto/sha2-ce-glue.c | 203 + 4 files changed, 348 insertions(+) create mode 100644 arch/arm/crypto/sha2-ce-core.S create mode 100644 arch/arm/crypto/sha2-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index d7bc10beb8ac..9c1478e55a40 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -37,6 +37,15 @@ config CRYPTO_SHA1_ARM_CE SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented using special ARMv8 Crypto Extensions. +config CRYPTO_SHA2_ARM_CE + tristate SHA-224/256 digest algorithm (ARM v8 Crypto Extensions) + depends on KERNEL_MODE_NEON + select CRYPTO_SHA256 + select CRYPTO_HASH + help + SHA-256 secure hash standard (DFIPS 180-2) implemented + using special ARMv8 Crypto Extensions. 
+ config CRYPTO_SHA512_ARM_NEON tristate SHA384 and SHA512 digest algorithm (ARM NEON) depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index d92d05ba646e..4ea9f96c2782 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o @@ -15,6 +16,7 @@ sha1-arm-y:= sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o +sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S new file mode 100644 index ..96af09fe957b --- /dev/null +++ b/arch/arm/crypto/sha2-ce-core.S @@ -0,0 +1,134 @@ +/* + * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd. + * Author: Ard Biesheuvel ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include linux/linkage.h +#include asm/assembler.h + + .text + .fpucrypto-neon-fp-armv8 + + k0 .reqq7 + k1 .reqq8 + rk .reqr3 + + ta0 .reqq9 + ta1 .reqq10 + tb0 .reqq10 + tb1 .reqq9 + + dga .reqq11 + dgb .reqq12 + + dg0 .reqq13 + dg1 .reqq14 + dg2 .reqq15 + + .macro add_only, ev, s0 + vmovdg2, dg0 + .ifnb \s0 + vld1.32 {k\ev}, [rk, :128]! 
+ .endif + sha256h.32 dg0, dg1, tb\ev + sha256h2.32 dg1, dg2, tb\ev + .ifnb \s0 + vadd.u32ta\ev, q\s0, k\ev + .endif + .endm + + .macro add_update, ev, s0, s1, s2, s3 + sha256su0.32q\s0, q\s1 + add_only\ev, \s1 + sha256su1.32q\s0, q\s2, q\s3 + .endm + + .align 6 +.Lsha256_rcon: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word
[PATCH v2 4/5] crypto/arm: AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions
This implements the ECB, CBC, CTR and XTS asynchronous block ciphers using the AArch32 versions of the ARMv8 Crypto Extensions for AES. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig | 9 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/aes-ce-core.S | 518 + arch/arm/crypto/aes-ce-glue.c | 520 ++ 4 files changed, 1049 insertions(+) create mode 100644 arch/arm/crypto/aes-ce-core.S create mode 100644 arch/arm/crypto/aes-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 9c1478e55a40..63588bdf3b5d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -101,4 +101,13 @@ config CRYPTO_AES_ARM_BS This implementation does not rely on any lookup tables so it is believed to be invulnerable to cache timing attacks. +config CRYPTO_AES_ARM_CE + tristate Accelerated AES using ARMv8 Crypto Extensions + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_ABLK_HELPER + help + Use an implementation of AES in CBC, CTR and XTS modes that uses + ARMv8 Crypto Extensions + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4ea9f96c2782..2514c420e8d3 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o @@ -17,6 +18,7 @@ sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o +aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S new file mode 100644 index ..8cfa468ee570 --- /dev/null 
+++ b/arch/arm/crypto/aes-ce-core.S @@ -0,0 +1,518 @@ +/* + * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include linux/linkage.h +#include asm/assembler.h + + .text + .fpucrypto-neon-fp-armv8 + .align 3 + + .macro enc_round, state, key + aese.8 \state, \key + aesmc.8 \state, \state + .endm + + .macro dec_round, state, key + aesd.8 \state, \key + aesimc.8\state, \state + .endm + + .macro enc_dround, key1, key2 + enc_round q0, \key1 + enc_round q0, \key2 + .endm + + .macro dec_dround, key1, key2 + dec_round q0, \key1 + dec_round q0, \key2 + .endm + + .macro enc_fround, key1, key2, key3 + enc_round q0, \key1 + aese.8 q0, \key2 + veorq0, q0, \key3 + .endm + + .macro dec_fround, key1, key2, key3 + dec_round q0, \key1 + aesd.8 q0, \key2 + veorq0, q0, \key3 + .endm + + .macro enc_dround_3x, key1, key2 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + enc_round q0, \key2 + enc_round q1, \key2 + enc_round q2, \key2 + .endm + + .macro dec_dround_3x, key1, key2 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + dec_round q0, \key2 + dec_round q1, \key2 + dec_round q2, \key2 + .endm + + .macro enc_fround_3x, key1, key2, key3 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + aese.8 q0, \key2 + aese.8 q1, \key2 + aese.8 q2, \key2 + veorq0, q0, \key3 + veorq1, q1, \key3 + veorq2, q2, \key3 + .endm + + .macro dec_fround_3x, key1, key2, key3 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + aesd.8 q0, \key2 + aesd.8 q1, \key2 + aesd.8 q2, \key2 + veorq0, q0, \key3 + veorq1, q1, \key3 + veorq2, q2, \key3 + .endm + + .macro do_block
[PATCH v2 0/5] ARM: crypto: ARMv8 Crypto Extensions
This is v2 of the ARM crypto series I sent out yesterday, erroneously without a cover letter. Patch #1 moves all the ARM specific crypto options to arch/arm/crypto/Kconfig. Patches #2 - #5 implement SHA1, SHA-224/256, AES-ECB/CBC/CTR/XTS and GHASH, respectively. Changes since v1: - fixes for BE (currently still untested) - added alignment hints where appropriate (e,g., [rX, :128]) - various minor tweaks There are all tested on LE using the respective tcrypt tests. Ard Biesheuvel (5): crypto/arm: move ARM specific Kconfig definitions to a dedicated file crypto/arm: add support for SHA1 using ARMv8 Crypto Instructions crypto/arm: add support for SHA-224/256 using ARMv8 Crypto Extensions crypto/arm: AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions crypto/arm: add support for GHASH using ARMv8 Crypto Extensions arch/arm/Kconfig| 3 + arch/arm/crypto/Kconfig | 123 ++ arch/arm/crypto/Makefile| 8 + arch/arm/crypto/aes-ce-core.S | 518 +++ arch/arm/crypto/aes-ce-glue.c | 520 arch/arm/crypto/ghash-ce-core.S | 94 arch/arm/crypto/ghash-ce-glue.c | 318 arch/arm/crypto/sha1-ce-core.S | 134 +++ arch/arm/crypto/sha1-ce-glue.c | 150 arch/arm/crypto/sha2-ce-core.S | 134 +++ arch/arm/crypto/sha2-ce-glue.c | 203 crypto/Kconfig | 75 -- 12 files changed, 2205 insertions(+), 75 deletions(-) create mode 100644 arch/arm/crypto/Kconfig create mode 100644 arch/arm/crypto/aes-ce-core.S create mode 100644 arch/arm/crypto/aes-ce-glue.c create mode 100644 arch/arm/crypto/ghash-ce-core.S create mode 100644 arch/arm/crypto/ghash-ce-glue.c create mode 100644 arch/arm/crypto/sha1-ce-core.S create mode 100644 arch/arm/crypto/sha1-ce-glue.c create mode 100644 arch/arm/crypto/sha2-ce-core.S create mode 100644 arch/arm/crypto/sha2-ce-glue.c -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 5/5] crypto/arm: add support for GHASH using ARMv8 Crypto Extensions
This implements the GHASH hash algorithm (as used by the GCM AEAD chaining mode) using the AArch32 version of the 64x64 to 128 bit polynomial multiplication instruction (vmull.p64) that is part of the ARMv8 Crypto Extensions. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig | 10 ++ arch/arm/crypto/Makefile| 2 + arch/arm/crypto/ghash-ce-core.S | 94 arch/arm/crypto/ghash-ce-glue.c | 318 4 files changed, 424 insertions(+) create mode 100644 arch/arm/crypto/ghash-ce-core.S create mode 100644 arch/arm/crypto/ghash-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 63588bdf3b5d..d63f319924d2 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -110,4 +110,14 @@ config CRYPTO_AES_ARM_CE Use an implementation of AES in CBC, CTR and XTS modes that uses ARMv8 Crypto Extensions +config CRYPTO_GHASH_ARM_CE + tristate PMULL-accelerated GHASH using ARMv8 Crypto Extensions + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + select CRYPTO_CRYPTD + help + Use an implementation of GHASH (used by the GCM AEAD chaining mode) + that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) + that is part of the ARMv8 Crypto Extensions + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 2514c420e8d3..9a273bd7dffd 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o @@ -19,6 +20,7 @@ sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o +ghash-arm-ce-y := 
ghash-ce-core.o ghash-ce-glue.o quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S new file mode 100644 index ..e643a15eadf2 --- /dev/null +++ b/arch/arm/crypto/ghash-ce-core.S @@ -0,0 +1,94 @@ +/* + * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. + * + * Copyright (C) 2015 Linaro Ltd. ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include linux/linkage.h +#include asm/assembler.h + + SHASH .reqq0 + SHASH2 .reqq1 + T1 .reqq2 + T2 .reqq3 + MASK.reqq4 + XL .reqq5 + XM .reqq6 + XH .reqq7 + IN1 .reqq7 + + SHASH_L .reqd0 + SHASH_H .reqd1 + SHASH2_L.reqd2 + T1_L.reqd4 + MASK_L .reqd8 + XL_L.reqd10 + XL_H.reqd11 + XM_L.reqd12 + XM_H.reqd13 + XH_L.reqd14 + + .text + .fpucrypto-neon-fp-armv8 + + /* +* void pmull_ghash_update(int blocks, u64 dg[], const char *src, +* struct ghash_key const *k, const char *head) +*/ +ENTRY(pmull_ghash_update) + vld1.8 {SHASH}, [r3] + vld1.64 {XL}, [r1] + vmov.i8 MASK, #0xe1 + vext.8 SHASH2, SHASH, SHASH, #8 + vshl.u64MASK, MASK, #57 + veorSHASH2, SHASH2, SHASH + + /* do the head block first, if supplied */ + ldr ip, [sp] + teq ip, #0 + beq 0f + vld1.64 {T1}, [ip] + teq r0, #0 + b 1f + +0: vld1.64 {T1}, [r2]! + subsr0, r0, #1 + +1: /* multiply XL by SHASH in GF(2^128) */ +#ifndef CONFIG_CPU_BIG_ENDIAN + vrev64.8T1, T1 +#endif + vext.8 T2, XL, XL, #8 + vext.8 IN1, T1, T1, #8 + veorT1, T1, T2 + veorXL, XL, IN1 + + vmull.p64 XH, SHASH_H, XL_H @ a1 * b1 + veorT1, T1, XL + vmull.p64 XL, SHASH_L, XL_L @ a0 * b0 + vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0) + + vext.8 T1, XL, XH, #8 + veorT2, XL, XH + veorXM, XM, T1
[PATCH v2 1/5] crypto/arm: move ARM specific Kconfig definitions to a dedicated file
This moves all Kconfig symbols defined in crypto/Kconfig that depend on CONFIG_ARM to a dedicated Kconfig file in arch/arm/crypto, which is where the code that implements those features resides as well. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/Kconfig| 3 ++ arch/arm/crypto/Kconfig | 85 + crypto/Kconfig | 75 --- 3 files changed, 88 insertions(+), 75 deletions(-) create mode 100644 arch/arm/crypto/Kconfig diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9f1f09a2bc9b..e60da5ab8aec 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2167,6 +2167,9 @@ source arch/arm/Kconfig.debug source security/Kconfig source crypto/Kconfig +if CRYPTO +source arch/arm/crypto/Kconfig +endif source lib/Kconfig diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig new file mode 100644 index ..66fe82857e99 --- /dev/null +++ b/arch/arm/crypto/Kconfig @@ -0,0 +1,85 @@ + +menuconfig ARM_CRYPTO + bool ARM Accelerated Cryptographic Algorithms + depends on ARM + help + Say Y here to choose from a selection of cryptographic algorithms + implemented using ARM specific CPU features or instructions. + +if ARM_CRYPTO + +config CRYPTO_SHA1_ARM + tristate SHA1 digest algorithm (ARM-asm) + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM assembler. + +config CRYPTO_SHA1_ARM_NEON + tristate SHA1 digest algorithm (ARM NEON) + depends on KERNEL_MODE_NEON + select CRYPTO_SHA1_ARM + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM NEON assembly, when NEON instructions are + available. + +config CRYPTO_SHA512_ARM_NEON + tristate SHA384 and SHA512 digest algorithm (ARM NEON) + depends on KERNEL_MODE_NEON + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-512 secure hash standard (DFIPS 180-2) implemented + using ARM NEON instructions, when available. 
+ + This version of SHA implements a 512 bit hash with 256 bits of + security against collision attacks. + + This code also includes SHA-384, a 384 bit hash with 192 bits + of security against collision attacks. + +config CRYPTO_AES_ARM + tristate AES cipher algorithms (ARM-asm) + depends on ARM + select CRYPTO_ALGAPI + select CRYPTO_AES + help + Use optimized AES assembler routines for ARM platforms. + + AES cipher algorithms (FIPS-197). AES uses the Rijndael + algorithm. + + Rijndael appears to be consistently a very good performer in + both hardware and software across a wide range of computing + environments regardless of its use in feedback or non-feedback + modes. Its key setup time is excellent, and its key agility is + good. Rijndael's very low memory requirements make it very well + suited for restricted-space environments, in which it also + demonstrates excellent performance. Rijndael's operations are + among the easiest to defend against power and timing attacks. + + The AES specifies three key sizes: 128, 192 and 256 bits + + See http://csrc.nist.gov/encryption/aes/ for more information. + +config CRYPTO_AES_ARM_BS + tristate Bit sliced AES using NEON instructions + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES_ARM + select CRYPTO_ABLK_HELPER + help + Use a faster and more secure NEON based implementation of AES in CBC, + CTR and XTS modes + + Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode + and for XTS mode encryption, CBC and XTS mode decryption speedup is + around 25%. (CBC encryption speed is not affected by this driver.) + This implementation does not rely on any lookup tables so it is + believed to be invulnerable to cache timing attacks. 
+ +endif diff --git a/crypto/Kconfig b/crypto/Kconfig index 50f4da44a304..c50900b467c8 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -555,26 +555,6 @@ config CRYPTO_SHA1_SPARC64 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented using sparc64 crypto instructions, when available. -config CRYPTO_SHA1_ARM - tristate SHA1 digest algorithm (ARM-asm) - depends on ARM - select CRYPTO_SHA1 - select CRYPTO_HASH - help - SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented - using optimized ARM assembler. - -config CRYPTO_SHA1_ARM_NEON - tristate SHA1 digest algorithm (ARM NEON) - depends on ARM KERNEL_MODE_NEON
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
On 24 March 2015 at 14:05, Jean-Christophe PLAGNIOL-VILLARD plagn...@jcrosoft.com wrote: + 'eor ($t0,$t0,$a,ror#.($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + 'add ($h,$h,$t1)', # h+=Ch(e,f,g) + 'ldr ($t1,sprintf [sp,#%d],4*(($j+1)15)) if (($j15)!=15);'. + 'ldr ($t1,[$Ktbl]) if ($j==15);'. + 'ldr ($t1,[sp,#64])if ($j==31)', + 'and ($t3,$t3,$t2)', # (b^c)=(a^b) + 'add ($d,$d,$h)',# d+=h + 'add ($h,$h,$t0,ror#$Sigma0[0]);'. # h+=Sigma0(a) + 'eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=___; +#if __ARM_MAX_ARCH__=7 this will be compile on armv4 but gcc will not allow it we need to drop the neon code for older non v7 build The .arch and .fpu declarations ensure that it can be built regardless of the platform you are compiling for, unless you have a really old toolchain. I known but does not work for me The glue code ensures that the module can only be loaded if HWCAP_NEON is set. Did you get errors trying to build it? yes I do I use arm-none-linux-gnueabi-gcc (Sourcery CodeBench Lite 2014.05-29) 4.8.3 20140320 (prerelease) Copyright (C) 2013 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. so it's not that old Could you share the error log please? -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
On 24 March 2015 at 13:27, Jean-Christophe PLAGNIOL-VILLARD plagn...@jcrosoft.com wrote: On 13:50 Mon 23 Mar , Sami Tolvanen wrote: Add Andy Polyakov's optimized assembly and NEON implementations for SHA-256/224. The sha256-armv4.pl script for generating the assembly code is from OpenSSL commit 2ecd32a1f8f0643ae7b38f59bbaf9f0d6ef326fe. Compared to sha256-generic these implementations have the following tcrypt speed improvements on Motorola Nexus 6 (Snapdragon 805): bsb/u sha256-neon sha256-asm 1616 x1.32x1.19 6416 x1.27x1.15 6464 x1.36x1.20 256 16 x1.22x1.11 256 64 x1.36x1.19 256 256 x1.59x1.23 1024 16 x1.21x1.10 1024 256 x1.65x1.23 1024 1024 x1.76x1.25 2048 16 x1.21x1.10 2048 256 x1.66x1.23 2048 1024 x1.78x1.25 2048 2048 x1.79x1.25 4096 16 x1.20x1.09 4096 256 x1.66x1.23 4096 1024 x1.79x1.26 4096 4096 x1.82x1.26 8192 16 x1.20x1.09 8192 256 x1.67x1.23 8192 1024 x1.80x1.26 8192 4096 x1.85x1.28 8192 8192 x1.85x1.27 Where bs refers to block size and b/u to bytes per update. Signed-off-by: Sami Tolvanen samitolva...@google.com Cc: Andy Polyakov ap...@openssl.org --- Changes since v1: Rebased to Herbert's cryptodev tree Include sha256-armv4.pl and use it to generate sha256-core.S Add integer-only assembly version as sha256-asm Add support for SHA-224 to the glue code Change priority for sha256/224-ce to 300 --- arch/arm/crypto/Kconfig |7 arch/arm/crypto/Makefile |8 arch/arm/crypto/sha2-ce-glue.c|4 arch/arm/crypto/sha256-armv4.pl | 713 ++ arch/arm/crypto/sha256-core.S_shipped | 2775 arch/arm/crypto/sha256_glue.c | 246 ++ arch/arm/crypto/sha256_glue.h | 23 arch/arm/crypto/sha256_neon_glue.c| 172 + 8 files changed, 3945 insertions(+), 3 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index d63f319..458729d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -46,6 +46,13 @@ config CRYPTO_SHA2_ARM_CE SHA-256 secure hash standard (DFIPS 180-2) implemented using special ARMv8 Crypto Extensions. 
+config CRYPTO_SHA256_ARM + tristate SHA-224/256 digest algorithm (ARM-asm and NEON) + select CRYPTO_HASH + help + SHA-256 secure hash standard (DFIPS 180-2) implemented + using optimized ARM assembler and NEON, when available. + config CRYPTO_SHA512_ARM_NEON tristate SHA384 and SHA512 digest algorithm (ARM NEON) depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 9a273bd..ef46e89 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o @@ -16,6 +17,8 @@ aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o +sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o +sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y:= sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y:= sha2-ce-core.o sha2-ce-glue.o @@ -28,4 +31,7 @@ quiet_cmd_perl = PERL$@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S +$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index 9ffe8ad..0449eca 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -163,7 +163,7 @@ static struct shash_alg algs[] = { { .base = { .cra_name = sha224, .cra_driver_name= sha224-ce, - .cra_priority = 200, + 
.cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, @@
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
On 24 March 2015 at 12:35, Herbert Xu herb...@gondor.apana.org.au wrote: On Mon, Mar 23, 2015 at 07:26:03PM +0100, Ard Biesheuvel wrote: (resending due to size bounce) Aha that's why the patch didn't make it through. Can it be split up? Not so easily. It consists (among other things) of a .pl file that generates a .S file, but to prevent introducing a build time dependency on perl, the .S file is included as a .S_shipped file. That is the big one. I suppose we could add the .S_shipped file in a separate patch, but I'd prefer to keep it as is -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
On 24 March 2015 at 12:46, Herbert Xu herb...@gondor.apana.org.au wrote: On Tue, Mar 24, 2015 at 12:40:50PM +0100, Ard Biesheuvel wrote: Not so easily. It consists (among other things) of a .pl file that generates a .S file, but to prevent introducing a build time dependency on perl, the .S file is included as a .S_shipped file. That is the big one. I suppose we could add the .S_shipped file in a separate patch, but I'd prefer to keep it as is OK then this will have to go up on a website somewhere and then the URL can be posted to the list for review. You can pull it from here if you like https://git.linaro.org/people/ard.biesheuvel/linux-arm.git/shortlog/refs/tags/arm-sha256-neon (rebased onto your cryptodev-2.6/master branch as of 30 mins ago) Does that work for you? Thanks, Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
On 24 March 2015 at 12:32, Herbert Xu herb...@gondor.apana.org.au wrote: On Mon, Mar 23, 2015 at 01:50:09PM +, Sami Tolvanen wrote: Add Andy Polyakov's optimized assembly and NEON implementations for SHA-256/224. The sha256-armv4.pl script for generating the assembly code is from OpenSSL commit 2ecd32a1f8f0643ae7b38f59bbaf9f0d6ef326fe. Compared to sha256-generic these implementations have the following tcrypt speed improvements on Motorola Nexus 6 (Snapdragon 805): bsb/u sha256-neon sha256-asm 1616 x1.32x1.19 6416 x1.27x1.15 6464 x1.36x1.20 256 16 x1.22x1.11 256 64 x1.36x1.19 256 256 x1.59x1.23 1024 16 x1.21x1.10 1024 256 x1.65x1.23 1024 1024 x1.76x1.25 2048 16 x1.21x1.10 2048 256 x1.66x1.23 2048 1024 x1.78x1.25 2048 2048 x1.79x1.25 4096 16 x1.20x1.09 4096 256 x1.66x1.23 4096 1024 x1.79x1.26 4096 4096 x1.82x1.26 8192 16 x1.20x1.09 8192 256 x1.67x1.23 8192 1024 x1.80x1.26 8192 4096 x1.85x1.28 8192 8192 x1.85x1.27 Where bs refers to block size and b/u to bytes per update. Signed-off-by: Sami Tolvanen samitolva...@google.com Cc: Andy Polyakov ap...@openssl.org Your patch didn't make it to the linux-crypto list and therefore it never got into patchwork. Can you please find out why and resend it? Most likely because it is so big ... -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
On 24 March 2015 at 14:06, Ard Biesheuvel ard.biesheu...@linaro.org wrote: On 24 March 2015 at 14:05, Jean-Christophe PLAGNIOL-VILLARD plagn...@jcrosoft.com wrote: + 'eor ($t0,$t0,$a,ror#.($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + 'add ($h,$h,$t1)', # h+=Ch(e,f,g) + 'ldr ($t1,sprintf [sp,#%d],4*(($j+1)15)) if (($j15)!=15);'. + 'ldr ($t1,[$Ktbl]) if ($j==15);'. + 'ldr ($t1,[sp,#64])if ($j==31)', + 'and ($t3,$t3,$t2)', # (b^c)=(a^b) + 'add ($d,$d,$h)',# d+=h + 'add ($h,$h,$t0,ror#$Sigma0[0]);'. # h+=Sigma0(a) + 'eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=___; +#if __ARM_MAX_ARCH__=7 this will be compile on armv4 but gcc will not allow it we need to drop the neon code for older non v7 build The .arch and .fpu declarations ensure that it can be built regardless of the platform you are compiling for, unless you have a really old toolchain. I known but does not work for me The glue code ensures that the module can only be loaded if HWCAP_NEON is set. Did you get errors trying to build it? yes I do I use arm-none-linux-gnueabi-gcc (Sourcery CodeBench Lite 2014.05-29) 4.8.3 20140320 (prerelease) Copyright (C) 2013 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. so it's not that old Could you share the error log please? OK, I spotted one issue with this code: arch/arm/crypto/sha256-core.S: Assembler messages: arch/arm/crypto/sha256-core.S:1847: Error: invalid constant (efb0) after fixup This is caused by the fact that, when building the integer-only code for an older architecture, the conditional compilation produces a slightly bigger preceding function, and the symbol K256 is out of range for the adr instruction. @Jean-Christophe: is that the same problem that you hit? 
@Andy: I propose we do something similar as in the bsaes code: #ifdef __thumb__ #define adrl adr #endif and replace the offending line with adrl r14,K256 @Herbert: we will need to respin this, so please don't pull it yet. Regards, -- Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH 4/6] crypto: sha256-generic: move to generic glue implementation
This updates the generic SHA-256 implementation to use the new shared SHA-256 glue code. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 1 + crypto/sha256_generic.c | 131 +++- 2 files changed, 18 insertions(+), 114 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 551bbf2e2ab5..59243df4ea13 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -608,6 +608,7 @@ config CRYPTO_SHA256_BASE config CRYPTO_SHA256 tristate SHA224 and SHA256 digest algorithm + select CRYPTO_SHA256_BASE select CRYPTO_HASH help SHA256 secure hash standard (DFIPS 180-2). diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index b001ff5c2efc..7119346c2f41 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -214,136 +214,39 @@ static void sha256_transform(u32 *state, const u8 *input) memzero_explicit(W, 64 * sizeof(u32)); } -static int sha224_init(struct shash_desc *desc) +static void sha256_generic_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA224_H0; - sctx-state[1] = SHA224_H1; - sctx-state[2] = SHA224_H2; - sctx-state[3] = SHA224_H3; - sctx-state[4] = SHA224_H4; - sctx-state[5] = SHA224_H5; - sctx-state[6] = SHA224_H6; - sctx-state[7] = SHA224_H7; - sctx-count = 0; + if (head) + sha256_transform(state, head); - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA256_H0; - sctx-state[1] = SHA256_H1; - sctx-state[2] = SHA256_H2; - sctx-state[3] = SHA256_H3; - sctx-state[4] = SHA256_H4; - sctx-state[5] = SHA256_H5; - sctx-state[6] = SHA256_H6; - sctx-state[7] = SHA256_H7; - sctx-count = 0; - - return 0; + while (blocks--) { + sha256_transform(state, src); + src += SHA256_BLOCK_SIZE; + } } int crypto_sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned 
int partial, done; - const u8 *src; - - partial = sctx-count 0x3f; - sctx-count += len; - done = 0; - src = data; - - if ((partial + len) 63) { - if (partial) { - done = -partial; - memcpy(sctx-buf + partial, data, done + 64); - src = sctx-buf; - } - - do { - sha256_transform(sctx-state, src); - done += 64; - src = data + done; - } while (done + 63 len); - - partial = 0; - } - memcpy(sctx-buf + partial, src, len - done); - - return 0; + return sha256_base_do_update(desc, data, len, sha256_generic_block_fn, +NULL); } EXPORT_SYMBOL(crypto_sha256_update); static int sha256_final(struct shash_desc *desc, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - __be64 bits; - unsigned int index, pad_len; - int i; - static const u8 padding[64] = { 0x80, }; - - /* Save number of bits */ - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64. */ - index = sctx-count 0x3f; - pad_len = (index 56) ? (56 - index) : ((64+56) - index); - crypto_sha256_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha256_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha224_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[SHA256_DIGEST_SIZE]; - - sha256_final(desc, D); - - memcpy(hash, D, SHA224_DIGEST_SIZE); - memzero_explicit(D, SHA256_DIGEST_SIZE); - - return 0; -} - -static int sha256_export(struct shash_desc *desc, void *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - return 0; -} - -static int sha256_import(struct shash_desc *desc, const void *in) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - return 0; + sha256_base_do_finalize(desc, sha256_generic_block_fn, NULL); + return sha256_base_finish(desc, out); } static struct shash_alg sha256_algs[2] = { { .digestsize = SHA256_DIGEST_SIZE
[RFC PATCH 1/6] crypto: sha512: implement base layer for SHA-512
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-512 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 3 ++ crypto/Makefile | 1 + crypto/sha512_base.c | 143 +++ include/crypto/sha.h | 20 +++ 4 files changed, 167 insertions(+) create mode 100644 crypto/sha512_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 88639937a934..3400cf4e3cdb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -641,6 +641,9 @@ config CRYPTO_SHA256_SPARC64 SHA-256 secure hash standard (DFIPS 180-2) implemented using sparc64 crypto instructions, when available. +config CRYPTO_SHA512_BASE + tristate + config CRYPTO_SHA512 tristate SHA384 and SHA512 digest algorithms select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 97b7d3ac87e7..6174bf2592fe 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o +obj-$(CONFIG_CRYPTO_SHA512_BASE) += sha512_base.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o diff --git a/crypto/sha512_base.c b/crypto/sha512_base.c new file mode 100644 index ..488e24cc6f0a --- /dev/null +++ b/crypto/sha512_base.c @@ -0,0 +1,143 @@ +/* + * sha512_base.c - core logic for SHA-512 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +int sha384_base_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha512_state){ + .state = { + SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3, + SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7, + } + }; + return 0; +} +EXPORT_SYMBOL(sha384_base_init); + +int sha512_base_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha512_state){ + .state = { + SHA512_H0, SHA512_H1, SHA512_H2, SHA512_H3, + SHA512_H4, SHA512_H5, SHA512_H6, SHA512_H7, + } + }; + return 0; +} +EXPORT_SYMBOL(sha512_base_init); + +int sha512_base_export(struct shash_desc *desc, void *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + struct sha512_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(sha512_base_export); + +int sha512_base_import(struct shash_desc *desc, const void *in) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + struct sha512_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(sha512_base_import); + +int sha512_base_do_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha512_block_fn *block_fn, void *p) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count[0] % SHA512_BLOCK_SIZE; + + sctx-count[0] += len; + if (sctx-count[0] len) + sctx-count[1]++; + + if ((partial + len) = SHA512_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA512_BLOCK_SIZE - partial; + + memcpy(sctx-buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA512_BLOCK_SIZE; + len %= SHA512_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, +partial ? 
sctx-buf : NULL, p); + data += blocks * SHA512_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buf + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(sha512_base_do_update); + +int sha512_base_do_finalize(struct shash_desc *desc, sha512_block_fn *block_fn, + void *p) +{ + static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; + + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int padlen; + __be64 bits[2]; + + padlen = SHA512_BLOCK_SIZE - +(sctx-count[0] + sizeof(bits)) % SHA512_BLOCK_SIZE; + + bits[0
[RFC PATCH 2/6] crypto: sha512-generic: move to generic glue implementation
This updated the generic SHA-512 implementation to use the generic shared SHA-512 glue code. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 1 + crypto/sha512_generic.c | 117 +++- 2 files changed, 16 insertions(+), 102 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 3400cf4e3cdb..880aa518c2eb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -646,6 +646,7 @@ config CRYPTO_SHA512_BASE config CRYPTO_SHA512 tristate SHA384 and SHA512 digest algorithms + select CRYPTO_SHA512_BASE select CRYPTO_HASH help SHA512 secure hash standard (DFIPS 180-2). diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 1c3c3767e079..0d8e973d0d4b 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -130,123 +130,36 @@ sha512_transform(u64 *state, const u8 *input) a = b = c = d = e = f = g = h = t1 = t2 = 0; } -static int -sha512_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA512_H0; - sctx-state[1] = SHA512_H1; - sctx-state[2] = SHA512_H2; - sctx-state[3] = SHA512_H3; - sctx-state[4] = SHA512_H4; - sctx-state[5] = SHA512_H5; - sctx-state[6] = SHA512_H6; - sctx-state[7] = SHA512_H7; - sctx-count[0] = sctx-count[1] = 0; - - return 0; -} - -static int -sha384_init(struct shash_desc *desc) +static void sha512_generic_block_fn(int blocks, u8 const *src, u64 *state, + const u8 *head, void *p) { - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA384_H0; - sctx-state[1] = SHA384_H1; - sctx-state[2] = SHA384_H2; - sctx-state[3] = SHA384_H3; - sctx-state[4] = SHA384_H4; - sctx-state[5] = SHA384_H5; - sctx-state[6] = SHA384_H6; - sctx-state[7] = SHA384_H7; - sctx-count[0] = sctx-count[1] = 0; + if (head) + sha512_transform(state, head); - return 0; + while (blocks--) { + sha512_transform(state, src); + src += SHA512_BLOCK_SIZE; + } } int crypto_sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha512_state 
*sctx = shash_desc_ctx(desc); - - unsigned int i, index, part_len; - - /* Compute number of bytes mod 128 */ - index = sctx-count[0] 0x7f; - - /* Update number of bytes */ - if ((sctx-count[0] += len) len) - sctx-count[1]++; - -part_len = 128 - index; - - /* Transform as many times as possible. */ - if (len = part_len) { - memcpy(sctx-buf[index], data, part_len); - sha512_transform(sctx-state, sctx-buf); - - for (i = part_len; i + 127 len; i+=128) - sha512_transform(sctx-state, data[i]); - - index = 0; - } else { - i = 0; - } - - /* Buffer remaining input */ - memcpy(sctx-buf[index], data[i], len - i); - - return 0; + return sha512_base_do_update(desc, data, len, sha512_generic_block_fn, +NULL); } EXPORT_SYMBOL(crypto_sha512_update); static int sha512_final(struct shash_desc *desc, u8 *hash) { - struct sha512_state *sctx = shash_desc_ctx(desc); -static u8 padding[128] = { 0x80, }; - __be64 *dst = (__be64 *)hash; - __be64 bits[2]; - unsigned int index, pad_len; - int i; - - /* Save number of bits */ - bits[1] = cpu_to_be64(sctx-count[0] 3); - bits[0] = cpu_to_be64(sctx-count[1] 3 | sctx-count[0] 61); - - /* Pad out to 112 mod 128. */ - index = sctx-count[0] 0x7f; - pad_len = (index 112) ? (112 - index) : ((128+112) - index); - crypto_sha512_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be64(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(struct sha512_state)); - - return 0; -} - -static int sha384_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[64]; - - sha512_final(desc, D); - - memcpy(hash, D, 48); - memzero_explicit(D, 64); - - return 0; + sha512_base_do_finalize(desc, sha512_generic_block_fn, NULL); + return sha512_base_finish(desc, hash); } static struct shash_alg sha512_algs[2] = { { .digestsize = SHA512_DIGEST_SIZE, - .init = sha512_init, + .init = sha512_base_init, .update = crypto_sha512_update, .final = sha512_final, .descsize
[RFC PATCH 5/6] arm64/crypto: move ARMv8 SHA-224/256 driver to SHA-256 base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/Kconfig| 1 + arch/arm64/crypto/sha2-ce-core.S | 11 +- arch/arm64/crypto/sha2-ce-glue.c | 211 ++- 3 files changed, 40 insertions(+), 183 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2cf32e9887e1..13008362154b 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -17,6 +17,7 @@ config CRYPTO_SHA2_ARM64_CE tristate SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions) depends on ARM64 KERNEL_MODE_NEON select CRYPTO_HASH + select CRYPTO_SHA256_BASE config CRYPTO_GHASH_ARM64_CE tristate GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 7f29fc031ea8..65ad56636fba 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -135,15 +135,18 @@ CPU_LE( rev32 v19.16b, v19.16b) /* * Final block: add padding and total bit count. -* Skip if we have no total byte count in x4. In that case, the input -* size was not a round multiple of the block size, and the padding is -* handled by the C code. +* Skip if the input size was not a round multiple of the block size, +* the padding is handled by the C code in that case. */ cbz x4, 3f + ldr x5, [x2, #-8] // sha256_state::count + tst x5, #0x3f // round multiple of block size? 
+ b.ne3f + str wzr, [x4] moviv17.2d, #0 mov x8, #0x8000 moviv18.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) + ror x7, x5, #29 // ror(lsl(x4, 3), 32) fmovd16, x8 mov x4, #0 mov v19.d[0], xzr diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index ae67e88c28b9..8b35ca32538a 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -20,195 +20,48 @@ MODULE_DESCRIPTION(SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); -asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, -u8 *head, long bytes); +asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p); -static int sha224_init(struct shash_desc *desc) +static int sha256_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha256_state){ - .state = { - SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, - SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, - } - }; - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha256_state){ - .state = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, - } - }; - return 0; -} - -static int sha2_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; - - sctx-count += len; - - if ((partial + len) = SHA256_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx-buf + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - kernel_neon_begin_partial(28); - sha2_ce_transform(blocks, data, sctx-state, - partial ? 
sctx-buf : NULL, 0); - kernel_neon_end(); - - data += blocks * SHA256_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buf + partial, data, len); - return 0; -} - -static void sha2_final(struct shash_desc *desc) -{ - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - struct sha256_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - u32 padlen = SHA256_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA256_BLOCK_SIZE
[RFC PATCH 3/6] crypto: sha256: implement base layer for SHA-256
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-256 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 4 ++ crypto/Makefile | 1 + crypto/sha256_base.c | 138 +++ include/crypto/sha.h | 17 +++ 4 files changed, 160 insertions(+) create mode 100644 crypto/sha256_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 880aa518c2eb..551bbf2e2ab5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -602,6 +602,10 @@ config CRYPTO_SHA1_MB lanes remain unfilled, a flush operation will be initiated to process the crypto jobs, adding a slight latency. + +config CRYPTO_SHA256_BASE + tristate + config CRYPTO_SHA256 tristate SHA224 and SHA256 digest algorithm select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 6174bf2592fe..bb9bafeb3ac7 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o +obj-$(CONFIG_CRYPTO_SHA256_BASE) += sha256_base.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512_BASE) += sha512_base.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o diff --git a/crypto/sha256_base.c b/crypto/sha256_base.c new file mode 100644 index ..1ba2f6812c6b --- /dev/null +++ b/crypto/sha256_base.c @@ -0,0 +1,138 @@ +/* + * sha256_base.c - core logic for SHA-256 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +int sha224_base_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, + } + }; + return 0; +} +EXPORT_SYMBOL(sha224_base_init); + +int sha256_base_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + } + }; + return 0; +} +EXPORT_SYMBOL(sha256_base_init); + +int sha256_base_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(sha256_base_export); + +int sha256_base_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(sha256_base_import); + +int sha256_base_do_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_block_fn *block_fn, void *p) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; + + sctx-count += len; + + if ((partial + len) = SHA256_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA256_BLOCK_SIZE - partial; + + memcpy(sctx-buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, + partial ? 
sctx-buf : NULL, p); + data += blocks * SHA256_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buf + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(sha256_base_do_update); + +int sha256_base_do_finalize(struct shash_desc *desc, sha256_block_fn *block_fn, + void *p) +{ + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int padlen; + __be64 bits; + + padlen = SHA256_BLOCK_SIZE - +(sctx-count + sizeof(bits)) % SHA256_BLOCK_SIZE; + + bits = cpu_to_be64(sctx-count 3); + + sha256_base_do_update(desc
[PATCH v2 00/14] crypto: SHA glue code consolidation
Hello all, This is v2 of what is now a complete glue code consolidation series for generic, x86, arm and arm64 implementations of SHA-1, SHA-224/256 and SHA-384/512. The base layer implements all the update and finalization logic around the block transforms, where the prototypes of the latter look something like this: typedef void (shaXXX_block_fn)(int blocks, u8 const *src, uXX *state, const u8 *head, void *p); The block implementation should process the head block first, then process the requested number of block starting at 'src'. The generic pointer 'p' is passed down from the do_update/do_finalize() versions; this is used for instance by the ARM64 implementations to indicate to the core ASM implementation that it should finalize the digest, which it will do only if the input was a round multiple of the block size. The generic pointer is used here as a means of conveying that information back and forth. Note that the base functions prototypes are all 'returning int' but they all return 0. They should be invoked as tail calls where possible to eliminate some of the function call overhead. If that is not possible, the return values can be safely ignored. Changes since v1 (RFC): - prefixed globally visible generic symbols with crypto_ - added SHA-1 base layer - updated init code to only set the initial constants and clear the count, clearing the buffer is unnecessary [Markus] - favor the small update path in crypto_sha_XXX_base_do_update() [Markus] - update crypto_sha_XXX_do_finalize() to use memset() on the buffer directly rather than copying a statically allocated padding buffer into it [Markus] - moved a bunch of existing arm and x86 implementations to use the new base layers Note: looking at the generated asm (for arm64), I noticed that the memcpy/memset invocations with compile time constant src and len arguments (which includes the empty struct assignments) are eliminated completely, and replaced by direct loads and stores. 
Hopefully this addresses the concern raised by Markus regarding this. Ard Biesheuvel (14): crypto: sha512: implement base layer for SHA-512 crypto: sha256: implement base layer for SHA-256 crypto: sha1: implement base layer for SHA-1 crypto: sha512-generic: move to generic glue implementation crypto: sha256-generic: move to generic glue implementation crypto: sha1-generic: move to generic glue implementation crypto/arm: move SHA-1 ARM asm implementation to base layer crypto/arm: move SHA-1 ARMv8 implementation to base layer crypto/arm: move SHA-224/256 ARMv8 implementation to base layer crypto/arm64: move SHA-1 ARMv8 implementation to base layer crypto/arm64: move SHA-224/256 ARMv8 implementation to base layer crypto/x86: move SHA-1 SSSE3 implementation to base layer crypto/x86: move SHA-224/256 SSSE3 implementation to base layer crypto/x86: move SHA-384/512 SSSE3 implementation to base layer arch/arm/crypto/Kconfig | 4 +- arch/arm/crypto/sha1-ce-glue.c | 110 +--- arch/arm/{include/asm = }/crypto/sha1.h | 3 + arch/arm/crypto/sha1_glue.c | 117 - arch/arm/crypto/sha2-ce-glue.c | 151 +- arch/arm64/crypto/Kconfig| 2 + arch/arm64/crypto/sha1-ce-core.S | 11 +- arch/arm64/crypto/sha1-ce-glue.c | 132 arch/arm64/crypto/sha2-ce-core.S | 11 +- arch/arm64/crypto/sha2-ce-glue.c | 208 +-- arch/x86/crypto/sha1_ssse3_glue.c| 139 + arch/x86/crypto/sha256_ssse3_glue.c | 186 ++- arch/x86/crypto/sha512_ssse3_glue.c | 195 ++--- crypto/Kconfig | 16 +++ crypto/Makefile | 3 + crypto/sha1_base.c | 125 +++ crypto/sha1_generic.c| 105 crypto/sha256_base.c | 140 + crypto/sha256_generic.c | 139 - crypto/sha512_base.c | 143 + crypto/sha512_generic.c | 126 --- include/crypto/sha.h | 62 + 22 files changed, 836 insertions(+), 1292 deletions(-) rename arch/arm/{include/asm = }/crypto/sha1.h (67%) create mode 100644 crypto/sha1_base.c create mode 100644 crypto/sha256_base.c create mode 100644 crypto/sha512_base.c -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in 
the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 05/14] crypto: sha256-generic: move to generic glue implementation
This updates the generic SHA-256 implementation to use the new shared SHA-256 glue code. It also implements a .finup hook crypto_sha256_finup() and exports it to other modules. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 1 + crypto/sha256_generic.c | 139 ++-- include/crypto/sha.h| 3 ++ 3 files changed, 31 insertions(+), 112 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 83bc1680391a..72bf5af7240d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -611,6 +611,7 @@ config CRYPTO_SHA256_BASE config CRYPTO_SHA256 tristate SHA224 and SHA256 digest algorithm + select CRYPTO_SHA256_BASE select CRYPTO_HASH help SHA256 secure hash standard (DFIPS 180-2). diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index b001ff5c2efc..d5c18c08b3da 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -214,136 +214,50 @@ static void sha256_transform(u32 *state, const u8 *input) memzero_explicit(W, 64 * sizeof(u32)); } -static int sha224_init(struct shash_desc *desc) +static void sha256_generic_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA224_H0; - sctx-state[1] = SHA224_H1; - sctx-state[2] = SHA224_H2; - sctx-state[3] = SHA224_H3; - sctx-state[4] = SHA224_H4; - sctx-state[5] = SHA224_H5; - sctx-state[6] = SHA224_H6; - sctx-state[7] = SHA224_H7; - sctx-count = 0; + if (head) + sha256_transform(state, head); - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA256_H0; - sctx-state[1] = SHA256_H1; - sctx-state[2] = SHA256_H2; - sctx-state[3] = SHA256_H3; - sctx-state[4] = SHA256_H4; - sctx-state[5] = SHA256_H5; - sctx-state[6] = SHA256_H6; - sctx-state[7] = SHA256_H7; - sctx-count = 0; - - return 0; + while (blocks--) { + sha256_transform(state, src); + src += SHA256_BLOCK_SIZE; + } } int crypto_sha256_update(struct 
shash_desc *desc, const u8 *data, unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial, done; - const u8 *src; - - partial = sctx-count 0x3f; - sctx-count += len; - done = 0; - src = data; - - if ((partial + len) 63) { - if (partial) { - done = -partial; - memcpy(sctx-buf + partial, data, done + 64); - src = sctx-buf; - } - - do { - sha256_transform(sctx-state, src); - done += 64; - src = data + done; - } while (done + 63 len); - - partial = 0; - } - memcpy(sctx-buf + partial, src, len - done); - - return 0; + return crypto_sha256_base_do_update(desc, data, len, + sha256_generic_block_fn, NULL); } EXPORT_SYMBOL(crypto_sha256_update); -static int sha256_final(struct shash_desc *desc, u8 *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - __be64 bits; - unsigned int index, pad_len; - int i; - static const u8 padding[64] = { 0x80, }; - - /* Save number of bits */ - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64. */ - index = sctx-count 0x3f; - pad_len = (index 56) ? (56 - index) : ((64+56) - index); - crypto_sha256_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha256_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha224_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[SHA256_DIGEST_SIZE]; - - sha256_final(desc, D); - - memcpy(hash, D, SHA224_DIGEST_SIZE); - memzero_explicit(D, SHA256_DIGEST_SIZE); - - return 0; -} - -static int sha256_export(struct shash_desc *desc, void *out) +int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - return 0; + if (len) + crypto_sha256_base_do_update(desc, data, len, +sha256_generic_block_fn, NULL
[PATCH v2 03/14] crypto: sha1: implement base layer for SHA-1
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-1 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 3 ++ crypto/Makefile | 1 + crypto/sha1_base.c | 125 +++ include/crypto/sha.h | 17 +++ 4 files changed, 146 insertions(+) create mode 100644 crypto/sha1_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 1664bd68b97d..155cc15c2719 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -516,6 +516,9 @@ config CRYPTO_RMD320 Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel. See http://homes.esat.kuleuven.be/~bosselae/ripemd160.html +config CRYPTO_SHA1_BASE + tristate + config CRYPTO_SHA1 tristate SHA1 digest algorithm select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index bb9bafeb3ac7..42446cab15f3 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_CRYPTO_RMD128) += rmd128.o obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o +obj-$(CONFIG_CRYPTO_SHA1_BASE) += sha1_base.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256_BASE) += sha256_base.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o diff --git a/crypto/sha1_base.c b/crypto/sha1_base.c new file mode 100644 index ..30fb0f9b47cf --- /dev/null +++ b/crypto/sha1_base.c @@ -0,0 +1,125 @@ +/* + * sha1_base.c - core logic for SHA-1 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +int crypto_sha1_base_init(struct shash_desc *desc) +{ + static const u32 sha1_init_state[] = { + SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4, + }; + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha1_init_state, sizeof(sctx-state)); + sctx-count = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_init); + +int crypto_sha1_base_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_export); + +int crypto_sha1_base_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_import); + +int crypto_sha1_base_do_update(struct shash_desc *desc, const u8 *data, +unsigned int len, sha1_block_fn *block_fn, +void *p) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; + + sctx-count += len; + + if (unlikely((partial + len) = SHA1_BLOCK_SIZE)) { + int blocks; + + if (partial) { + int p = SHA1_BLOCK_SIZE - partial; + + memcpy(sctx-buffer + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA1_BLOCK_SIZE; + len %= SHA1_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, +partial ? 
sctx-buffer : NULL, p); + data += blocks * SHA1_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buffer + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_do_update); + +int crypto_sha1_base_do_finalize(struct shash_desc *desc, +sha1_block_fn *block_fn, void *p) +{ + const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64); + struct sha1_state *sctx = shash_desc_ctx(desc); + __be64 *bits = (__be64 *)(sctx-buffer + bit_offset); + unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; + + sctx-buffer[partial++] = 0x80; + if (partial bit_offset) { + memset(sctx-buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial); + partial = 0; + + block_fn(1, sctx-buffer, sctx-state, NULL, p); + } + + memset(sctx-buffer + partial, 0x0, bit_offset - partial); + *bits = cpu_to_be64(sctx-count 3); + block_fn(1, sctx-buffer, sctx-state, NULL, p); + + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_do_finalize
[PATCH v2 01/14] crypto: sha512: implement base layer for SHA-512
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-512 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 3 ++ crypto/Makefile | 1 + crypto/sha512_base.c | 143 +++ include/crypto/sha.h | 20 +++ 4 files changed, 167 insertions(+) create mode 100644 crypto/sha512_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 88639937a934..3400cf4e3cdb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -641,6 +641,9 @@ config CRYPTO_SHA256_SPARC64 SHA-256 secure hash standard (DFIPS 180-2) implemented using sparc64 crypto instructions, when available. +config CRYPTO_SHA512_BASE + tristate + config CRYPTO_SHA512 tristate SHA384 and SHA512 digest algorithms select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 97b7d3ac87e7..6174bf2592fe 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o +obj-$(CONFIG_CRYPTO_SHA512_BASE) += sha512_base.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o diff --git a/crypto/sha512_base.c b/crypto/sha512_base.c new file mode 100644 index ..9a60829e06c4 --- /dev/null +++ b/crypto/sha512_base.c @@ -0,0 +1,143 @@ +/* + * sha512_base.c - core logic for SHA-512 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +int crypto_sha384_base_init(struct shash_desc *desc) +{ + static const u64 sha384_init_state[] = { + SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3, + SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7, + }; + struct sha512_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha384_init_state, sizeof(sctx-state)); + sctx-count[0] = sctx-count[1] = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha384_base_init); + +int crypto_sha512_base_init(struct shash_desc *desc) +{ + static const u64 sha512_init_state[] = { + SHA512_H0, SHA512_H1, SHA512_H2, SHA512_H3, + SHA512_H4, SHA512_H5, SHA512_H6, SHA512_H7, + }; + struct sha512_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha512_init_state, sizeof(sctx-state)); + sctx-count[0] = sctx-count[1] = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha512_base_init); + +int crypto_sha512_base_export(struct shash_desc *desc, void *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + struct sha512_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(crypto_sha512_base_export); + +int crypto_sha512_base_import(struct shash_desc *desc, const void *in) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + struct sha512_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(crypto_sha512_base_import); + +int crypto_sha512_base_do_update(struct shash_desc *desc, const u8 *data, +unsigned int len, sha512_block_fn *block_fn, +void *p) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count[0] % SHA512_BLOCK_SIZE; + + sctx-count[0] += len; + if (sctx-count[0] len) + sctx-count[1]++; + + if (unlikely((partial + len) = SHA512_BLOCK_SIZE)) { + int blocks; + + if (partial) { + int p = SHA512_BLOCK_SIZE - partial; + + memcpy(sctx-buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA512_BLOCK_SIZE; + len 
%= SHA512_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, +partial ? sctx-buf : NULL, p); + data += blocks * SHA512_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buf + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(crypto_sha512_base_do_update); + +int crypto_sha512_base_do_finalize(struct shash_desc *desc, + sha512_block_fn *block_fn, void *p) +{ + const int bit_offset = SHA512_BLOCK_SIZE
[PATCH v2 resend 10/14] crypto/arm64: move SHA-1 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/Kconfig| 1 + arch/arm64/crypto/sha1-ce-core.S | 11 ++-- arch/arm64/crypto/sha1-ce-glue.c | 132 +++ 3 files changed, 31 insertions(+), 113 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2cf32e9887e1..c87792dfaacc 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -12,6 +12,7 @@ config CRYPTO_SHA1_ARM64_CE tristate SHA-1 digest algorithm (ARMv8 Crypto Extensions) depends on ARM64 KERNEL_MODE_NEON select CRYPTO_HASH + select CRYPTO_SHA1_BASE config CRYPTO_SHA2_ARM64_CE tristate SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 09d57d98609c..a2c3ad51286b 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -131,15 +131,18 @@ CPU_LE( rev32 v11.16b, v11.16b) /* * Final block: add padding and total bit count. -* Skip if we have no total byte count in x4. In that case, the input -* size was not a round multiple of the block size, and the padding is -* handled by the C code. +* Skip if the input size was not a round multiple of the block size, +* the padding is handled by the C code in that case. */ cbz x4, 3f + ldr x5, [x2, #-8] // sha1_state::count + tst x5, #0x3f // round multiple of block size? 
+ b.ne3f + str wzr, [x4] moviv9.2d, #0 mov x8, #0x8000 moviv10.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) + ror x7, x5, #29 // ror(lsl(x4, 3), 32) fmovd8, x8 mov x4, #0 mov v11.d[0], xzr diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 6fe83f37a750..a1cf07b9a8fa 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -21,132 +21,46 @@ MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, - u8 *head, long bytes); + const u8 *head, void *p); -static int sha1_init(struct shash_desc *desc) +static int sha1_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - return 0; -} - -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; - - sctx-count += len; - - if ((partial + len) = SHA1_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA1_BLOCK_SIZE - partial; - - memcpy(sctx-buffer + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA1_BLOCK_SIZE; - len %= SHA1_BLOCK_SIZE; - - kernel_neon_begin_partial(16); - sha1_ce_transform(blocks, data, sctx-state, - partial ? 
sctx-buffer : NULL, 0); - kernel_neon_end(); - - data += blocks * SHA1_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buffer + partial, data, len); - return 0; -} - -static int sha1_final(struct shash_desc *desc, u8 *out) -{ - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - struct sha1_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - __be32 *dst = (__be32 *)out; - int i; - - u32 padlen = SHA1_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA1_BLOCK_SIZE); - - sha1_update(desc, padding, padlen); - sha1_update(desc, (const u8 *)bits, sizeof(bits)); - - for (i = 0; i SHA1_DIGEST_SIZE / sizeof(__be32); i++) - put_unaligned_be32(sctx-state[i], dst++); + kernel_neon_begin_partial(16); + crypto_sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL); + kernel_neon_end(); - *sctx = (struct sha1_state){}; return 0; } -static int sha1_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha1_ce_finup(struct shash_desc *desc, const u8 *data
[PATCH v2 04/14] crypto: sha512-generic: move to generic glue implementation
This updated the generic SHA-512 implementation to use the generic shared SHA-512 glue code. It also implements a .finup hook crypto_sha512_finup() and exports it to other modules. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 1 + crypto/sha512_generic.c | 126 ++-- include/crypto/sha.h| 2 + 3 files changed, 28 insertions(+), 101 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 155cc15c2719..83bc1680391a 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -653,6 +653,7 @@ config CRYPTO_SHA512_BASE config CRYPTO_SHA512 tristate SHA384 and SHA512 digest algorithms + select CRYPTO_SHA512_BASE select CRYPTO_HASH help SHA512 secure hash standard (DFIPS 180-2). diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 1c3c3767e079..88f36a6920ef 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -130,125 +130,48 @@ sha512_transform(u64 *state, const u8 *input) a = b = c = d = e = f = g = h = t1 = t2 = 0; } -static int -sha512_init(struct shash_desc *desc) +static void sha512_generic_block_fn(int blocks, u8 const *src, u64 *state, + const u8 *head, void *p) { - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA512_H0; - sctx-state[1] = SHA512_H1; - sctx-state[2] = SHA512_H2; - sctx-state[3] = SHA512_H3; - sctx-state[4] = SHA512_H4; - sctx-state[5] = SHA512_H5; - sctx-state[6] = SHA512_H6; - sctx-state[7] = SHA512_H7; - sctx-count[0] = sctx-count[1] = 0; + if (head) + sha512_transform(state, head); - return 0; -} - -static int -sha384_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA384_H0; - sctx-state[1] = SHA384_H1; - sctx-state[2] = SHA384_H2; - sctx-state[3] = SHA384_H3; - sctx-state[4] = SHA384_H4; - sctx-state[5] = SHA384_H5; - sctx-state[6] = SHA384_H6; - sctx-state[7] = SHA384_H7; - sctx-count[0] = sctx-count[1] = 0; - - return 0; + while (blocks--) { + sha512_transform(state, src); + src += SHA512_BLOCK_SIZE; + } 
} int crypto_sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha512_state *sctx = shash_desc_ctx(desc); - - unsigned int i, index, part_len; - - /* Compute number of bytes mod 128 */ - index = sctx-count[0] 0x7f; - - /* Update number of bytes */ - if ((sctx-count[0] += len) len) - sctx-count[1]++; - -part_len = 128 - index; - - /* Transform as many times as possible. */ - if (len = part_len) { - memcpy(sctx-buf[index], data, part_len); - sha512_transform(sctx-state, sctx-buf); - - for (i = part_len; i + 127 len; i+=128) - sha512_transform(sctx-state, data[i]); - - index = 0; - } else { - i = 0; - } - - /* Buffer remaining input */ - memcpy(sctx-buf[index], data[i], len - i); - - return 0; + return crypto_sha512_base_do_update(desc, data, len, + sha512_generic_block_fn, NULL); } EXPORT_SYMBOL(crypto_sha512_update); -static int -sha512_final(struct shash_desc *desc, u8 *hash) +int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash) { - struct sha512_state *sctx = shash_desc_ctx(desc); -static u8 padding[128] = { 0x80, }; - __be64 *dst = (__be64 *)hash; - __be64 bits[2]; - unsigned int index, pad_len; - int i; - - /* Save number of bits */ - bits[1] = cpu_to_be64(sctx-count[0] 3); - bits[0] = cpu_to_be64(sctx-count[1] 3 | sctx-count[0] 61); - - /* Pad out to 112 mod 128. */ - index = sctx-count[0] 0x7f; - pad_len = (index 112) ? (112 - index) : ((128+112) - index); - crypto_sha512_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be64(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(struct sha512_state)); - - return 0; + if (len) + crypto_sha512_base_do_update(desc, data, len, +sha512_generic_block_fn, NULL); + crypto_sha512_base_do_finalize(desc, sha512_generic_block_fn, NULL); + return crypto_sha512_base_finish(desc, hash); } +EXPORT_SYMBOL(crypto_sha512_finup); -static int sha384_final(struct shash_desc *desc, u8 *hash) +int
[RFC PATCH 5/6] arm64/crypto: move ARMv8 SHA-224/256 driver to SHA-256 base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/Kconfig| 1 + arch/arm64/crypto/sha2-ce-core.S | 11 +- arch/arm64/crypto/sha2-ce-glue.c | 211 ++- 3 files changed, 40 insertions(+), 183 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2cf32e9887e1..13008362154b 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -17,6 +17,7 @@ config CRYPTO_SHA2_ARM64_CE tristate SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions) depends on ARM64 KERNEL_MODE_NEON select CRYPTO_HASH + select CRYPTO_SHA256_BASE config CRYPTO_GHASH_ARM64_CE tristate GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 7f29fc031ea8..65ad56636fba 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -135,15 +135,18 @@ CPU_LE( rev32 v19.16b, v19.16b) /* * Final block: add padding and total bit count. -* Skip if we have no total byte count in x4. In that case, the input -* size was not a round multiple of the block size, and the padding is -* handled by the C code. +* Skip if the input size was not a round multiple of the block size, +* the padding is handled by the C code in that case. */ cbz x4, 3f + ldr x5, [x2, #-8] // sha256_state::count + tst x5, #0x3f // round multiple of block size? 
+ b.ne3f + str wzr, [x4] moviv17.2d, #0 mov x8, #0x8000 moviv18.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) + ror x7, x5, #29 // ror(lsl(x4, 3), 32) fmovd16, x8 mov x4, #0 mov v19.d[0], xzr diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index ae67e88c28b9..8b35ca32538a 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -20,195 +20,48 @@ MODULE_DESCRIPTION(SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); -asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, -u8 *head, long bytes); +asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p); -static int sha224_init(struct shash_desc *desc) +static int sha256_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha256_state){ - .state = { - SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, - SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, - } - }; - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha256_state){ - .state = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, - } - }; - return 0; -} - -static int sha2_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; - - sctx-count += len; - - if ((partial + len) = SHA256_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx-buf + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - kernel_neon_begin_partial(28); - sha2_ce_transform(blocks, data, sctx-state, - partial ? 
sctx-buf : NULL, 0); - kernel_neon_end(); - - data += blocks * SHA256_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buf + partial, data, len); - return 0; -} - -static void sha2_final(struct shash_desc *desc) -{ - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - struct sha256_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - u32 padlen = SHA256_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA256_BLOCK_SIZE
[PATCH v2 08/14] crypto/arm: move SHA-1 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 2 +- arch/arm/crypto/sha1-ce-glue.c | 110 +++-- 2 files changed, 31 insertions(+), 81 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index c111d8992afb..31ad19f18af2 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -32,7 +32,7 @@ config CRYPTO_SHA1_ARM_CE tristate SHA1 digest algorithm (ARM v8 Crypto Extensions) depends on KERNEL_MODE_NEON select CRYPTO_SHA1_ARM - select CRYPTO_SHA1 + select CRYPTO_SHA1_BASE select CRYPTO_HASH help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c index a9dd90df9fd7..29039d1bcdf9 100644 --- a/arch/arm/crypto/sha1-ce-glue.c +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -13,114 +13,64 @@ #include linux/crypto.h #include linux/module.h -#include asm/crypto/sha1.h #include asm/hwcap.h #include asm/neon.h #include asm/simd.h #include asm/unaligned.h +#include sha1.h + MODULE_DESCRIPTION(SHA1 secure hash using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); -asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, - u8 *head); - -static int sha1_init(struct shash_desc *desc) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - return 0; -} +asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p); -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha1_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial; - if (!may_use_simd()) + if (!may_use_simd() || + (sctx-count % SHA1_BLOCK_SIZE) + len SHA1_BLOCK_SIZE) return sha1_update_arm(desc, data, len); - partial = sctx-count % 
SHA1_BLOCK_SIZE; - sctx-count += len; + kernel_neon_begin(); + crypto_sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL); + kernel_neon_end(); - if ((partial + len) = SHA1_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA1_BLOCK_SIZE - partial; - - memcpy(sctx-buffer + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA1_BLOCK_SIZE; - len %= SHA1_BLOCK_SIZE; - - kernel_neon_begin(); - sha1_ce_transform(blocks, data, sctx-state, - partial ? sctx-buffer : NULL); - kernel_neon_end(); - - data += blocks * SHA1_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buffer + partial, data, len); return 0; } -static int sha1_final(struct shash_desc *desc, u8 *out) +static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, +unsigned int len, u8 *out) { - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - struct sha1_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - __be32 *dst = (__be32 *)out; - int i; - - u32 padlen = SHA1_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA1_BLOCK_SIZE); - - sha1_update(desc, padding, padlen); - sha1_update(desc, (const u8 *)bits, sizeof(bits)); - - for (i = 0; i SHA1_DIGEST_SIZE / sizeof(__be32); i++) - put_unaligned_be32(sctx-state[i], dst++); - - *sctx = (struct sha1_state){}; - return 0; -} + if (!may_use_simd()) + return sha1_finup_arm(desc, data, len, out); -static int sha1_export(struct shash_desc *desc, void *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - struct sha1_state *dst = out; + kernel_neon_begin(); + if (len) + crypto_sha1_base_do_update(desc, data, len, + sha1_ce_transform, NULL); + crypto_sha1_base_do_finalize(desc, sha1_ce_transform, NULL); + kernel_neon_end(); - *dst = *sctx; - return 0; + return crypto_sha1_base_finish(desc, out); } -static int sha1_import(struct shash_desc *desc, const void *in) +static int sha1_ce_final(struct shash_desc *desc, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - struct 
sha1_state const *src = in; - - *sctx = *src
[RFC PATCH 3/6] crypto: sha256: implement base layer for SHA-256
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-256 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 4 ++ crypto/Makefile | 1 + crypto/sha256_base.c | 138 +++ include/crypto/sha.h | 17 +++ 4 files changed, 160 insertions(+) create mode 100644 crypto/sha256_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 880aa518c2eb..551bbf2e2ab5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -602,6 +602,10 @@ config CRYPTO_SHA1_MB lanes remain unfilled, a flush operation will be initiated to process the crypto jobs, adding a slight latency. + +config CRYPTO_SHA256_BASE + tristate + config CRYPTO_SHA256 tristate SHA224 and SHA256 digest algorithm select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 6174bf2592fe..bb9bafeb3ac7 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o +obj-$(CONFIG_CRYPTO_SHA256_BASE) += sha256_base.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512_BASE) += sha512_base.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o diff --git a/crypto/sha256_base.c b/crypto/sha256_base.c new file mode 100644 index ..1ba2f6812c6b --- /dev/null +++ b/crypto/sha256_base.c @@ -0,0 +1,138 @@ +/* + * sha256_base.c - core logic for SHA-256 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +int sha224_base_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, + } + }; + return 0; +} +EXPORT_SYMBOL(sha224_base_init); + +int sha256_base_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + } + }; + return 0; +} +EXPORT_SYMBOL(sha256_base_init); + +int sha256_base_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(sha256_base_export); + +int sha256_base_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(sha256_base_import); + +int sha256_base_do_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_block_fn *block_fn, void *p) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; + + sctx-count += len; + + if ((partial + len) = SHA256_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA256_BLOCK_SIZE - partial; + + memcpy(sctx-buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, + partial ? 
sctx-buf : NULL, p); + data += blocks * SHA256_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buf + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(sha256_base_do_update); + +int sha256_base_do_finalize(struct shash_desc *desc, sha256_block_fn *block_fn, + void *p) +{ + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int padlen; + __be64 bits; + + padlen = SHA256_BLOCK_SIZE - +(sctx-count + sizeof(bits)) % SHA256_BLOCK_SIZE; + + bits = cpu_to_be64(sctx-count 3); + + sha256_base_do_update(desc
[RFC PATCH 2/6] crypto: sha512-generic: move to generic glue implementation
This updated the generic SHA-512 implementation to use the generic shared SHA-512 glue code. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 1 + crypto/sha512_generic.c | 117 +++- 2 files changed, 16 insertions(+), 102 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 3400cf4e3cdb..880aa518c2eb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -646,6 +646,7 @@ config CRYPTO_SHA512_BASE config CRYPTO_SHA512 tristate SHA384 and SHA512 digest algorithms + select CRYPTO_SHA512_BASE select CRYPTO_HASH help SHA512 secure hash standard (DFIPS 180-2). diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 1c3c3767e079..0d8e973d0d4b 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -130,123 +130,36 @@ sha512_transform(u64 *state, const u8 *input) a = b = c = d = e = f = g = h = t1 = t2 = 0; } -static int -sha512_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA512_H0; - sctx-state[1] = SHA512_H1; - sctx-state[2] = SHA512_H2; - sctx-state[3] = SHA512_H3; - sctx-state[4] = SHA512_H4; - sctx-state[5] = SHA512_H5; - sctx-state[6] = SHA512_H6; - sctx-state[7] = SHA512_H7; - sctx-count[0] = sctx-count[1] = 0; - - return 0; -} - -static int -sha384_init(struct shash_desc *desc) +static void sha512_generic_block_fn(int blocks, u8 const *src, u64 *state, + const u8 *head, void *p) { - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA384_H0; - sctx-state[1] = SHA384_H1; - sctx-state[2] = SHA384_H2; - sctx-state[3] = SHA384_H3; - sctx-state[4] = SHA384_H4; - sctx-state[5] = SHA384_H5; - sctx-state[6] = SHA384_H6; - sctx-state[7] = SHA384_H7; - sctx-count[0] = sctx-count[1] = 0; + if (head) + sha512_transform(state, head); - return 0; + while (blocks--) { + sha512_transform(state, src); + src += SHA512_BLOCK_SIZE; + } } int crypto_sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha512_state 
*sctx = shash_desc_ctx(desc); - - unsigned int i, index, part_len; - - /* Compute number of bytes mod 128 */ - index = sctx-count[0] 0x7f; - - /* Update number of bytes */ - if ((sctx-count[0] += len) len) - sctx-count[1]++; - -part_len = 128 - index; - - /* Transform as many times as possible. */ - if (len = part_len) { - memcpy(sctx-buf[index], data, part_len); - sha512_transform(sctx-state, sctx-buf); - - for (i = part_len; i + 127 len; i+=128) - sha512_transform(sctx-state, data[i]); - - index = 0; - } else { - i = 0; - } - - /* Buffer remaining input */ - memcpy(sctx-buf[index], data[i], len - i); - - return 0; + return sha512_base_do_update(desc, data, len, sha512_generic_block_fn, +NULL); } EXPORT_SYMBOL(crypto_sha512_update); static int sha512_final(struct shash_desc *desc, u8 *hash) { - struct sha512_state *sctx = shash_desc_ctx(desc); -static u8 padding[128] = { 0x80, }; - __be64 *dst = (__be64 *)hash; - __be64 bits[2]; - unsigned int index, pad_len; - int i; - - /* Save number of bits */ - bits[1] = cpu_to_be64(sctx-count[0] 3); - bits[0] = cpu_to_be64(sctx-count[1] 3 | sctx-count[0] 61); - - /* Pad out to 112 mod 128. */ - index = sctx-count[0] 0x7f; - pad_len = (index 112) ? (112 - index) : ((128+112) - index); - crypto_sha512_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be64(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(struct sha512_state)); - - return 0; -} - -static int sha384_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[64]; - - sha512_final(desc, D); - - memcpy(hash, D, 48); - memzero_explicit(D, 64); - - return 0; + sha512_base_do_finalize(desc, sha512_generic_block_fn, NULL); + return sha512_base_finish(desc, hash); } static struct shash_alg sha512_algs[2] = { { .digestsize = SHA512_DIGEST_SIZE, - .init = sha512_init, + .init = sha512_base_init, .update = crypto_sha512_update, .final = sha512_final, .descsize
[PATCH v2 09/14] crypto/arm: move SHA-224/256 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 1 + arch/arm/crypto/sha2-ce-glue.c | 151 + 2 files changed, 33 insertions(+), 119 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 31ad19f18af2..de91f0447240 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -42,6 +42,7 @@ config CRYPTO_SHA2_ARM_CE tristate SHA-224/256 digest algorithm (ARM v8 Crypto Extensions) depends on KERNEL_MODE_NEON select CRYPTO_SHA256 + select CRYPTO_SHA256_BASE select CRYPTO_HASH help SHA-256 secure hash standard (DFIPS 180-2) implemented diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index 9ffe8ad27402..df57192c41cd 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -23,140 +23,52 @@ MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state, - u8 *head); + const u8 *head, void *p); -static int sha224_init(struct shash_desc *desc) +static int sha2_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha256_state *sctx = shash_desc_ctx(desc); - *sctx = (struct sha256_state){ - .state = { - SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, - SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, - } - }; - return 0; -} + if (!may_use_simd() || + (sctx-count % SHA256_BLOCK_SIZE) + len SHA256_BLOCK_SIZE) + return crypto_sha256_update(desc, data, len); -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); + kernel_neon_begin(); + crypto_sha256_base_do_update(desc, data, len, sha2_ce_transform, NULL); + kernel_neon_end(); - *sctx = (struct sha256_state){ - .state = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, - } - }; return 0; } -static int sha2_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha2_ce_finup(struct 
shash_desc *desc, const u8 *data, +unsigned int len, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial; - if (!may_use_simd()) - return crypto_sha256_update(desc, data, len); - - partial = sctx-count % SHA256_BLOCK_SIZE; - sctx-count += len; - - if ((partial + len) = SHA256_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx-buf + partial, data, p); - data += p; - len -= p; - } + return crypto_sha256_finup(desc, data, len, out); - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - kernel_neon_begin(); - sha2_ce_transform(blocks, data, sctx-state, - partial ? sctx-buf : NULL); - kernel_neon_end(); - - data += blocks * SHA256_BLOCK_SIZE; - partial = 0; - } + kernel_neon_begin(); if (len) - memcpy(sctx-buf + partial, data, len); - return 0; -} - -static void sha2_final(struct shash_desc *desc) -{ - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - struct sha256_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - u32 padlen = SHA256_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA256_BLOCK_SIZE); - - sha2_update(desc, padding, padlen); - sha2_update(desc, (const u8 *)bits, sizeof(bits)); -} + crypto_sha256_base_do_update(desc, data, len, +sha2_ce_transform, NULL); + crypto_sha256_base_do_finalize(desc, sha2_ce_transform, NULL); + kernel_neon_end(); -static int sha224_final(struct shash_desc *desc, u8 *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - int i; - - sha2_final(desc); - - for (i = 0; i SHA224_DIGEST_SIZE / sizeof(__be32); i++) - put_unaligned_be32(sctx-state[i], dst++); - - *sctx = (struct sha256_state){}; - return 0; + return crypto_sha256_base_finish(desc, out); } -static int sha256_final(struct shash_desc *desc, u8 *out) +static int sha2_ce_final(struct shash_desc *desc, u8 *out) { - struct
[RFC PATCH 6/6] arm/crypto: accelerated SHA-512 using ARM generic ASM and NEON
This updates the SHA-512 NEON module with the faster and more versatile implementation from the OpenSSL project. It consists of both a NEON and a generic ASM version of the core SHA-512 transform, where the NEON version reverts to the ASM version when invoked in non-process context. Performance relative to the generic implementation (measured using tcrypt.ko mode=306 sec=1 running on a Cortex-A57 under KVM): input sizeblock size asm neonold neon 1616 1.392.542.21 6416 1.322.332.09 6464 1.382.532.19 256 16 1.312.282.06 256 64 1.382.542.25 256 256 1.402.772.39 1024 16 1.292.222.01 1024 256 1.402.822.45 1024 10241.412.932.53 2048 16 1.332.212.00 2048 256 1.402.842.46 2048 10241.412.962.55 2048 20481.412.982.56 4096 16 1.342.201.99 4096 256 1.402.842.46 4096 10241.412.972.56 4096 40961.413.012.58 8192 16 1.342.191.99 8192 256 1.402.852.47 8192 10241.412.982.56 8192 40961.412.712.59 8192 81921.513.512.69 Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig |8 + arch/arm/crypto/Makefile |8 +- arch/arm/crypto/sha512-armv4.pl | 656 arch/arm/crypto/sha512-core.S_shipped | 1814 + arch/arm/crypto/sha512-glue.c | 137 +++ arch/arm/crypto/sha512-neon-glue.c| 111 ++ arch/arm/crypto/sha512.h |8 + 7 files changed, 2741 insertions(+), 1 deletion(-) create mode 100644 arch/arm/crypto/sha512-armv4.pl create mode 100644 arch/arm/crypto/sha512-core.S_shipped create mode 100644 arch/arm/crypto/sha512-glue.c create mode 100644 arch/arm/crypto/sha512-neon-glue.c create mode 100644 arch/arm/crypto/sha512.h diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 458729d2ce22..6b50c6d77b77 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -53,6 +53,14 @@ config CRYPTO_SHA256_ARM SHA-256 secure hash standard (DFIPS 180-2) implemented using optimized ARM assembler and NEON, when available. 
+config CRYPTO_SHA512_ARM + tristate SHA-384/512 digest algorithm (ARM-asm and NEON) + select CRYPTO_HASH + select CRYPTO_SHA512_BASE + help + SHA-512 secure hash standard (DFIPS 180-2) implemented + using optimized ARM assembler and NEON, when available. + config CRYPTO_SHA512_ARM_NEON tristate SHA384 and SHA512 digest algorithm (ARM NEON) depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index ef46e898f98b..322a6ca999a2 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o +obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o @@ -19,6 +20,8 @@ sha1-arm-y:= sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) +sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o +sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o @@ -34,4 +37,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S +$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl new file mode 100644 index ..7e540f8439da --- /dev/null +++ b/arch/arm/crypto/sha512-armv4.pl 
@@ -0,0 +1,656 @@ +#!/usr/bin/env perl + +# +# Written by Andy Polyakov ap
[PATCH v2 02/14] crypto: sha256: implement base layer for SHA-256
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-256 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 4 ++ crypto/Makefile | 1 + crypto/sha256_base.c | 140 +++ include/crypto/sha.h | 17 +++ 4 files changed, 162 insertions(+) create mode 100644 crypto/sha256_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 3400cf4e3cdb..1664bd68b97d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -602,6 +602,10 @@ config CRYPTO_SHA1_MB lanes remain unfilled, a flush operation will be initiated to process the crypto jobs, adding a slight latency. + +config CRYPTO_SHA256_BASE + tristate + config CRYPTO_SHA256 tristate SHA224 and SHA256 digest algorithm select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 6174bf2592fe..bb9bafeb3ac7 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o +obj-$(CONFIG_CRYPTO_SHA256_BASE) += sha256_base.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512_BASE) += sha512_base.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o diff --git a/crypto/sha256_base.c b/crypto/sha256_base.c new file mode 100644 index ..5fd728066912 --- /dev/null +++ b/crypto/sha256_base.c @@ -0,0 +1,140 @@ +/* + * sha256_base.c - core logic for SHA-256 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +int crypto_sha224_base_init(struct shash_desc *desc) +{ + static const u32 sha224_init_state[] = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, + }; + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha224_init_state, sizeof(sctx-state)); + sctx-count = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha224_base_init); + +int crypto_sha256_base_init(struct shash_desc *desc) +{ + static const u32 sha256_init_state[] = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + }; + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha256_init_state, sizeof(sctx-state)); + sctx-count = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha256_base_init); + +int crypto_sha256_base_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(crypto_sha256_base_export); + +int crypto_sha256_base_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(crypto_sha256_base_import); + +int crypto_sha256_base_do_update(struct shash_desc *desc, const u8 *data, +unsigned int len, sha256_block_fn *block_fn, +void *p) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; + + sctx-count += len; + + if (unlikely((partial + len) = SHA256_BLOCK_SIZE)) { + int blocks; + + if (partial) { + int p = SHA256_BLOCK_SIZE - partial; + + memcpy(sctx-buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, +partial ? 
sctx-buf : NULL, p); + data += blocks * SHA256_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buf + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(crypto_sha256_base_do_update); + +int crypto_sha256_base_do_finalize(struct shash_desc *desc, + sha256_block_fn *block_fn, void *p) +{ + const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); + struct sha256_state *sctx = shash_desc_ctx(desc); + __be64 *bits
[RFC PATCH 4/6] crypto: sha256-generic: move to generic glue implementation
This updates the generic SHA-256 implementation to use the new shared SHA-256 glue code. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 1 + crypto/sha256_generic.c | 131 +++- 2 files changed, 18 insertions(+), 114 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 551bbf2e2ab5..59243df4ea13 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -608,6 +608,7 @@ config CRYPTO_SHA256_BASE config CRYPTO_SHA256 tristate SHA224 and SHA256 digest algorithm + select CRYPTO_SHA256_BASE select CRYPTO_HASH help SHA256 secure hash standard (DFIPS 180-2). diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index b001ff5c2efc..7119346c2f41 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -214,136 +214,39 @@ static void sha256_transform(u32 *state, const u8 *input) memzero_explicit(W, 64 * sizeof(u32)); } -static int sha224_init(struct shash_desc *desc) +static void sha256_generic_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA224_H0; - sctx-state[1] = SHA224_H1; - sctx-state[2] = SHA224_H2; - sctx-state[3] = SHA224_H3; - sctx-state[4] = SHA224_H4; - sctx-state[5] = SHA224_H5; - sctx-state[6] = SHA224_H6; - sctx-state[7] = SHA224_H7; - sctx-count = 0; + if (head) + sha256_transform(state, head); - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA256_H0; - sctx-state[1] = SHA256_H1; - sctx-state[2] = SHA256_H2; - sctx-state[3] = SHA256_H3; - sctx-state[4] = SHA256_H4; - sctx-state[5] = SHA256_H5; - sctx-state[6] = SHA256_H6; - sctx-state[7] = SHA256_H7; - sctx-count = 0; - - return 0; + while (blocks--) { + sha256_transform(state, src); + src += SHA256_BLOCK_SIZE; + } } int crypto_sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned 
int partial, done; - const u8 *src; - - partial = sctx-count 0x3f; - sctx-count += len; - done = 0; - src = data; - - if ((partial + len) 63) { - if (partial) { - done = -partial; - memcpy(sctx-buf + partial, data, done + 64); - src = sctx-buf; - } - - do { - sha256_transform(sctx-state, src); - done += 64; - src = data + done; - } while (done + 63 len); - - partial = 0; - } - memcpy(sctx-buf + partial, src, len - done); - - return 0; + return sha256_base_do_update(desc, data, len, sha256_generic_block_fn, +NULL); } EXPORT_SYMBOL(crypto_sha256_update); static int sha256_final(struct shash_desc *desc, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - __be64 bits; - unsigned int index, pad_len; - int i; - static const u8 padding[64] = { 0x80, }; - - /* Save number of bits */ - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64. */ - index = sctx-count 0x3f; - pad_len = (index 56) ? (56 - index) : ((64+56) - index); - crypto_sha256_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha256_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha224_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[SHA256_DIGEST_SIZE]; - - sha256_final(desc, D); - - memcpy(hash, D, SHA224_DIGEST_SIZE); - memzero_explicit(D, SHA256_DIGEST_SIZE); - - return 0; -} - -static int sha256_export(struct shash_desc *desc, void *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - return 0; -} - -static int sha256_import(struct shash_desc *desc, const void *in) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - return 0; + sha256_base_do_finalize(desc, sha256_generic_block_fn, NULL); + return sha256_base_finish(desc, out); } static struct shash_alg sha256_algs[2] = { { .digestsize = SHA256_DIGEST_SIZE
[PATCH v2 resend 01/14] crypto: sha512: implement base layer for SHA-512
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-512 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 3 ++ crypto/Makefile | 1 + crypto/sha512_base.c | 143 +++ include/crypto/sha.h | 20 +++ 4 files changed, 167 insertions(+) create mode 100644 crypto/sha512_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 88639937a934..3400cf4e3cdb 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -641,6 +641,9 @@ config CRYPTO_SHA256_SPARC64 SHA-256 secure hash standard (DFIPS 180-2) implemented using sparc64 crypto instructions, when available. +config CRYPTO_SHA512_BASE + tristate + config CRYPTO_SHA512 tristate SHA384 and SHA512 digest algorithms select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 97b7d3ac87e7..6174bf2592fe 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o +obj-$(CONFIG_CRYPTO_SHA512_BASE) += sha512_base.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o diff --git a/crypto/sha512_base.c b/crypto/sha512_base.c new file mode 100644 index ..9a60829e06c4 --- /dev/null +++ b/crypto/sha512_base.c @@ -0,0 +1,143 @@ +/* + * sha512_base.c - core logic for SHA-512 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +int crypto_sha384_base_init(struct shash_desc *desc) +{ + static const u64 sha384_init_state[] = { + SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3, + SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7, + }; + struct sha512_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha384_init_state, sizeof(sctx-state)); + sctx-count[0] = sctx-count[1] = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha384_base_init); + +int crypto_sha512_base_init(struct shash_desc *desc) +{ + static const u64 sha512_init_state[] = { + SHA512_H0, SHA512_H1, SHA512_H2, SHA512_H3, + SHA512_H4, SHA512_H5, SHA512_H6, SHA512_H7, + }; + struct sha512_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha512_init_state, sizeof(sctx-state)); + sctx-count[0] = sctx-count[1] = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha512_base_init); + +int crypto_sha512_base_export(struct shash_desc *desc, void *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + struct sha512_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(crypto_sha512_base_export); + +int crypto_sha512_base_import(struct shash_desc *desc, const void *in) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + struct sha512_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(crypto_sha512_base_import); + +int crypto_sha512_base_do_update(struct shash_desc *desc, const u8 *data, +unsigned int len, sha512_block_fn *block_fn, +void *p) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count[0] % SHA512_BLOCK_SIZE; + + sctx-count[0] += len; + if (sctx-count[0] len) + sctx-count[1]++; + + if (unlikely((partial + len) = SHA512_BLOCK_SIZE)) { + int blocks; + + if (partial) { + int p = SHA512_BLOCK_SIZE - partial; + + memcpy(sctx-buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA512_BLOCK_SIZE; + len 
%= SHA512_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, +partial ? sctx-buf : NULL, p); + data += blocks * SHA512_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buf + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(crypto_sha512_base_do_update); + +int crypto_sha512_base_do_finalize(struct shash_desc *desc, + sha512_block_fn *block_fn, void *p) +{ + const int bit_offset = SHA512_BLOCK_SIZE
[PATCH v2 resend 05/14] crypto: sha256-generic: move to generic glue implementation
This updates the generic SHA-256 implementation to use the new shared SHA-256 glue code. It also implements a .finup hook crypto_sha256_finup() and exports it to other modules. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 1 + crypto/sha256_generic.c | 139 ++-- include/crypto/sha.h| 3 ++ 3 files changed, 31 insertions(+), 112 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 83bc1680391a..72bf5af7240d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -611,6 +611,7 @@ config CRYPTO_SHA256_BASE config CRYPTO_SHA256 tristate SHA224 and SHA256 digest algorithm + select CRYPTO_SHA256_BASE select CRYPTO_HASH help SHA256 secure hash standard (DFIPS 180-2). diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index b001ff5c2efc..d5c18c08b3da 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -214,136 +214,50 @@ static void sha256_transform(u32 *state, const u8 *input) memzero_explicit(W, 64 * sizeof(u32)); } -static int sha224_init(struct shash_desc *desc) +static void sha256_generic_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA224_H0; - sctx-state[1] = SHA224_H1; - sctx-state[2] = SHA224_H2; - sctx-state[3] = SHA224_H3; - sctx-state[4] = SHA224_H4; - sctx-state[5] = SHA224_H5; - sctx-state[6] = SHA224_H6; - sctx-state[7] = SHA224_H7; - sctx-count = 0; + if (head) + sha256_transform(state, head); - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA256_H0; - sctx-state[1] = SHA256_H1; - sctx-state[2] = SHA256_H2; - sctx-state[3] = SHA256_H3; - sctx-state[4] = SHA256_H4; - sctx-state[5] = SHA256_H5; - sctx-state[6] = SHA256_H6; - sctx-state[7] = SHA256_H7; - sctx-count = 0; - - return 0; + while (blocks--) { + sha256_transform(state, src); + src += SHA256_BLOCK_SIZE; + } } int crypto_sha256_update(struct 
shash_desc *desc, const u8 *data, unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial, done; - const u8 *src; - - partial = sctx-count 0x3f; - sctx-count += len; - done = 0; - src = data; - - if ((partial + len) 63) { - if (partial) { - done = -partial; - memcpy(sctx-buf + partial, data, done + 64); - src = sctx-buf; - } - - do { - sha256_transform(sctx-state, src); - done += 64; - src = data + done; - } while (done + 63 len); - - partial = 0; - } - memcpy(sctx-buf + partial, src, len - done); - - return 0; + return crypto_sha256_base_do_update(desc, data, len, + sha256_generic_block_fn, NULL); } EXPORT_SYMBOL(crypto_sha256_update); -static int sha256_final(struct shash_desc *desc, u8 *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - __be64 bits; - unsigned int index, pad_len; - int i; - static const u8 padding[64] = { 0x80, }; - - /* Save number of bits */ - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64. */ - index = sctx-count 0x3f; - pad_len = (index 56) ? (56 - index) : ((64+56) - index); - crypto_sha256_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha256_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha224_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[SHA256_DIGEST_SIZE]; - - sha256_final(desc, D); - - memcpy(hash, D, SHA224_DIGEST_SIZE); - memzero_explicit(D, SHA256_DIGEST_SIZE); - - return 0; -} - -static int sha256_export(struct shash_desc *desc, void *out) +int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - return 0; + if (len) + crypto_sha256_base_do_update(desc, data, len, +sha256_generic_block_fn, NULL
[PATCH v2 resend 07/14] crypto/arm: move SHA-1 ARM asm implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig | 1 + arch/arm/{include/asm = }/crypto/sha1.h | 3 + arch/arm/crypto/sha1_glue.c | 117 +++ 3 files changed, 28 insertions(+), 93 deletions(-) rename arch/arm/{include/asm = }/crypto/sha1.h (67%) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index d63f319924d2..c111d8992afb 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -11,6 +11,7 @@ if ARM_CRYPTO config CRYPTO_SHA1_ARM tristate SHA1 digest algorithm (ARM-asm) select CRYPTO_SHA1 + select CRYPTO_SHA1_BASE select CRYPTO_HASH help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h similarity index 67% rename from arch/arm/include/asm/crypto/sha1.h rename to arch/arm/crypto/sha1.h index 75e6a417416b..ffd8bd08b1a7 100644 --- a/arch/arm/include/asm/crypto/sha1.h +++ b/arch/arm/crypto/sha1.h @@ -7,4 +7,7 @@ extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len); +extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out); + #endif diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index e31b0440c613..b6a78be0367f 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -23,124 +23,55 @@ #include linux/types.h #include crypto/sha.h #include asm/byteorder.h -#include asm/crypto/sha1.h +#include sha1.h asmlinkage void sha1_block_data_order(u32 *digest, const unsigned char *data, unsigned int rounds); - -static int sha1_init(struct shash_desc *desc) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - - return 0; -} - - -static int __sha1_update(struct sha1_state *sctx, const u8 *data, -unsigned int len, unsigned int partial) +static void sha1_arm_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, 
void *p) { - unsigned int done = 0; - - sctx-count += len; - - if (partial) { - done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx-buffer + partial, data, done); - sha1_block_data_order(sctx-state, sctx-buffer, 1); - } - - if (len - done = SHA1_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - sha1_block_data_order(sctx-state, data + done, rounds); - done += rounds * SHA1_BLOCK_SIZE; - } - - memcpy(sctx-buffer, data + done, len - done); - return 0; + if (head) + sha1_block_data_order(state, head, 1); + if (blocks) + sha1_block_data_order(state, src, blocks); } - int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len SHA1_BLOCK_SIZE) { - sctx-count += len; - memcpy(sctx-buffer + partial, data, len); - return 0; - } - res = __sha1_update(sctx, data, len, partial); - return res; + return crypto_sha1_base_do_update(desc, data, len, sha1_arm_block_fn, + NULL); } EXPORT_SYMBOL_GPL(sha1_update_arm); - -/* Add padding and return the message digest. */ -static int sha1_final(struct shash_desc *desc, u8 *out) +int sha1_finup_arm(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx-count % SHA1_BLOCK_SIZE; - padlen = (index 56) ? 
(56 - index) : ((SHA1_BLOCK_SIZE+56) - index); - /* We need to fill a whole block for __sha1_update() */ - if (padlen <= 56) { - sctx->count += padlen; - memcpy(sctx->buffer + index, padding, padlen); - } else { - __sha1_update(sctx, padding, padlen, index); - } - __sha1_update(sctx, (const u8 *)bits, sizeof(bits), 56); - - /* Store state in digest */ - for (i = 0; i < 5; i++) - dst[i] = cpu_to_be32(sctx->state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); - return 0; -} - + if (len
[PATCH v2 resend 03/14] crypto: sha1: implement base layer for SHA-1
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-1 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 3 ++ crypto/Makefile | 1 + crypto/sha1_base.c | 125 +++ include/crypto/sha.h | 17 +++ 4 files changed, 146 insertions(+) create mode 100644 crypto/sha1_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 1664bd68b97d..155cc15c2719 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -516,6 +516,9 @@ config CRYPTO_RMD320 Developed by Hans Dobbertin, Antoon Bosselaers and Bart Preneel. See http://homes.esat.kuleuven.be/~bosselae/ripemd160.html +config CRYPTO_SHA1_BASE + tristate + config CRYPTO_SHA1 tristate SHA1 digest algorithm select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index bb9bafeb3ac7..42446cab15f3 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_CRYPTO_RMD128) += rmd128.o obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o +obj-$(CONFIG_CRYPTO_SHA1_BASE) += sha1_base.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256_BASE) += sha256_base.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o diff --git a/crypto/sha1_base.c b/crypto/sha1_base.c new file mode 100644 index ..30fb0f9b47cf --- /dev/null +++ b/crypto/sha1_base.c @@ -0,0 +1,125 @@ +/* + * sha1_base.c - core logic for SHA-1 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <linux/crypto.h> +#include <linux/module.h> + +#include <asm/unaligned.h> + +int crypto_sha1_base_init(struct shash_desc *desc) +{ + static const u32 sha1_init_state[] = { + SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4, + }; + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx->state, sha1_init_state, sizeof(sctx->state)); + sctx->count = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_init); + +int crypto_sha1_base_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_export); + +int crypto_sha1_base_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_import); + +int crypto_sha1_base_do_update(struct shash_desc *desc, const u8 *data, +unsigned int len, sha1_block_fn *block_fn, +void *p) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + + sctx->count += len; + + if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) { + int blocks; + + if (partial) { + int p = SHA1_BLOCK_SIZE - partial; + + memcpy(sctx->buffer + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA1_BLOCK_SIZE; + len %= SHA1_BLOCK_SIZE; + + block_fn(blocks, data, sctx->state, +partial ?
sctx->buffer : NULL, p); + data += blocks * SHA1_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buffer + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_do_update); + +int crypto_sha1_base_do_finalize(struct shash_desc *desc, +sha1_block_fn *block_fn, void *p) +{ + const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64); + struct sha1_state *sctx = shash_desc_ctx(desc); + __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + + sctx->buffer[partial++] = 0x80; + if (partial > bit_offset) { + memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial); + partial = 0; + + block_fn(1, sctx->buffer, sctx->state, NULL, p); + } + + memset(sctx->buffer + partial, 0x0, bit_offset - partial); + *bits = cpu_to_be64(sctx->count << 3); + block_fn(1, sctx->buffer, sctx->state, NULL, p); + + return 0; +} +EXPORT_SYMBOL(crypto_sha1_base_do_finalize
[PATCH v2 resend 04/14] crypto: sha512-generic: move to generic glue implementation
This updated the generic SHA-512 implementation to use the generic shared SHA-512 glue code. It also implements a .finup hook crypto_sha512_finup() and exports it to other modules. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 1 + crypto/sha512_generic.c | 126 ++-- include/crypto/sha.h| 2 + 3 files changed, 28 insertions(+), 101 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 155cc15c2719..83bc1680391a 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -653,6 +653,7 @@ config CRYPTO_SHA512_BASE config CRYPTO_SHA512 tristate SHA384 and SHA512 digest algorithms + select CRYPTO_SHA512_BASE select CRYPTO_HASH help SHA512 secure hash standard (DFIPS 180-2). diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 1c3c3767e079..88f36a6920ef 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -130,125 +130,48 @@ sha512_transform(u64 *state, const u8 *input) a = b = c = d = e = f = g = h = t1 = t2 = 0; } -static int -sha512_init(struct shash_desc *desc) +static void sha512_generic_block_fn(int blocks, u8 const *src, u64 *state, + const u8 *head, void *p) { - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA512_H0; - sctx-state[1] = SHA512_H1; - sctx-state[2] = SHA512_H2; - sctx-state[3] = SHA512_H3; - sctx-state[4] = SHA512_H4; - sctx-state[5] = SHA512_H5; - sctx-state[6] = SHA512_H6; - sctx-state[7] = SHA512_H7; - sctx-count[0] = sctx-count[1] = 0; + if (head) + sha512_transform(state, head); - return 0; -} - -static int -sha384_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA384_H0; - sctx-state[1] = SHA384_H1; - sctx-state[2] = SHA384_H2; - sctx-state[3] = SHA384_H3; - sctx-state[4] = SHA384_H4; - sctx-state[5] = SHA384_H5; - sctx-state[6] = SHA384_H6; - sctx-state[7] = SHA384_H7; - sctx-count[0] = sctx-count[1] = 0; - - return 0; + while (blocks--) { + sha512_transform(state, src); + src += SHA512_BLOCK_SIZE; + } 
} int crypto_sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha512_state *sctx = shash_desc_ctx(desc); - - unsigned int i, index, part_len; - - /* Compute number of bytes mod 128 */ - index = sctx-count[0] 0x7f; - - /* Update number of bytes */ - if ((sctx-count[0] += len) len) - sctx-count[1]++; - -part_len = 128 - index; - - /* Transform as many times as possible. */ - if (len = part_len) { - memcpy(sctx-buf[index], data, part_len); - sha512_transform(sctx-state, sctx-buf); - - for (i = part_len; i + 127 len; i+=128) - sha512_transform(sctx-state, data[i]); - - index = 0; - } else { - i = 0; - } - - /* Buffer remaining input */ - memcpy(sctx-buf[index], data[i], len - i); - - return 0; + return crypto_sha512_base_do_update(desc, data, len, + sha512_generic_block_fn, NULL); } EXPORT_SYMBOL(crypto_sha512_update); -static int -sha512_final(struct shash_desc *desc, u8 *hash) +int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash) { - struct sha512_state *sctx = shash_desc_ctx(desc); -static u8 padding[128] = { 0x80, }; - __be64 *dst = (__be64 *)hash; - __be64 bits[2]; - unsigned int index, pad_len; - int i; - - /* Save number of bits */ - bits[1] = cpu_to_be64(sctx-count[0] 3); - bits[0] = cpu_to_be64(sctx-count[1] 3 | sctx-count[0] 61); - - /* Pad out to 112 mod 128. */ - index = sctx-count[0] 0x7f; - pad_len = (index 112) ? (112 - index) : ((128+112) - index); - crypto_sha512_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be64(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(struct sha512_state)); - - return 0; + if (len) + crypto_sha512_base_do_update(desc, data, len, +sha512_generic_block_fn, NULL); + crypto_sha512_base_do_finalize(desc, sha512_generic_block_fn, NULL); + return crypto_sha512_base_finish(desc, hash); } +EXPORT_SYMBOL(crypto_sha512_finup); -static int sha384_final(struct shash_desc *desc, u8 *hash) +int
[PATCH v2 resend 12/14] crypto/x86: move SHA-1 SSSE3 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/x86/crypto/sha1_ssse3_glue.c | 139 +- crypto/Kconfig| 1 + 2 files changed, 34 insertions(+), 106 deletions(-) diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 6c20fe04a738..ee0b775f2b1f 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -49,127 +49,53 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); - -static int sha1_ssse3_init(struct shash_desc *desc) +static void sha1_ssse3_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - - return 0; -} - -static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx-count += len; - - if (partial) { - done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx-buffer + partial, data, done); - sha1_transform_asm(sctx-state, sctx-buffer, 1); - } - - if (len - done = SHA1_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - - sha1_transform_asm(sctx-state, data + done, rounds); - done += rounds * SHA1_BLOCK_SIZE; - } - - memcpy(sctx-buffer, data + done, len - done); - - return 0; + if (head) + sha1_transform_asm(state, head, 1); + if (blocks) + sha1_transform_asm(state, src, blocks); } static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len SHA1_BLOCK_SIZE) { - sctx-count += len; - memcpy(sctx-buffer + partial, data, len); + int err; - return 0; - } + if 
(!irq_fpu_usable() || + (sctx-count % SHA1_BLOCK_SIZE) + len SHA1_BLOCK_SIZE) + return crypto_sha1_update(desc, data, len); - if (!irq_fpu_usable()) { - res = crypto_sha1_update(desc, data, len); - } else { - kernel_fpu_begin(); - res = __sha1_ssse3_update(desc, data, len, partial); - kernel_fpu_end(); - } + kernel_fpu_begin(); + err = crypto_sha1_base_do_update(desc, data, len, +sha1_ssse3_block_fn, NULL); + kernel_fpu_end(); - return res; + return err; } - -/* Add padding and return the message digest. */ -static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx-count % SHA1_BLOCK_SIZE; - padlen = (index 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); - if (!irq_fpu_usable()) { - crypto_sha1_update(desc, padding, padlen); - crypto_sha1_update(desc, (const u8 *)bits, sizeof(bits)); - } else { - kernel_fpu_begin(); - /* We need to fill a whole block for __sha1_ssse3_update() */ - if (padlen = 56) { - sctx-count += padlen; - memcpy(sctx-buffer + index, padding, padlen); - } else { - __sha1_ssse3_update(desc, padding, padlen, index); - } - __sha1_ssse3_update(desc, (const u8 *)bits, sizeof(bits), 56); - kernel_fpu_end(); - } - - /* Store state in digest */ - for (i = 0; i 5; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha1_ssse3_export(struct shash_desc *desc, void *out) +static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); + if (!irq_fpu_usable()) + return crypto_sha1_finup(desc, data, len, out); - memcpy(out, sctx, sizeof(*sctx)); + kernel_fpu_begin
[PATCH v2 resend 08/14] crypto/arm: move SHA-1 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 2 +- arch/arm/crypto/sha1-ce-glue.c | 110 +++-- 2 files changed, 31 insertions(+), 81 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index c111d8992afb..31ad19f18af2 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -32,7 +32,7 @@ config CRYPTO_SHA1_ARM_CE tristate SHA1 digest algorithm (ARM v8 Crypto Extensions) depends on KERNEL_MODE_NEON select CRYPTO_SHA1_ARM - select CRYPTO_SHA1 + select CRYPTO_SHA1_BASE select CRYPTO_HASH help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c index a9dd90df9fd7..29039d1bcdf9 100644 --- a/arch/arm/crypto/sha1-ce-glue.c +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -13,114 +13,64 @@ #include linux/crypto.h #include linux/module.h -#include asm/crypto/sha1.h #include asm/hwcap.h #include asm/neon.h #include asm/simd.h #include asm/unaligned.h +#include sha1.h + MODULE_DESCRIPTION(SHA1 secure hash using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); -asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, - u8 *head); - -static int sha1_init(struct shash_desc *desc) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - return 0; -} +asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p); -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha1_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial; - if (!may_use_simd()) + if (!may_use_simd() || + (sctx-count % SHA1_BLOCK_SIZE) + len SHA1_BLOCK_SIZE) return sha1_update_arm(desc, data, len); - partial = sctx-count % 
SHA1_BLOCK_SIZE; - sctx-count += len; + kernel_neon_begin(); + crypto_sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL); + kernel_neon_end(); - if ((partial + len) = SHA1_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA1_BLOCK_SIZE - partial; - - memcpy(sctx-buffer + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA1_BLOCK_SIZE; - len %= SHA1_BLOCK_SIZE; - - kernel_neon_begin(); - sha1_ce_transform(blocks, data, sctx-state, - partial ? sctx-buffer : NULL); - kernel_neon_end(); - - data += blocks * SHA1_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buffer + partial, data, len); return 0; } -static int sha1_final(struct shash_desc *desc, u8 *out) +static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, +unsigned int len, u8 *out) { - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - struct sha1_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - __be32 *dst = (__be32 *)out; - int i; - - u32 padlen = SHA1_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA1_BLOCK_SIZE); - - sha1_update(desc, padding, padlen); - sha1_update(desc, (const u8 *)bits, sizeof(bits)); - - for (i = 0; i SHA1_DIGEST_SIZE / sizeof(__be32); i++) - put_unaligned_be32(sctx-state[i], dst++); - - *sctx = (struct sha1_state){}; - return 0; -} + if (!may_use_simd()) + return sha1_finup_arm(desc, data, len, out); -static int sha1_export(struct shash_desc *desc, void *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - struct sha1_state *dst = out; + kernel_neon_begin(); + if (len) + crypto_sha1_base_do_update(desc, data, len, + sha1_ce_transform, NULL); + crypto_sha1_base_do_finalize(desc, sha1_ce_transform, NULL); + kernel_neon_end(); - *dst = *sctx; - return 0; + return crypto_sha1_base_finish(desc, out); } -static int sha1_import(struct shash_desc *desc, const void *in) +static int sha1_ce_final(struct shash_desc *desc, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - struct 
sha1_state const *src = in; - - *sctx = *src
[PATCH v2 resend 11/14] crypto/arm64: move SHA-224/256 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/Kconfig| 1 + arch/arm64/crypto/sha2-ce-core.S | 11 ++- arch/arm64/crypto/sha2-ce-glue.c | 208 ++- 3 files changed, 38 insertions(+), 182 deletions(-) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index c87792dfaacc..238727dc24ba 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -18,6 +18,7 @@ config CRYPTO_SHA2_ARM64_CE tristate SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions) depends on ARM64 KERNEL_MODE_NEON select CRYPTO_HASH + select CRYPTO_SHA256_BASE config CRYPTO_GHASH_ARM64_CE tristate GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 7f29fc031ea8..65ad56636fba 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -135,15 +135,18 @@ CPU_LE( rev32 v19.16b, v19.16b) /* * Final block: add padding and total bit count. -* Skip if we have no total byte count in x4. In that case, the input -* size was not a round multiple of the block size, and the padding is -* handled by the C code. +* Skip if the input size was not a round multiple of the block size, +* the padding is handled by the C code in that case. */ cbz x4, 3f + ldr x5, [x2, #-8] // sha256_state::count + tst x5, #0x3f // round multiple of block size? 
+ b.ne3f + str wzr, [x4] moviv17.2d, #0 mov x8, #0x8000 moviv18.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) + ror x7, x5, #29 // ror(lsl(x4, 3), 32) fmovd16, x8 mov x4, #0 mov v19.d[0], xzr diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index ae67e88c28b9..3791c6139628 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -20,195 +20,47 @@ MODULE_DESCRIPTION(SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); -asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, -u8 *head, long bytes); +asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p); -static int sha224_init(struct shash_desc *desc) +static int sha256_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha256_state){ - .state = { - SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, - SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, - } - }; - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha256_state){ - .state = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, - } - }; - return 0; -} - -static int sha2_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; - - sctx-count += len; - - if ((partial + len) = SHA256_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx-buf + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - kernel_neon_begin_partial(28); - sha2_ce_transform(blocks, data, sctx-state, - partial ? 
sctx-buf : NULL, 0); - kernel_neon_end(); - - data += blocks * SHA256_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buf + partial, data, len); - return 0; -} - -static void sha2_final(struct shash_desc *desc) -{ - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - struct sha256_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - u32 padlen = SHA256_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA256_BLOCK_SIZE
[PATCH v2 resend 13/14] crypto/x86: move SHA-224/256 SSSE3 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/x86/crypto/sha256_ssse3_glue.c | 186 crypto/Kconfig | 1 + 2 files changed, 39 insertions(+), 148 deletions(-) diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 8fad72f4dfd2..bd9f5ec718fd 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -55,174 +55,63 @@ asmlinkage void sha256_transform_rorx(const char *data, u32 *digest, static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64); - -static int sha256_ssse3_init(struct shash_desc *desc) +static void sha256_ssse3_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - sctx-state[0] = SHA256_H0; - sctx-state[1] = SHA256_H1; - sctx-state[2] = SHA256_H2; - sctx-state[3] = SHA256_H3; - sctx-state[4] = SHA256_H4; - sctx-state[5] = SHA256_H5; - sctx-state[6] = SHA256_H6; - sctx-state[7] = SHA256_H7; - sctx-count = 0; - - return 0; -} - -static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx-count += len; - - if (partial) { - done = SHA256_BLOCK_SIZE - partial; - memcpy(sctx-buf + partial, data, done); - sha256_transform_asm(sctx-buf, sctx-state, 1); - } - - if (len - done = SHA256_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; - - sha256_transform_asm(data + done, sctx-state, (u64) rounds); - - done += rounds * SHA256_BLOCK_SIZE; - } - - memcpy(sctx-buf, data + done, len - done); - - return 0; + if (head) + sha256_transform_asm(head, state, 1); + if (blocks) + sha256_transform_asm(src, state, blocks); } static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; - int 
res; + int err; - /* Handle the fast case right here */ - if (partial + len SHA256_BLOCK_SIZE) { - sctx-count += len; - memcpy(sctx-buf + partial, data, len); + if (!irq_fpu_usable() || + (sctx-count % SHA256_BLOCK_SIZE) + len SHA256_BLOCK_SIZE) + return crypto_sha256_update(desc, data, len); - return 0; - } + kernel_fpu_begin(); + err = crypto_sha256_base_do_update(desc, data, len, + sha256_ssse3_block_fn, NULL); + kernel_fpu_end(); - if (!irq_fpu_usable()) { - res = crypto_sha256_update(desc, data, len); - } else { - kernel_fpu_begin(); - res = __sha256_ssse3_update(desc, data, len, partial); - kernel_fpu_end(); - } - - return res; + return err; } - -/* Add padding and return the message digest. */ -static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) +static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + if (!irq_fpu_usable()) + return crypto_sha256_finup(desc, data, len, out); - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx-count % SHA256_BLOCK_SIZE; - padlen = (index 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); - - if (!irq_fpu_usable()) { - crypto_sha256_update(desc, padding, padlen); - crypto_sha256_update(desc, (const u8 *)bits, sizeof(bits)); - } else { - kernel_fpu_begin(); - /* We need to fill a whole block for __sha256_ssse3_update() */ - if (padlen = 56) { - sctx-count += padlen; - memcpy(sctx-buf + index, padding, padlen); - } else { - __sha256_ssse3_update(desc, padding, padlen, index); - } - __sha256_ssse3_update(desc, (const u8 *)bits, - sizeof(bits), 56); - kernel_fpu_end(); - } + kernel_fpu_begin(); + if (len) + crypto_sha256_base_do_update(desc, data, len
[PATCH v2 resend 14/14] crypto/x86: move SHA-384/512 SSSE3 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/x86/crypto/sha512_ssse3_glue.c | 195 +++- crypto/Kconfig | 1 + 2 files changed, 39 insertions(+), 157 deletions(-) diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 0b6af26832bf..f5ab7275e50b 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -54,183 +54,63 @@ asmlinkage void sha512_transform_rorx(const char *data, u64 *digest, static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64); - -static int sha512_ssse3_init(struct shash_desc *desc) +static void sha512_ssse3_block_fn(int blocks, u8 const *src, u64 *state, + const u8 *head, void *p) { - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx-state[0] = SHA512_H0; - sctx-state[1] = SHA512_H1; - sctx-state[2] = SHA512_H2; - sctx-state[3] = SHA512_H3; - sctx-state[4] = SHA512_H4; - sctx-state[5] = SHA512_H5; - sctx-state[6] = SHA512_H6; - sctx-state[7] = SHA512_H7; - sctx-count[0] = sctx-count[1] = 0; - - return 0; -} - -static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx-count[0] += len; - if (sctx-count[0] len) - sctx-count[1]++; - - if (partial) { - done = SHA512_BLOCK_SIZE - partial; - memcpy(sctx-buf + partial, data, done); - sha512_transform_asm(sctx-buf, sctx-state, 1); - } - - if (len - done = SHA512_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; - - sha512_transform_asm(data + done, sctx-state, (u64) rounds); - - done += rounds * SHA512_BLOCK_SIZE; - } - - memcpy(sctx-buf, data + done, len - done); - - return 0; + if (head) + sha512_transform_asm(head, state, 1); + if (blocks) + sha512_transform_asm(src, state, blocks); } static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha512_state *sctx = 
shash_desc_ctx(desc); - unsigned int partial = sctx-count[0] % SHA512_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len SHA512_BLOCK_SIZE) { - sctx-count[0] += len; - if (sctx-count[0] len) - sctx-count[1]++; - memcpy(sctx-buf + partial, data, len); - - return 0; - } - - if (!irq_fpu_usable()) { - res = crypto_sha512_update(desc, data, len); - } else { - kernel_fpu_begin(); - res = __sha512_ssse3_update(desc, data, len, partial); - kernel_fpu_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be64 *dst = (__be64 *)out; - __be64 bits[2]; - static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; - - /* save number of bits */ - bits[1] = cpu_to_be64(sctx-count[0] 3); - bits[0] = cpu_to_be64(sctx-count[1] 3 | sctx-count[0] 61); - - /* Pad out to 112 mod 128 and append length */ - index = sctx-count[0] 0x7f; - padlen = (index 112) ? (112 - index) : ((128+112) - index); - - if (!irq_fpu_usable()) { - crypto_sha512_update(desc, padding, padlen); - crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); - } else { - kernel_fpu_begin(); - /* We need to fill a whole block for __sha512_ssse3_update() */ - if (padlen = 112) { - sctx-count[0] += padlen; - if (sctx-count[0] padlen) - sctx-count[1]++; - memcpy(sctx-buf + index, padding, padlen); - } else { - __sha512_ssse3_update(desc, padding, padlen, index); - } - __sha512_ssse3_update(desc, (const u8 *)bits, - sizeof(bits), 112); - kernel_fpu_end(); - } + int err; - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be64(sctx-state[i]); + if (!irq_fpu_usable() || + (sctx-count[0] % SHA512_BLOCK_SIZE) + len SHA512_BLOCK_SIZE) + return crypto_sha512_update(desc, data, len); - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx
[PATCH v2 resend 09/14] crypto/arm: move SHA-224/256 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 1 + arch/arm/crypto/sha2-ce-glue.c | 151 + 2 files changed, 33 insertions(+), 119 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 31ad19f18af2..de91f0447240 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -42,6 +42,7 @@ config CRYPTO_SHA2_ARM_CE tristate SHA-224/256 digest algorithm (ARM v8 Crypto Extensions) depends on KERNEL_MODE_NEON select CRYPTO_SHA256 + select CRYPTO_SHA256_BASE select CRYPTO_HASH help SHA-256 secure hash standard (DFIPS 180-2) implemented diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index 9ffe8ad27402..df57192c41cd 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -23,140 +23,52 @@ MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state, - u8 *head); + const u8 *head, void *p); -static int sha224_init(struct shash_desc *desc) +static int sha2_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha256_state *sctx = shash_desc_ctx(desc); - *sctx = (struct sha256_state){ - .state = { - SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, - SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, - } - }; - return 0; -} + if (!may_use_simd() || + (sctx-count % SHA256_BLOCK_SIZE) + len SHA256_BLOCK_SIZE) + return crypto_sha256_update(desc, data, len); -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); + kernel_neon_begin(); + crypto_sha256_base_do_update(desc, data, len, sha2_ce_transform, NULL); + kernel_neon_end(); - *sctx = (struct sha256_state){ - .state = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, - } - }; return 0; } -static int sha2_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha2_ce_finup(struct 
shash_desc *desc, const u8 *data, +unsigned int len, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial; - if (!may_use_simd()) - return crypto_sha256_update(desc, data, len); - - partial = sctx-count % SHA256_BLOCK_SIZE; - sctx-count += len; - - if ((partial + len) = SHA256_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx-buf + partial, data, p); - data += p; - len -= p; - } + return crypto_sha256_finup(desc, data, len, out); - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - kernel_neon_begin(); - sha2_ce_transform(blocks, data, sctx-state, - partial ? sctx-buf : NULL); - kernel_neon_end(); - - data += blocks * SHA256_BLOCK_SIZE; - partial = 0; - } + kernel_neon_begin(); if (len) - memcpy(sctx-buf + partial, data, len); - return 0; -} - -static void sha2_final(struct shash_desc *desc) -{ - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - struct sha256_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - u32 padlen = SHA256_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA256_BLOCK_SIZE); - - sha2_update(desc, padding, padlen); - sha2_update(desc, (const u8 *)bits, sizeof(bits)); -} + crypto_sha256_base_do_update(desc, data, len, +sha2_ce_transform, NULL); + crypto_sha256_base_do_finalize(desc, sha2_ce_transform, NULL); + kernel_neon_end(); -static int sha224_final(struct shash_desc *desc, u8 *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - int i; - - sha2_final(desc); - - for (i = 0; i SHA224_DIGEST_SIZE / sizeof(__be32); i++) - put_unaligned_be32(sctx-state[i], dst++); - - *sctx = (struct sha256_state){}; - return 0; + return crypto_sha256_base_finish(desc, out); } -static int sha256_final(struct shash_desc *desc, u8 *out) +static int sha2_ce_final(struct shash_desc *desc, u8 *out) { - struct
[PATCH v2 resend 02/14] crypto: sha256: implement base layer for SHA-256
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-256 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig | 4 ++ crypto/Makefile | 1 + crypto/sha256_base.c | 140 +++ include/crypto/sha.h | 17 +++ 4 files changed, 162 insertions(+) create mode 100644 crypto/sha256_base.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 3400cf4e3cdb..1664bd68b97d 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -602,6 +602,10 @@ config CRYPTO_SHA1_MB lanes remain unfilled, a flush operation will be initiated to process the crypto jobs, adding a slight latency. + +config CRYPTO_SHA256_BASE + tristate + config CRYPTO_SHA256 tristate SHA224 and SHA256 digest algorithm select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index 6174bf2592fe..bb9bafeb3ac7 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_CRYPTO_RMD160) += rmd160.o obj-$(CONFIG_CRYPTO_RMD256) += rmd256.o obj-$(CONFIG_CRYPTO_RMD320) += rmd320.o obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o +obj-$(CONFIG_CRYPTO_SHA256_BASE) += sha256_base.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512_BASE) += sha512_base.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o diff --git a/crypto/sha256_base.c b/crypto/sha256_base.c new file mode 100644 index ..5fd728066912 --- /dev/null +++ b/crypto/sha256_base.c @@ -0,0 +1,140 @@ +/* + * sha256_base.c - core logic for SHA-256 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +int crypto_sha224_base_init(struct shash_desc *desc) +{ + static const u32 sha224_init_state[] = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, + }; + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha224_init_state, sizeof(sctx-state)); + sctx-count = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha224_base_init); + +int crypto_sha256_base_init(struct shash_desc *desc) +{ + static const u32 sha256_init_state[] = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + }; + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx-state, sha256_init_state, sizeof(sctx-state)); + sctx-count = 0; + return 0; +} +EXPORT_SYMBOL(crypto_sha256_base_init); + +int crypto_sha256_base_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *dst = out; + + *dst = *sctx; + + return 0; +} +EXPORT_SYMBOL(crypto_sha256_base_export); + +int crypto_sha256_base_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state const *src = in; + + *sctx = *src; + + return 0; +} +EXPORT_SYMBOL(crypto_sha256_base_import); + +int crypto_sha256_base_do_update(struct shash_desc *desc, const u8 *data, +unsigned int len, sha256_block_fn *block_fn, +void *p) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; + + sctx-count += len; + + if (unlikely((partial + len) = SHA256_BLOCK_SIZE)) { + int blocks; + + if (partial) { + int p = SHA256_BLOCK_SIZE - partial; + + memcpy(sctx-buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, +partial ? 
sctx-buf : NULL, p); + data += blocks * SHA256_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buf + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(crypto_sha256_base_do_update); + +int crypto_sha256_base_do_finalize(struct shash_desc *desc, + sha256_block_fn *block_fn, void *p) +{ + const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); + struct sha256_state *sctx = shash_desc_ctx(desc); + __be64 *bits
[PATCH v2 resend 06/14] crypto: sha1-generic: move to generic glue implementation
This updated the generic SHA-1 implementation to use the generic shared SHA-1 glue code. It also implements a .finup hook crypto_sha1_finup() and exports it to other modules. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/Kconfig| 1 + crypto/sha1_generic.c | 105 -- include/crypto/sha.h | 3 ++ 3 files changed, 29 insertions(+), 80 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 72bf5af7240d..8f16d90f7c55 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -522,6 +522,7 @@ config CRYPTO_SHA1_BASE config CRYPTO_SHA1 tristate SHA1 digest algorithm select CRYPTO_HASH + select CRYPTO_SHA1_BASE help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c index a3e50c37eb6f..3975f63ea6f9 100644 --- a/crypto/sha1_generic.c +++ b/crypto/sha1_generic.c @@ -25,107 +25,52 @@ #include crypto/sha.h #include asm/byteorder.h -static int sha1_init(struct shash_desc *desc) +static void sha1_generic_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha1_state *sctx = shash_desc_ctx(desc); + u32 temp[SHA_WORKSPACE_WORDS]; - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; + if (head) + sha_transform(state, head, temp); - return 0; + while (blocks--) { + sha_transform(state, src, temp); + src += SHA1_BLOCK_SIZE; + } } int crypto_sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial, done; - const u8 *src; - - partial = sctx-count % SHA1_BLOCK_SIZE; - sctx-count += len; - done = 0; - src = data; - - if ((partial + len) = SHA1_BLOCK_SIZE) { - u32 temp[SHA_WORKSPACE_WORDS]; - - if (partial) { - done = -partial; - memcpy(sctx-buffer + partial, data, - done + SHA1_BLOCK_SIZE); - src = sctx-buffer; - } - - do { - sha_transform(sctx-state, src, temp); - done += SHA1_BLOCK_SIZE; - src = data + done; - } while (done + SHA1_BLOCK_SIZE = 
len); - - memzero_explicit(temp, sizeof(temp)); - partial = 0; - } - memcpy(sctx-buffer + partial, src, len - done); - - return 0; + return crypto_sha1_base_do_update(desc, data, len, + sha1_generic_block_fn, NULL); } EXPORT_SYMBOL(crypto_sha1_update); - -/* Add padding and return the message digest. */ -static int sha1_final(struct shash_desc *desc, u8 *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - u32 i, index, padlen; - __be64 bits; - static const u8 padding[64] = { 0x80, }; - - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 */ - index = sctx-count 0x3f; - padlen = (index 56) ? (56 - index) : ((64+56) - index); - crypto_sha1_update(desc, padding, padlen); - - /* Append length */ - crypto_sha1_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 5; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof *sctx); - - return 0; -} - -static int sha1_export(struct shash_desc *desc, void *out) +int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - return 0; + if (len) + crypto_sha1_base_do_update(desc, data, len, + sha1_generic_block_fn, NULL); + crypto_sha1_base_do_finalize(desc, sha1_generic_block_fn, NULL); + return crypto_sha1_base_finish(desc, out); } +EXPORT_SYMBOL(crypto_sha1_finup); -static int sha1_import(struct shash_desc *desc, const void *in) +/* Add padding and return the message digest. */ +static int sha1_final(struct shash_desc *desc, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - return 0; + return crypto_sha1_finup(desc, NULL, 0, out); } static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_init, + .init = crypto_sha1_base_init, .update = crypto_sha1_update, .final = sha1_final
[PATCH v2 resend 00/14] crypto: SHA glue code consolidation
NOTE: I appear to have screwed up something when I just sent this, so resending now with no patches missing and no duplicates. Hello all, This is v2 of what is now a complete glue code consolidation series for generic, x86, arm and arm64 implementations of SHA-1, SHA-224/256 and SHA-384/512. The base layer implements all the update and finalization logic around the block transforms, where the prototypes of the latter look something like this: typedef void (shaXXX_block_fn)(int blocks, u8 const *src, uXX *state, const u8 *head, void *p); The block implementation should process the head block first, then process the requested number of blocks starting at 'src'. The generic pointer 'p' is passed down from the do_update/do_finalize() versions; this is used for instance by the ARM64 implementations to indicate to the core ASM implementation that it should finalize the digest, which it will do only if the input was a round multiple of the block size. The generic pointer is used here as a means of conveying that information back and forth. Note that the base functions' prototypes are all 'returning int' but they all return 0. They should be invoked as tail calls where possible to eliminate some of the function call overhead. If that is not possible, the return values can be safely ignored. 
Changes since v1 (RFC): - prefixed globally visible generic symbols with crypto_ - added SHA-1 base layer - updated init code to only set the initial constants and clear the count, clearing the buffer is unnecessary [Markus] - favor the small update path in crypto_sha_XXX_base_do_update() [Markus] - update crypto_sha_XXX_do_finalize() to use memset() on the buffer directly rather than copying a statically allocated padding buffer into it [Markus] - moved a bunch of existing arm and x86 implementations to use the new base layers Note: looking at the generated asm (for arm64), I noticed that the memcpy/memset invocations with compile time constant src and len arguments (which includes the empty struct assignments) are eliminated completely, and replaced by direct loads and stores. Hopefully this addresses the concern raised by Markus regarding this. Ard Biesheuvel (14): crypto: sha512: implement base layer for SHA-512 crypto: sha256: implement base layer for SHA-256 crypto: sha1: implement base layer for SHA-1 crypto: sha512-generic: move to generic glue implementation crypto: sha256-generic: move to generic glue implementation crypto: sha1-generic: move to generic glue implementation crypto/arm: move SHA-1 ARM asm implementation to base layer crypto/arm: move SHA-1 ARMv8 implementation to base layer crypto/arm: move SHA-224/256 ARMv8 implementation to base layer crypto/arm64: move SHA-1 ARMv8 implementation to base layer crypto/arm64: move SHA-224/256 ARMv8 implementation to base layer crypto/x86: move SHA-1 SSSE3 implementation to base layer crypto/x86: move SHA-224/256 SSSE3 implementation to base layer crypto/x86: move SHA-384/512 SSSE3 implementation to base layer arch/arm/crypto/Kconfig | 4 +- arch/arm/crypto/sha1-ce-glue.c | 110 +--- arch/arm/{include/asm = }/crypto/sha1.h | 3 + arch/arm/crypto/sha1_glue.c | 117 - arch/arm/crypto/sha2-ce-glue.c | 151 +- arch/arm64/crypto/Kconfig| 2 + arch/arm64/crypto/sha1-ce-core.S | 11 +- arch/arm64/crypto/sha1-ce-glue.c | 
132 arch/arm64/crypto/sha2-ce-core.S | 11 +- arch/arm64/crypto/sha2-ce-glue.c | 208 +-- arch/x86/crypto/sha1_ssse3_glue.c| 139 + arch/x86/crypto/sha256_ssse3_glue.c | 186 ++- arch/x86/crypto/sha512_ssse3_glue.c | 195 ++--- crypto/Kconfig | 16 +++ crypto/Makefile | 3 + crypto/sha1_base.c | 125 +++ crypto/sha1_generic.c| 105 crypto/sha256_base.c | 140 + crypto/sha256_generic.c | 139 - crypto/sha512_base.c | 143 + crypto/sha512_generic.c | 126 --- include/crypto/sha.h | 62 + 22 files changed, 836 insertions(+), 1292 deletions(-) rename arch/arm/{include/asm = }/crypto/sha1.h (67%) create mode 100644 crypto/sha1_base.c create mode 100644 crypto/sha256_base.c create mode 100644 crypto/sha512_base.c -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH 6/6] arm/crypto: accelerated SHA-512 using ARM generic ASM and NEON
This updates the SHA-512 NEON module with the faster and more versatile implementation from the OpenSSL project. It consists of both a NEON and a generic ASM version of the core SHA-512 transform, where the NEON version reverts to the ASM version when invoked in non-process context. Performance relative to the generic implementation (measured using tcrypt.ko mode=306 sec=1 running on a Cortex-A57 under KVM): input sizeblock size asm neonold neon 1616 1.392.542.21 6416 1.322.332.09 6464 1.382.532.19 256 16 1.312.282.06 256 64 1.382.542.25 256 256 1.402.772.39 1024 16 1.292.222.01 1024 256 1.402.822.45 1024 10241.412.932.53 2048 16 1.332.212.00 2048 256 1.402.842.46 2048 10241.412.962.55 2048 20481.412.982.56 4096 16 1.342.201.99 4096 256 1.402.842.46 4096 10241.412.972.56 4096 40961.413.012.58 8192 16 1.342.191.99 8192 256 1.402.852.47 8192 10241.412.982.56 8192 40961.412.712.59 8192 81921.513.512.69 Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig |8 + arch/arm/crypto/Makefile |8 +- arch/arm/crypto/sha512-armv4.pl | 656 arch/arm/crypto/sha512-core.S_shipped | 1814 + arch/arm/crypto/sha512-glue.c | 137 +++ arch/arm/crypto/sha512-neon-glue.c| 111 ++ arch/arm/crypto/sha512.h |8 + 7 files changed, 2741 insertions(+), 1 deletion(-) create mode 100644 arch/arm/crypto/sha512-armv4.pl create mode 100644 arch/arm/crypto/sha512-core.S_shipped create mode 100644 arch/arm/crypto/sha512-glue.c create mode 100644 arch/arm/crypto/sha512-neon-glue.c create mode 100644 arch/arm/crypto/sha512.h diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 458729d2ce22..6b50c6d77b77 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -53,6 +53,14 @@ config CRYPTO_SHA256_ARM SHA-256 secure hash standard (DFIPS 180-2) implemented using optimized ARM assembler and NEON, when available. 
+config CRYPTO_SHA512_ARM + tristate SHA-384/512 digest algorithm (ARM-asm and NEON) + select CRYPTO_HASH + select CRYPTO_SHA512_BASE + help + SHA-512 secure hash standard (DFIPS 180-2) implemented + using optimized ARM assembler and NEON, when available. + config CRYPTO_SHA512_ARM_NEON tristate SHA384 and SHA512 digest algorithm (ARM NEON) depends on KERNEL_MODE_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index ef46e898f98b..322a6ca999a2 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o +obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o @@ -19,6 +20,8 @@ sha1-arm-y:= sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) +sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o +sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o @@ -34,4 +37,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S +$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl new file mode 100644 index ..7e540f8439da --- /dev/null +++ b/arch/arm/crypto/sha512-armv4.pl 
@@ -0,0 +1,656 @@ +#!/usr/bin/env perl + +# +# Written by Andy Polyakov ap
[RFC PATCH 0/6] SHA-256/512 glue code consolidation
Hello all, After working on various flavors of SHA over the past week, I noticed there is a fair amount of duplication, not only of true boiler plate but also of glue code that is not entirely non-trivial. So this series proposes a way to cut down on that: I implemented generic glue for SHA-256 and SHA-512, and ported the generic implementations to use it. The last two patches are examples of non-trivial uses of it. Patch #5 ports the arm64 SHA-256 Crypto Extensions to use it: this code needs to enable and disable the NEON before and after using it, and has an implementation of the padding in asm for inputs that are round multiples of the block size. The final patch is the same core code as the patch I sent yesterday, but this time with most of the redundant glue removed. Comments, suggestions etc are highly appreciated! Regards, Ard. Ard Biesheuvel (6): crypto: sha512: implement base layer for SHA-512 crypto: sha512-generic: move to generic glue implementation crypto: sha256: implement base layer for SHA-256 crypto: sha256-generic: move to generic glue implementation arm64/crypto: move ARMv8 SHA-224/256 driver to SHA-256 base layer arm/crypto: accelerated SHA-512 using ARM generic ASM and NEON arch/arm/crypto/Kconfig |8 + arch/arm/crypto/Makefile |8 +- arch/arm/crypto/sha512-armv4.pl | 656 arch/arm/crypto/sha512-core.S_shipped | 1814 + arch/arm/crypto/sha512-glue.c | 137 +++ arch/arm/crypto/sha512-neon-glue.c| 111 ++ arch/arm/crypto/sha512.h |8 + arch/arm64/crypto/Kconfig |1 + arch/arm64/crypto/sha2-ce-core.S | 11 +- arch/arm64/crypto/sha2-ce-glue.c | 211 +--- crypto/Kconfig|9 + crypto/Makefile |2 + crypto/sha256_base.c | 138 +++ crypto/sha256_generic.c | 131 +-- crypto/sha512_base.c | 143 +++ crypto/sha512_generic.c | 117 +-- include/crypto/sha.h | 37 + 17 files changed, 3142 insertions(+), 400 deletions(-) create mode 100644 arch/arm/crypto/sha512-armv4.pl create mode 100644 arch/arm/crypto/sha512-core.S_shipped create mode 100644 arch/arm/crypto/sha512-glue.c 
create mode 100644 arch/arm/crypto/sha512-neon-glue.c create mode 100644 arch/arm/crypto/sha512.h create mode 100644 crypto/sha256_base.c create mode 100644 crypto/sha512_base.c -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
On 27 March 2015 at 11:42, Andy Polyakov ap...@openssl.org wrote: Could you share the error log please? OK, I spotted one issue with this code: arch/arm/crypto/sha256-core.S: Assembler messages: arch/arm/crypto/sha256-core.S:1847: Error: invalid constant (efb0) after fixup This is caused by the fact that, when building the integer-only code for an older architecture, the conditional compilation produces a slightly bigger preceding function, and the symbol K256 is out of range for the adr instruction. @Jean-Christophe: is that the same problem that you hit? @Andy: I propose we do something similar as in the bsaes code: #ifdef __thumb__ #define adrl adr #endif and replace the offending line with adrl r14,K256 Sorry about delay. Yes, that would do. I did test all combinations, but all my combinations, i.e. without __KERNEL__ defined :-( And without __KERNEL__ there are few extra instructions in integer-only subroutine that push instruction in question code toward higher address, so that constant was efc0, which can be encoded. Anyway, I've chosen to add that #define next to .thumb directive. See attached. Ard, you have mentioned that you've verified it on big-endian, but I've spotted little-endian dependency (see #ifndef __ARMEB__ in attached). I guess that it worked for you either because it was NEON that was tested (it does work as is) or __LINUX_ARM_ARCH__ was less than 7 (in which case it uses endian-neutral byte-by-byte data load). Can you confirm either? I need to double check that, but my suspicion is that it was the latter. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 1/6] crypto: sha512: implement base layer for SHA-512
On 29 March 2015 at 10:29, Markus Stockhausen stockhau...@collogia.de wrote: Von: linux-crypto-ow...@vger.kernel.org [linux-crypto-ow...@vger.kernel.org]quot; im Auftrag von quot;Ard Biesheuvel [ard.biesheu...@linaro.org] Gesendet: Samstag, 28. März 2015 23:10 An: linux-arm-ker...@lists.infradead.org; linux-crypto@vger.kernel.org; samitolva...@google.com; herb...@gondor.apana.org.au; jussi.kivili...@iki.fi Cc: Ard Biesheuvel Betreff: [RFC PATCH 1/6] crypto: sha512: implement base layer for SHA-512 To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-512 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Hi Ard, Implementing a common layer is a very good idea - I didn't like to implement the glue code once again for some recently developed PPC crypto modules. From my very short crypto experience I was surprised that my optimized implementations degraded disproportional for small calculations in the =256byte update scenarios in contrast to some very old basic implementations. Below you will find some hints, that might fit your implementation too. Thus all new implementations based on your framework could benefit immediately. Thanks for taking a look! ... +int sha384_base_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha512_state){ + .state = { + SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3, + SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7, + } + }; + return 0; +} IIRC the above code will initialize the whole context including the 64/128 byte buffer. Direct assignment of the 8 hashes was faster in my case. Ah, I missed that. I will change it. ... 
+int sha512_base_do_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha512_block_fn *block_fn, void *p) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx-count[0] % SHA512_BLOCK_SIZE; + + sctx-count[0] += len; + if (sctx-count[0] len) + sctx-count[1]++; You should check if early kick out at this point if the buffer won't be filled up is faster than first taking care about big data. That can improve performance for small blocks while large blocks might be unaffected. + + if ((partial + len) = SHA512_BLOCK_SIZE) { Isn't this early kickout? The if is only entered if there is enough data to run the block function, otherwise it is a straight memcpy. I could add an unlikely() here to favor the small data case + int blocks; + + if (partial) { + int p = SHA512_BLOCK_SIZE - partial; + + memcpy(sctx-buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA512_BLOCK_SIZE; + len %= SHA512_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, +partial ? sctx-buf : NULL, p); + data += blocks * SHA512_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buf + partial, data, len); + + return 0; +} +EXPORT_SYMBOL(sha512_base_do_update); + +int sha512_base_do_finalize(struct shash_desc *desc, sha512_block_fn *block_fn, + void *p) +{ + static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; + + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int padlen; + __be64 bits[2]; + + padlen = SHA512_BLOCK_SIZE - +(sctx-count[0] + sizeof(bits)) % SHA512_BLOCK_SIZE; + + bits[0] = cpu_to_be64(sctx-count[1] 3 | + sctx-count[0] 61); + bits[1] = cpu_to_be64(sctx-count[0] 3); + + sha512_base_do_update(desc, padding, padlen, block_fn, p); I know that this is the most intuitive and straight implementation for handling finalization. Nevertheless the maybe a little obscure generic md5 algorithm gives best in class performance for hash finalization of small input data. 
Well, memcpy'ing a buffer consisting almost entirely of zeroes doesn't quite feel right, indeed. I will instead follow the md5 suggestion For comparison: From the raw numbers the sha1-ppc-spe assembler module written by me is only 10% faster than the old sha1-popwerpc assembler module. Both are simple assembler algorithms without hardware acceleration. For large blocks I gain another 8% by avoding function calls because the core module may process several blocks. But for small single block updates the above glue code optimizations gave 16byte block single update: +24% 64byte block single update: +16
Re: [PATCH] arm64: crypto: increase AES interleave to 4x
On 20 February 2015 at 15:55, Will Deacon will.dea...@arm.com wrote: On Thu, Feb 19, 2015 at 05:25:16PM +, Ard Biesheuvel wrote: This patch increases the interleave factor for parallel AES modes to 4x. This improves performance on Cortex-A57 by ~35%. This is due to the 3-cycle latency of AES instructions on the A57's relatively deep pipeline (compared to Cortex-A53 where the AES instruction latency is only 2 cycles). At the same time, disable inline expansion of the core AES functions, as the performance benefit of this feature is negligible. Measured on AMD Seattle (using tcrypt.ko mode=500 sec=1): Baseline (2x interleave, inline expansion) -- testing speed of async cbc(aes) (cbc-aes-ce) decryption test 4 (128 bit key, 8192 byte blocks): 95545 operations in 1 seconds test 14 (256 bit key, 8192 byte blocks): 68496 operations in 1 seconds This patch (4x interleave, no inline expansion) --- testing speed of async cbc(aes) (cbc-aes-ce) decryption test 4 (128 bit key, 8192 byte blocks): 124735 operations in 1 seconds test 14 (256 bit key, 8192 byte blocks): 92328 operations in 1 seconds Fine by me. Shall I queue this via the arm64 tree? Yes, please. 
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 5720608c50b1..abb79b3cfcfe 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -29,7 +29,7 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o aes-neon-blk-y := aes-glue-neon.o aes-neon.o -AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE +AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o:= -DINTERLEAVE=4 CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] ARM: crypto: update NEON AES module to latest OpenSSL version
This updates the bit sliced AES module to the latest version in the upstream OpenSSL repository (e620e5ae37bc). This is needed to fix a bug in the XTS decryption path, where data chunked in a certain way could trigger the ciphertext stealing code, which is not supposed to be active in the kernel build (The kernel implementation of XTS only supports round multiples of the AES block size of 16 bytes, whereas the conformant OpenSSL implementation of XTS supports inputs of arbitrary size by applying ciphertext stealing). This is fixed in the upstream version by adding the missing #ifndef XTS_CHAIN_TWEAK around the offending instructions. The upstream code also contains the change applied by Russell to build the code unconditionally, i.e., even if __LINUX_ARM_ARCH__ 7, but implemented slightly differently. Fixes: e4e7f10bfc40 (ARM: add support for bit sliced AES using NEON instructions) Reported-by: Adrian Kotelba adrian.kote...@gmail.com Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- This was found using the tcrypt test code, to which I recently added additional chunking modes. However, XTS typically operates on pages or at least on sectors, so this bug is unlikely to affect anyone in real life. Still, please add cc stable when applying, Thanks, Ard. 
arch/arm/crypto/aesbs-core.S_shipped | 12 arch/arm/crypto/bsaes-armv7.pl | 12 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped index 71e5fc7cfb18..1d1800f71c5b 100644 --- a/arch/arm/crypto/aesbs-core.S_shipped +++ b/arch/arm/crypto/aesbs-core.S_shipped @@ -58,14 +58,18 @@ # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ 7 +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 #endif #ifdef __thumb__ # define adrl adr #endif -#if __ARM_ARCH__=7 +#if __ARM_MAX_ARCH__=7 +.arch armv7-a +.fpu neon + .text .syntaxunified @ ARMv7-capable assembler is expected to handle this #ifdef __thumb2__ @@ -74,8 +78,6 @@ .code 32 #endif -.fpu neon - .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: @@ -2095,9 +2097,11 @@ bsaes_xts_decrypt: vld1.8 {q8}, [r0] @ initial tweak adr r2, .Lxts_magic +#ifndefXTS_CHAIN_TWEAK tst r9, #0xf@ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne r9, #0x10 @ subtract another 16 bytes +#endif subsr9, #0x80 blo .Lxts_dec_short diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl index be068db960ee..a4d3856e7d24 100644 --- a/arch/arm/crypto/bsaes-armv7.pl +++ b/arch/arm/crypto/bsaes-armv7.pl @@ -701,14 +701,18 @@ $code.=___; # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ 7 +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 #endif #ifdef __thumb__ # define adrl adr #endif -#if __ARM_ARCH__=7 +#if __ARM_MAX_ARCH__=7 +.arch armv7-a +.fpu neon + .text .syntaxunified @ ARMv7-capable assembler is expected to handle this #ifdef __thumb2__ @@ -717,8 +721,6 @@ $code.=___; .code 32 #endif -.fpu neon - .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: @@ -2076,9 +2078,11 @@ bsaes_xts_decrypt: vld1.8 {@XMM[8]}, [r0] @ initial tweak adr $magic, .Lxts_magic 
+#ifndef XTS_CHAIN_TWEAK tst $len, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne $len, #0x10 @ subtract another 16 bytes +#endif subs $len, #0x80 blo .Lxts_dec_short -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
On 27 March 2015 at 11:44, Ard Biesheuvel ard.biesheu...@linaro.org wrote: On 27 March 2015 at 11:42, Andy Polyakov ap...@openssl.org wrote: Could you share the error log please? OK, I spotted one issue with this code: arch/arm/crypto/sha256-core.S: Assembler messages: arch/arm/crypto/sha256-core.S:1847: Error: invalid constant (efb0) after fixup This is caused by the fact that, when building the integer-only code for an older architecture, the conditional compilation produces a slightly bigger preceding function, and the symbol K256 is out of range for the adr instruction. @Jean-Christophe: is that the same problem that you hit? @Andy: I propose we do something similar as in the bsaes code: #ifdef __thumb__ #define adrl adr #endif and replace the offending line with adrl r14,K256 Sorry about delay. Yes, that would do. I did test all combinations, but all my combinations, i.e. without __KERNEL__ defined :-( And without __KERNEL__ there are a few extra instructions in the integer-only subroutine that push the instruction in question toward a higher address, so that the constant was efc0, which can be encoded. Anyway, I've chosen to add that #define next to the .thumb directive. See attached. Ard, you have mentioned that you've verified it on big-endian, but I've spotted a little-endian dependency (see #ifndef __ARMEB__ in attached). I guess that it worked for you either because it was NEON that was tested (it does work as is) or __LINUX_ARM_ARCH__ was less than 7 (in which case it uses endian-neutral byte-by-byte data load). Can you confirm either? I need to double check that, but my suspicion is that it was the latter. 
Indeed, if I build for v7 I get [0.269418] : 4e a5 c5 08 a6 56 6e 76 24 05 43 f8 fe b0 6f d4 [0.275261] 0010: 57 77 7b e3 95 49 c4 01 64 36 af da 65 d2 33 0e [0.281031] alg: hash: Test 1 failed for sha224-asm [0.285315] : 9d 6a 5d e9 e1 6c 39 99 c7 14 84 0f 47 77 1f 36 [0.290912] 0010: dc c2 97 a7 bd ef aa c3 6c 95 15 ae which is indeed the integer code failing, and your attached patch fixes it. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto/arm: accelerated SHA-512 using ARM generic ASM and NEON
This updates the SHA-512 NEON module with the faster and more versatile implementation from the OpenSSL project. It consists of both a NEON and a generic ASM version of the core SHA-512 transform, where the NEON version reverts to the ASM version when invoked in non-process context. Performance relative to the generic implementation (measured using tcrypt.ko mode=306 sec=1 running on a Cortex-A57 under KVM): input size block size asm neonold neon 16 16 1.392.542.21 64 16 1.322.332.09 64 64 1.382.532.19 256 16 1.312.282.06 256 64 1.382.542.25 256 256 1.402.772.39 102416 1.292.222.01 1024256 1.402.822.45 102410241.412.932.53 204816 1.332.212.00 2048256 1.402.842.46 204810241.412.962.55 204820481.412.982.56 409616 1.342.201.99 4096256 1.402.842.46 409610241.412.972.56 409640961.413.012.58 819216 1.342.191.99 8192256 1.402.852.47 819210241.412.982.56 819240961.412.712.59 819281921.513.512.69 Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- This should get the same treatment as Sami's sha56 version: I would like to wait until the OpenSSL source file hits the upstream repository so that I can refer to its sha1 hash in the commit log. 
arch/arm/crypto/Kconfig |2 - arch/arm/crypto/Makefile |8 +- arch/arm/crypto/sha512-armv4.pl | 656 arch/arm/crypto/sha512-armv7-neon.S | 455 - arch/arm/crypto/sha512-core.S_shipped | 1814 + arch/arm/crypto/sha512.h | 14 + arch/arm/crypto/sha512_glue.c | 255 + arch/arm/crypto/sha512_neon_glue.c| 155 +-- 8 files changed, 2762 insertions(+), 597 deletions(-) create mode 100644 arch/arm/crypto/sha512-armv4.pl delete mode 100644 arch/arm/crypto/sha512-armv7-neon.S create mode 100644 arch/arm/crypto/sha512-core.S_shipped create mode 100644 arch/arm/crypto/sha512.h create mode 100644 arch/arm/crypto/sha512_glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 458729d2ce22..846694ad2b7d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -55,8 +55,6 @@ config CRYPTO_SHA256_ARM config CRYPTO_SHA512_ARM_NEON tristate SHA384 and SHA512 digest algorithm (ARM NEON) - depends on KERNEL_MODE_NEON - select CRYPTO_SHA512 select CRYPTO_HASH help SHA-512 secure hash standard (DFIPS 180-2) implemented diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index ef46e898f98b..c0ed9b68fe12 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -19,7 +19,8 @@ sha1-arm-y:= sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y:= sha1-armv7-neon.o sha1_neon_glue.o sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) -sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o +sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512_neon_glue.o +sha512-arm-neon-y := sha512-core.o sha512_glue.o $(sha512-arm-neon-y) sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o @@ -34,4 +35,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S 
+$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl new file mode 100644 index ..7e540f8439da --- /dev/null +++ b/arch/arm/crypto/sha512-armv4.pl @@ -0,0 +1,656 @@ +#!/usr/bin/env perl + +# +# Written by Andy Polyakov ap...@openssl.org for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams
[PATCH] crypto/arm: fix big-endian bug in ghash
This fixes a bug in the new v8 Crypto Extensions GHASH code that only manifests itself in big-endian mode. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/ghash-ce-core.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index e643a15eadf2..f6ab8bcc9efe 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -40,7 +40,7 @@ * struct ghash_key const *k, const char *head) */ ENTRY(pmull_ghash_update) - vld1.8 {SHASH}, [r3] + vld1.64 {SHASH}, [r3] vld1.64 {XL}, [r1] vmov.i8 MASK, #0xe1 vext.8 SHASH2, SHASH, SHASH, #8 -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] arm: crypto: Add optimized SHA-256/224
(resending due to size bounce) On 23 March 2015 at 14:50, Sami Tolvanen samitolva...@google.com wrote: Add Andy Polyakov's optimized assembly and NEON implementations for SHA-256/224. The sha256-armv4.pl script for generating the assembly code is from OpenSSL commit 2ecd32a1f8f0643ae7b38f59bbaf9f0d6ef326fe. Compared to sha256-generic these implementations have the following tcrypt speed improvements on Motorola Nexus 6 (Snapdragon 805): bsb/u sha256-neon sha256-asm 1616 x1.32x1.19 6416 x1.27x1.15 6464 x1.36x1.20 256 16 x1.22x1.11 256 64 x1.36x1.19 256 256 x1.59x1.23 1024 16 x1.21x1.10 1024 256 x1.65x1.23 1024 1024 x1.76x1.25 2048 16 x1.21x1.10 2048 256 x1.66x1.23 2048 1024 x1.78x1.25 2048 2048 x1.79x1.25 4096 16 x1.20x1.09 4096 256 x1.66x1.23 4096 1024 x1.79x1.26 4096 4096 x1.82x1.26 8192 16 x1.20x1.09 8192 256 x1.67x1.23 8192 1024 x1.80x1.26 8192 4096 x1.85x1.28 8192 8192 x1.85x1.27 Where bs refers to block size and b/u to bytes per update. Signed-off-by: Sami Tolvanen samitolva...@google.com Cc: Andy Polyakov ap...@openssl.org This builds fine and passes the tcrypt.ko tests in ARM and Thumb2 and even in big-endian (ARM) mode, so Tested-by: Ard Biesheuvel ard.biesheu...@linaro.org Reviewed-by: Ard Biesheuvel ard.biesheu...@linaro.org Nice work! Ard. 
--- Changes since v1: Rebased to Herbert's cryptodev tree Include sha256-armv4.pl and use it to generate sha256-core.S Add integer-only assembly version as sha256-asm Add support for SHA-224 to the glue code Change priority for sha256/224-ce to 300 --- arch/arm/crypto/Kconfig |7 arch/arm/crypto/Makefile |8 arch/arm/crypto/sha2-ce-glue.c|4 arch/arm/crypto/sha256-armv4.pl | 713 ++ arch/arm/crypto/sha256-core.S_shipped | 2775 arch/arm/crypto/sha256_glue.c | 246 ++ arch/arm/crypto/sha256_glue.h | 23 arch/arm/crypto/sha256_neon_glue.c| 172 + 8 files changed, 3945 insertions(+), 3 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] crypto/arm: accelerated SHA-512 using ARM generic ASM and NEON
On 11 April 2015 at 10:48, Arnd Bergmann a...@arndb.de wrote: On Saturday 11 April 2015 09:35:15 Ard Biesheuvel wrote: On 10 April 2015 at 22:23, Ard Biesheuvel ard.biesheu...@linaro.org wrote: On 10 apr. 2015, at 22:08, Arnd Bergmann a...@arndb.de wrote: On Friday 10 April 2015 16:29:08 Ard Biesheuvel wrote: +#if __ARM_MAX_ARCH__=7 +.arch armv7-a +.fpu neon + This will cause a build failure on an ARMv7-M build, which is incompatible with .arch armv7-a and .fpu neon. The neon part depends on CONFIG_KERNEL_MODE_NEON, which would never be set for that platform, I suppose On second thought, that is not entirely true, but I still don't think there is problem here: the .arch/.fpu declarations are understood perfectly fine by GAS when targeting ARMv7-M. Only, it will emit code that is incompatible with it. However, this code is invoked at runtime only if a NEON unit has been detected, so it will just be ignored on ARMv7-M Sorry, I should have collected my findings better when replying to your patch. What I remembered was that I saw a problem in this area in linux-next with randconfig builds, but I did not notice that it was for a different file, and I had not double-checked that patch yet in order to send it out. See below for the patch I'm currently using for my randconfig builder. Before you apply this, please check again which files are affected, as it's possible that there are other modules that suffer from the same problem. Arnd 8--- Subject: [PATCH] ARM: crypto: avoid sha256 code on ARMv7-M The sha256 assembly implementation can deal with all architecture levels from ARMv4 to ARMv7-A, but not with ARMv7-M. Enabling it in an ARMv7-M kernel results in this build failure: arm-linux-gnueabi-ld: error: arch/arm/crypto/sha256_glue.o: Conflicting architecture profiles M/A arm-linux-gnueabi-ld: failed to merge target specific data of file arch/arm/crypto/sha256_glue.o This adds a Kconfig dependency to prevent the code from being disabled ... enabled? for ARMv7-M. 
Signed-off-by: Arnd Bergmann a...@arndb.de diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 458729d2ce22..76463da22f81 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -49,6 +49,7 @@ config CRYPTO_SHA2_ARM_CE config CRYPTO_SHA256_ARM tristate SHA-224/256 digest algorithm (ARM-asm and NEON) select CRYPTO_HASH + depends on !CPU_V7M help SHA-256 secure hash standard (DFIPS 180-2) implemented using optimized ARM assembler and NEON, when available. @Herbert: could you please apply this onto cryptodev before sending out your pull request for v4.1? And please disregard $subject, I will post a v3 with a similar 'depends on' added (unless you're ok to add it yourself) Thanks, Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] crypto: arm: workaround for building with old binutils
On 10 April 2015 at 21:57, Arnd Bergmann a...@arndb.de wrote: Old versions of binutils (before 2.23) do not yet understand the crypto-neon-fp-armv8 fpu instructions, and an attempt to build these files results in a build failure: arch/arm/crypto/aes-ce-core.S:133: Error: selected processor does not support ARM mode `vld1.8 {q10-q11},[ip]!' arch/arm/crypto/aes-ce-core.S:133: Error: bad instruction `aese.8 q0,q8' arch/arm/crypto/aes-ce-core.S:133: Error: bad instruction `aesmc.8 q0,q0' arch/arm/crypto/aes-ce-core.S:133: Error: bad instruction `aese.8 q0,q9' arch/arm/crypto/aes-ce-core.S:133: Error: bad instruction `aesmc.8 q0,q0' Since the affected versions are still in widespread use, and this breaks 'allmodconfig' builds, we should try to at least get a successful kernel build. Unfortunately, I could not come up with a way to make the Kconfig symbol depend on the binutils version, which would be the nicest solution. This patch uses the 'as-option' Kbuild macro to find out whether the support is present in the assembler, and otherwise passes a macro definition to each affected file, which in turn disables that code entirely and results in empty modules. In order to help users figure out what to do, we also add a #warning statement in place of the removed code that tells users which version to use. 
Signed-off-by: Arnd Bergmann a...@arndb.de Link: http://storage.kernelci.org/next/next-20150410/arm-allmodconfig/build.log Fixes: 864cbeed4ab22d (crypto: arm - add support for SHA1 using ARMv8 Crypto Instructions) diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index ef46e898f98b..60f2101e0586 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -25,6 +25,10 @@ sha2-arm-ce-y:= sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o +armv8-ce-flags := $(call as-option,-Wa$(comma)-mfpu=crypto-neon-fp-armv8,-DARMV8_CE_DISABLED) +asflags-y := $(armv8-ce-flags) +ccflags-y := $(armv8-ce-flags) + Could you perhaps put the rules that build these modules inside a ifneq ($(armv8-ce-flags),-DARMV8_CE_DISABLED) ... endif block? quiet_cmd_perl = PERL$@ cmd_perl = $(PERL) $() $(@) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 8cfa468ee570..f2132ba91353 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -8,11 +8,14 @@ * published by the Free Software Foundation. 
*/ +#ifdef ARMV8_CE_DISABLED +#warning ARMv8 Crypto Extensions need binutils 2.23 or higher +#else + #include linux/linkage.h #include asm/assembler.h .text - .fpucrypto-neon-fp-armv8 .align 3 .macro enc_round, state, key @@ -516,3 +519,5 @@ ENTRY(ce_aes_invert) vst1.8 {q0}, [r0] bx lr ENDPROC(ce_aes_invert) + +#endif diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index b445a5d56f43..4cba201a64a9 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -510,13 +510,16 @@ static struct crypto_alg aes_algs[] = { { static int __init aes_init(void) { - if (!(elf_hwcap2 HWCAP2_AES)) + if (IS_ENABLED(ARMV8_CE_DISABLED) || !(elf_hwcap2 HWCAP2_AES)) return -ENODEV; return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); } static void __exit aes_exit(void) { + if (IS_ENABLED(ARMV8_CE_DISABLED)) + return; + crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); } diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index f6ab8bcc9efe..4fe75df41162 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -8,6 +8,10 @@ * by the Free Software Foundation. */ +#ifdef ARMV8_CE_DISABLED +#warning ARMv8 Crypto Extensions need binutils 2.23 or higher +#else + #include linux/linkage.h #include asm/assembler.h @@ -33,8 +37,6 @@ XH_L.reqd14 .text - .fpucrypto-neon-fp-armv8 - /* * void pmull_ghash_update(int blocks, u64 dg[], const char *src, * struct ghash_key const *k, const char *head) @@ -92,3 +94,5 @@ ENTRY(pmull_ghash_update) vst1.64 {XL}, [r1] bx lr ENDPROC(pmull_ghash_update) + +#endif diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 03a39fe29246..880afe904e5d 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -293,7 +293,7 @@ static int __init ghash_ce_mod_init(void) { int err; - if (!(elf_hwcap2 HWCAP2_PMULL)) + if (IS_ENABLED(ARMV8_CE_DISABLED) || !(elf_hwcap2 HWCAP2_AES)) return -ENODEV; err =
Re: [PATCH v2] crypto/arm: accelerated SHA-512 using ARM generic ASM and NEON
On 10 April 2015 at 22:23, Ard Biesheuvel ard.biesheu...@linaro.org wrote: On 10 apr. 2015, at 22:08, Arnd Bergmann a...@arndb.de wrote: On Friday 10 April 2015 16:29:08 Ard Biesheuvel wrote: +#if __ARM_MAX_ARCH__=7 +.arch armv7-a +.fpu neon + This will cause a build failure on an ARMv7-M build, which is incompatible with .arch armv7-a and .fpu neon. The neon part depends on CONFIG_KERNEL_MODE_NEON, which would never be set for that platform, I suppose On second thought, that is not entirely true, but I still don't think there is problem here: the .arch/.fpu declarations are understood perfectly fine by GAS when targeting ARMv7-M. Only, it will emit code that is incompatible with it. However, this code is invoked at runtime only if a NEON unit has been detected, so it will just be ignored on ARMv7-M -- Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Crypto Update for 4.1
On 23 April 2015 at 21:27, Bobby Powers bobbypow...@gmail.com wrote: Hello, Linus Torvalds torva...@linux-foundation.org wrote: Ok, this patch seems to fix it for me, so I undid my revert that I hadn't pushed out yet, and pushed out this instead. Commit e68410ebf62676dfb93aafff7c55b76644f37072 in Linus's tree from this crpyto update (crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer) causes a GPF on boot in sha512_ssse3_finup for me on a Broadwell i7-5600U, rendering the kernel unbootable. Reverting that commit enables me to boot. I don't know enough about the code to comment. Config is attached, and I can provide a photo of the reported Call Trace if thats helpful. It is quite reproducible for me. Hello Bobby, Would you be able to check whether the following patch fixes the crash? diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index a4771dcd1fcf..1f20b35d8573 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -79,7 +79,7 @@ NUM_BLKS= %rdx c = %rcx d = %r8 e = %rdx -y3 = %rdi +y3 = %rsi TBL = %rbp If not, please share the call trace and the content of /proc/cpuinfo Regards, Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: x86/sha512_ssse3 - fixup for asm function prototype change
Patch e68410ebf626 (crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer) changed the prototypes of the core asm SHA-512 implementations so that they are compatible with the prototype used by the base layer. However, in one instance, the register that was used for passing the input buffer was reused as a scratch register later on in the code, and since the input buffer param changed places with the digest param -which needs to be written back before the function returns- this resulted in the scratch register to be dereferenced in a memory write operation, causing a GPF. Fix this by changing the scratch register to use the same register as the input buffer param again. Fixes: e68410ebf626 (crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer) Reported-By: Bobby Powers bobbypow...@gmail.com Tested-By: Bobby Powers bobbypow...@gmail.com Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/x86/crypto/sha512-avx2-asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index a4771dcd1fcf..1f20b35d8573 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -79,7 +79,7 @@ NUM_BLKS= %rdx c = %rcx d = %r8 e = %rdx -y3 = %rdi +y3 = %rsi TBL = %rbp -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] crypto: arm/sha512 - accelerated SHA-512 using ARM generic ASM and NEON
On 11 May 2015 at 08:59, Herbert Xu herb...@gondor.apana.org.au wrote: On Fri, May 08, 2015 at 10:46:21AM +0200, Ard Biesheuvel wrote: diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 8da2207b0072..08b5fb85bff5 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -53,20 +53,14 @@ config CRYPTO_SHA256_ARM SHA-256 secure hash standard (DFIPS 180-2) implemented using optimized ARM assembler and NEON, when available. -config CRYPTO_SHA512_ARM_NEON - tristate SHA384 and SHA512 digest algorithm (ARM NEON) - depends on KERNEL_MODE_NEON - select CRYPTO_SHA512 +config CRYPTO_SHA512_ARM + tristate SHA-384/512 digest algorithm (ARM-asm and NEON) + depends on !CPU_V7M select CRYPTO_HASH + depends on !CPU_V7M This looks like a duplicate, no? Yes, you are right. Let me figure out what's going on and send you a new version. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] crypto/arm: accelerated SHA-512 using ARM generic ASM and NEON
On 13 April 2015 at 06:13, Herbert Xu herb...@gondor.apana.org.au wrote: On Sat, Apr 11, 2015 at 09:15:10PM +0200, Ard Biesheuvel wrote: @Herbert: could you please apply this onto cryptodev before sending out your pull request for v4.1? Done. And please disregard $subject, I will post a v3 with a similar 'depends on' added (unless you're ok to add it yourself) Please resend the patch. But I'll process it after the merge window closes so no hurry. OK, all fine. Thanks Herbert! -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] crypto: arm: workaround for building with old binutils
From: Arnd Bergmann a...@arndb.de How about something like this: A warning will be emitted by make when descending into the arch/arm/crypto directory, but only if any ARMv8 Crypto modules were in fact selected. /home/ard/linux-2.6/arch/arm/crypto/Makefile:22: These ARMv8 Crypto Extensions modules need binutils 2.23 or higher /home/ard/linux-2.6/arch/arm/crypto/Makefile:23: aes-arm-ce.o sha1-arm-ce.o sha2-arm-ce.o ghash-arm-ce.o -8-- Old versions of binutils (before 2.23) do not yet understand the crypto-neon-fp-armv8 fpu instructions, and an attempt to build these files results in a build failure: arch/arm/crypto/aes-ce-core.S:133: Error: selected processor does not support ARM mode `vld1.8 {q10-q11},[ip]!' arch/arm/crypto/aes-ce-core.S:133: Error: bad instruction `aese.8 q0,q8' arch/arm/crypto/aes-ce-core.S:133: Error: bad instruction `aesmc.8 q0,q0' arch/arm/crypto/aes-ce-core.S:133: Error: bad instruction `aese.8 q0,q9' arch/arm/crypto/aes-ce-core.S:133: Error: bad instruction `aesmc.8 q0,q0' Since the affected versions are still in widespread use, and this breaks 'allmodconfig' builds, we should try to at least get a successful kernel build. Unfortunately, I could not come up with a way to make the Kconfig symbol depend on the binutils version, which would be the nicest solution. Instead, this patch uses the 'as-instr' Kbuild macro to find out whether the support is present in the assembler, and otherwise emits a non-fatal warning indicating which selected modules could not be built. 
Signed-off-by: Arnd Bergmann a...@arndb.de Link: http://storage.kernelci.org/next/next-20150410/arm-allmodconfig/build.log Fixes: 864cbeed4ab22d (crypto: arm - add support for SHA1 using ARMv8 Crypto Instructions) [ard.biesheuvel: - omit modules entirely instead of building empty ones if binutils is too old - update commit log accordingly] Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Makefile | 19 +++ 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b37597ad979c..fc5150702b64 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -4,14 +4,25 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o -obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o -obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o -obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o + +ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o +ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o + +ifneq ($(ce-obj-y)$(ce-obj-m),) +ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) +obj-y += $(ce-obj-y) +obj-m += $(ce-obj-m) +else +$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher) +$(warning $(ce-obj-y) $(ce-obj-m)) +endif +endif aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] crypto: arm: workaround for building with old binutils
On 11 April 2015 at 22:54, Arnd Bergmann a...@arndb.de wrote: On Saturday 11 April 2015 15:32:34 Ard Biesheuvel wrote: From: Arnd Bergmann a...@arndb.de How about something like this: A warning will be emitted by make when descending into the arch/arm/crypto directory, but only if any ARMv8 Crypto modules were in fact selected. /home/ard/linux-2.6/arch/arm/crypto/Makefile:22: These ARMv8 Crypto Extensions modules need binutils 2.23 or higher /home/ard/linux-2.6/arch/arm/crypto/Makefile:23: aes-arm-ce.o sha1-arm-ce.o sha2-arm-ce.o ghash-arm-ce.o Looks good. Do you want me to do more randconfig tests on this, or put it in right away to fix the allmodconfig problem? It would be good to have confirmation that it fixes the actual symptom, so yes, more testing please. But I think the patch itself needs to go via Herbert's tree -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] crypto/arm: accelerated SHA-512 using ARM generic ASM and NEON
On 11 apr. 2015, at 10:48, Arnd Bergmann a...@arndb.de wrote: On Saturday 11 April 2015 09:35:15 Ard Biesheuvel wrote: On 10 April 2015 at 22:23, Ard Biesheuvel ard.biesheu...@linaro.org wrote: On 10 apr. 2015, at 22:08, Arnd Bergmann a...@arndb.de wrote: On Friday 10 April 2015 16:29:08 Ard Biesheuvel wrote: +#if __ARM_MAX_ARCH__=7 +.arch armv7-a +.fpu neon + This will cause a build failure on an ARMv7-M build, which is incompatible with .arch armv7-a and .fpu neon. The neon part depends on CONFIG_KERNEL_MODE_NEON, which would never be set for that platform, I suppose On second thought, that is not entirely true, but I still don't think there is problem here: the .arch/.fpu declarations are understood perfectly fine by GAS when targeting ARMv7-M. Only, it will emit code that is incompatible with it. However, this code is invoked at runtime only if a NEON unit has been detected, so it will just be ignored on ARMv7-M Sorry, I should have collected my findings better when replying to your patch. What I remembered was that I saw a problem in this area in linux-next with randconfig builds, but I did not notice that it was for a different file, and I had not double-checked that patch yet in order to send it out. See below for the patch I'm currently using for my randconfig builder. Before you apply this, please check again which files are affected, as it's possible that there are other modules that suffer from the same problem. Ah i see it now. The new Sha256 module as well as the Sha512 i am proposing here both use a single .o containing the !neon and neon implementations, and only expose the latter if KERNEL_MODE_NEON. This way, we can use the exact same .S file ad OpenSSL, which should mean less maintenance burden. So your fix seems the most appropriate, even if it means v7m won't be able to use the !neon part either. 
Arnd 8--- Subject: [PATCH] ARM: crypto: avoid sha256 code on ARMv7-M The sha256 assembly implementation can deal with all architecture levels from ARMv4 to ARMv7-A, but not with ARMv7-M. Enabling it in an ARMv7-M kernel results in this build failure: arm-linux-gnueabi-ld: error: arch/arm/crypto/sha256_glue.o: Conflicting architecture profiles M/A arm-linux-gnueabi-ld: failed to merge target specific data of file arch/arm/crypto/sha256_glue.o This adds a Kconfig dependency to prevent the code from being enabled for ARMv7-M. Signed-off-by: Arnd Bergmann a...@arndb.de diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 458729d2ce22..76463da22f81 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -49,6 +49,7 @@ config CRYPTO_SHA2_ARM_CE config CRYPTO_SHA256_ARM tristate SHA-224/256 digest algorithm (ARM-asm and NEON) select CRYPTO_HASH +depends on !CPU_V7M help SHA-256 secure hash standard (DFIPS 180-2) implemented using optimized ARM assembler and NEON, when available. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] crypto: arm: workaround for building with old binutils
On 11 apr. 2015, at 10:55, Arnd Bergmann a...@arndb.de wrote: On Saturday 11 April 2015 09:41:08 Ard Biesheuvel wrote: Could you perhaps put the rules that build these modules inside a ifneq ($(armv8-ce-flags),-DARMV8_CE_DISABLED) ... endif How about something like this: ifeq ($(call as-option,-Wa$(comma)-mfpu=crypto-neon-fp-armv8),) $(warning ARMv8 Crypto Extensions need binutils 2.23 or higher) else ... endif That would basically be a reimplementation of the missing as-option-yn macro though, so we could also add that instead and do ifeq ($(call as-option-yn,-Wa$(comma)-mfpu=crypto-neon-fp-armv8),y) $(warning ARMv8 Crypto Extensions need binutils 2.23 or higher) else ... endif Yes, that should work. Could we also move the CE objs to ce-obj-$() and put ifneq ($(ce-obj-y)$(ce-obj-m),) if as check obj-y += $(ce-obj-y) obj-m += $(ce-obj-m) else $(warning ...) endif endif around it so you only get the warning if you have selected any of these modules? In any case, I strongly prefer to leave the .S files themselves alone if at all possible Ard. -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 10/16] crypto/arm: move SHA-224/256 ASM/NEON implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/sha256_glue.c | 174 - arch/arm/crypto/sha256_glue.h | 17 +--- arch/arm/crypto/sha256_neon_glue.c | 144 +- 3 files changed, 81 insertions(+), 254 deletions(-) diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index ccef5e25bbcb..6f14a5a0a467 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -24,163 +24,56 @@ #include linux/types.h #include linux/string.h #include crypto/sha.h -#include asm/byteorder.h +#include crypto/sha256_base.h #include asm/simd.h #include asm/neon.h + #include sha256_glue.h asmlinkage void sha256_block_data_order(u32 *digest, const void *data, - unsigned int num_blks); - + unsigned int num_blks); -int sha256_init(struct shash_desc *desc) +static void sha256_arm_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - sctx-state[0] = SHA256_H0; - sctx-state[1] = SHA256_H1; - sctx-state[2] = SHA256_H2; - sctx-state[3] = SHA256_H3; - sctx-state[4] = SHA256_H4; - sctx-state[5] = SHA256_H5; - sctx-state[6] = SHA256_H6; - sctx-state[7] = SHA256_H7; - sctx-count = 0; - - return 0; + if (head) + sha256_block_data_order(state, head, 1); + if (blocks) + sha256_block_data_order(state, src, blocks); } -int sha224_init(struct shash_desc *desc) +int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, +unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - sctx-state[0] = SHA224_H0; - sctx-state[1] = SHA224_H1; - sctx-state[2] = SHA224_H2; - sctx-state[3] = SHA224_H3; - sctx-state[4] = SHA224_H4; - sctx-state[5] = SHA224_H5; - sctx-state[6] = SHA224_H6; - sctx-state[7] = SHA224_H7; - sctx-count = 0; - - return 0; + return sha256_base_do_update(desc, data, len, sha256_arm_block_fn, +NULL); } +EXPORT_SYMBOL(crypto_sha256_arm_update); -int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len, - 
unsigned int partial) +int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx-count += len; - - if (partial) { - done = SHA256_BLOCK_SIZE - partial; - memcpy(sctx-buf + partial, data, done); - sha256_block_data_order(sctx-state, sctx-buf, 1); - } - - if (len - done = SHA256_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; - - sha256_block_data_order(sctx-state, data + done, rounds); - done += rounds * SHA256_BLOCK_SIZE; - } - - memcpy(sctx-buf, data + done, len - done); - - return 0; + if (len) + sha256_base_do_update(desc, data, len, sha256_arm_block_fn, + NULL); + sha256_base_do_finalize(desc, sha256_arm_block_fn, NULL); + return sha256_base_finish(desc, hash); } +EXPORT_SYMBOL(crypto_sha256_arm_finup); -int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; - - /* Handle the fast case right here */ - if (partial + len SHA256_BLOCK_SIZE) { - sctx-count += len; - memcpy(sctx-buf + partial, data, len); - - return 0; - } - - return __sha256_update(desc, data, len, partial); -} - -/* Add padding and return the message digest. */ static int sha256_final(struct shash_desc *desc, u8 *out) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - /* save number of bits */ - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx-count % SHA256_BLOCK_SIZE; - padlen = (index 56) ? 
(56 - index) : ((SHA256_BLOCK_SIZE+56)-index); - - /* We need to fill a whole block for __sha256_update */ - if (padlen <= 56) { - sctx->count += padlen; - memcpy(sctx->buf + index, padding, padlen); - } else { - __sha256_update(desc, padding, padlen, index); - } - __sha256_update(desc, (const u8 *)bits, sizeof(bits), 56
[PATCH v3 07/16] crypto/arm: move SHA-1 ARM asm implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/sha1-ce-glue.c | 3 +- arch/arm/{include/asm = }/crypto/sha1.h | 3 + arch/arm/crypto/sha1_glue.c | 116 ++- arch/arm/crypto/sha1_neon_glue.c | 2 +- 4 files changed, 29 insertions(+), 95 deletions(-) rename arch/arm/{include/asm = }/crypto/sha1.h (67%) diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c index a9dd90df9fd7..e93b24c1af1f 100644 --- a/arch/arm/crypto/sha1-ce-glue.c +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -13,12 +13,13 @@ #include linux/crypto.h #include linux/module.h -#include asm/crypto/sha1.h #include asm/hwcap.h #include asm/neon.h #include asm/simd.h #include asm/unaligned.h +#include sha1.h + MODULE_DESCRIPTION(SHA1 secure hash using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h similarity index 67% rename from arch/arm/include/asm/crypto/sha1.h rename to arch/arm/crypto/sha1.h index 75e6a417416b..ffd8bd08b1a7 100644 --- a/arch/arm/include/asm/crypto/sha1.h +++ b/arch/arm/crypto/sha1.h @@ -7,4 +7,7 @@ extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len); +extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out); + #endif diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index e31b0440c613..c5a9519d 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -22,125 +22,55 @@ #include linux/cryptohash.h #include linux/types.h #include crypto/sha.h +#include crypto/sha1_base.h #include asm/byteorder.h -#include asm/crypto/sha1.h +#include sha1.h asmlinkage void sha1_block_data_order(u32 *digest, const unsigned char *data, unsigned int rounds); - -static int sha1_init(struct shash_desc *desc) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, 
SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - - return 0; -} - - -static int __sha1_update(struct sha1_state *sctx, const u8 *data, -unsigned int len, unsigned int partial) +static void sha1_arm_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - unsigned int done = 0; - - sctx-count += len; - - if (partial) { - done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx-buffer + partial, data, done); - sha1_block_data_order(sctx-state, sctx-buffer, 1); - } - - if (len - done = SHA1_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - sha1_block_data_order(sctx-state, data + done, rounds); - done += rounds * SHA1_BLOCK_SIZE; - } - - memcpy(sctx-buffer, data + done, len - done); - return 0; + if (head) + sha1_block_data_order(state, head, 1); + if (blocks) + sha1_block_data_order(state, src, blocks); } - int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len SHA1_BLOCK_SIZE) { - sctx-count += len; - memcpy(sctx-buffer + partial, data, len); - return 0; - } - res = __sha1_update(sctx, data, len, partial); - return res; + return sha1_base_do_update(desc, data, len, sha1_arm_block_fn, NULL); } EXPORT_SYMBOL_GPL(sha1_update_arm); - -/* Add padding and return the message digest. */ -static int sha1_final(struct shash_desc *desc, u8 *out) +int sha1_finup_arm(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx-count % SHA1_BLOCK_SIZE; - padlen = (index 56) ? 
(56 - index) : ((SHA1_BLOCK_SIZE+56) - index); - /* We need to fill a whole block for __sha1_update() */ - if (padlen <= 56) { - sctx->count += padlen; - memcpy(sctx->buffer + index, padding, padlen); - } else { - __sha1_update(sctx, padding, padlen, index); - } - __sha1_update(sctx, (const u8 *)bits, sizeof(bits), 56); - - /* Store state in digest */ - for (i = 0; i < 5
[PATCH v3 04/16] crypto: sha1-generic: move to generic glue implementation
This updates the generic SHA-1 implementation to use the generic shared SHA-1 glue code. It also implements a .finup hook crypto_sha1_finup() and exports it to other modules. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/sha1_generic.c | 108 +- include/crypto/sha.h | 3 ++ 2 files changed, 31 insertions(+), 80 deletions(-) diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c index a3e50c37eb6f..322a2278d939 100644 --- a/crypto/sha1_generic.c +++ b/crypto/sha1_generic.c @@ -23,109 +23,57 @@ #include linux/cryptohash.h #include linux/types.h #include crypto/sha.h +#include crypto/sha1_base.h #include asm/byteorder.h -static int sha1_init(struct shash_desc *desc) +static void sha1_generic_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha1_state *sctx = shash_desc_ctx(desc); + u32 temp[SHA_WORKSPACE_WORDS]; - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; + if (head) + sha_transform(state, head, temp); - return 0; + while (blocks--) { + sha_transform(state, src, temp); + src += SHA1_BLOCK_SIZE; + } + memzero_explicit(temp, sizeof(temp)); } int crypto_sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial, done; - const u8 *src; - - partial = sctx-count % SHA1_BLOCK_SIZE; - sctx-count += len; - done = 0; - src = data; - - if ((partial + len) = SHA1_BLOCK_SIZE) { - u32 temp[SHA_WORKSPACE_WORDS]; - - if (partial) { - done = -partial; - memcpy(sctx-buffer + partial, data, - done + SHA1_BLOCK_SIZE); - src = sctx-buffer; - } - - do { - sha_transform(sctx-state, src, temp); - done += SHA1_BLOCK_SIZE; - src = data + done; - } while (done + SHA1_BLOCK_SIZE = len); - - memzero_explicit(temp, sizeof(temp)); - partial = 0; - } - memcpy(sctx-buffer + partial, src, len - done); - - return 0; + return sha1_base_do_update(desc, data, len, sha1_generic_block_fn, + NULL); } 
EXPORT_SYMBOL(crypto_sha1_update); - -/* Add padding and return the message digest. */ -static int sha1_final(struct shash_desc *desc, u8 *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - u32 i, index, padlen; - __be64 bits; - static const u8 padding[64] = { 0x80, }; - - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 */ - index = sctx-count 0x3f; - padlen = (index 56) ? (56 - index) : ((64+56) - index); - crypto_sha1_update(desc, padding, padlen); - - /* Append length */ - crypto_sha1_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 5; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof *sctx); - - return 0; -} - -static int sha1_export(struct shash_desc *desc, void *out) +int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - return 0; + if (len) + sha1_base_do_update(desc, data, len, sha1_generic_block_fn, + NULL); + sha1_base_do_finalize(desc, sha1_generic_block_fn, NULL); + return sha1_base_finish(desc, out); } +EXPORT_SYMBOL(crypto_sha1_finup); -static int sha1_import(struct shash_desc *desc, const void *in) +/* Add padding and return the message digest. */ +static int sha1_final(struct shash_desc *desc, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - return 0; + return crypto_sha1_finup(desc, NULL, 0, out); } static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_init, + .init = sha1_base_init, .update = crypto_sha1_update, .final = sha1_final, - .export = sha1_export, - .import = sha1_import, + .finup = crypto_sha1_finup, + .export = sha1_base_export, + .import = sha1_base_import, .descsize = sizeof(struct sha1_state), .statesize
[PATCH v3 03/16] crypto: sha512: implement base layer for SHA-512
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-512 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- include/crypto/sha512_base.h | 147 +++ 1 file changed, 147 insertions(+) create mode 100644 include/crypto/sha512_base.h diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h new file mode 100644 index ..44351f781dce --- /dev/null +++ b/include/crypto/sha512_base.h @@ -0,0 +1,147 @@ +/* + * sha512_base.h - core logic for SHA-512 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +typedef void (sha512_block_fn)(int blocks, u8 const *src, u64 *state, + const u8 *head, void *p); + +static inline int sha384_base_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + sctx-state[0] = SHA384_H0; + sctx-state[1] = SHA384_H1; + sctx-state[2] = SHA384_H2; + sctx-state[3] = SHA384_H3; + sctx-state[4] = SHA384_H4; + sctx-state[5] = SHA384_H5; + sctx-state[6] = SHA384_H6; + sctx-state[7] = SHA384_H7; + sctx-count[0] = sctx-count[1] = 0; + + return 0; +} + +static inline int sha512_base_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + sctx-state[0] = SHA512_H0; + sctx-state[1] = SHA512_H1; + sctx-state[2] = SHA512_H2; + sctx-state[3] = SHA512_H3; + sctx-state[4] = SHA512_H4; + sctx-state[5] = SHA512_H5; + sctx-state[6] = SHA512_H6; + sctx-state[7] = SHA512_H7; + sctx-count[0] = sctx-count[1] = 0; + + return 0; +} + +static inline 
int sha512_base_export(struct shash_desc *desc, void *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + struct sha512_state *dst = out; + + *dst = *sctx; + + return 0; +} + +static inline int sha512_base_import(struct shash_desc *desc, const void *in) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + struct sha512_state const *src = in; + + *sctx = *src; + + return 0; +} + +static inline int sha512_base_do_update(struct shash_desc *desc, const u8 *data, + unsigned int len, + sha512_block_fn *block_fn, void *p) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + + sctx->count[0] += len; + if (sctx->count[0] < len) + sctx->count[1]++; + + if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) { + int blocks; + + if (partial) { + int p = SHA512_BLOCK_SIZE - partial; + + memcpy(sctx->buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA512_BLOCK_SIZE; + len %= SHA512_BLOCK_SIZE; + + block_fn(blocks, data, sctx->state, +partial ?
sctx->buf : NULL, p); + data += blocks * SHA512_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buf + partial, data, len); + + return 0; +} + +static inline int sha512_base_do_finalize(struct shash_desc *desc, + sha512_block_fn *block_fn, void *p) +{ + const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]); + struct sha512_state *sctx = shash_desc_ctx(desc); + __be64 *bits = (__be64 *)(sctx->buf + bit_offset); + unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + + sctx->buf[partial++] = 0x80; + if (partial > bit_offset) { + memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial); + partial = 0; + + block_fn(1, sctx->buf, sctx->state, NULL, p); + } + + memset(sctx->buf + partial, 0x0, bit_offset - partial); + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); + bits[1] = cpu_to_be64(sctx->count[0] << 3); + block_fn(1, sctx->buf, sctx->state, NULL, p); + + return 0; +} + +static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) +{ + unsigned int digest_size = crypto_shash_digestsize(desc->tfm); + struct sha512_state *sctx = shash_desc_ctx(desc); + __be64 *digest = (__be64 *)out; + int i
[PATCH v3 08/16] crypto/arm: move SHA-1 NEON implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/sha1_neon_glue.c | 137 +-- 1 file changed, 30 insertions(+), 107 deletions(-) diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 5d9a1b4aac73..4280f657fb9d 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -25,7 +25,7 @@ #include linux/cryptohash.h #include linux/types.h #include crypto/sha.h -#include asm/byteorder.h +#include crypto/sha1_base.h #include asm/neon.h #include asm/simd.h @@ -34,136 +34,59 @@ asmlinkage void sha1_transform_neon(void *state_h, const char *data, unsigned int rounds); - -static int sha1_neon_init(struct shash_desc *desc) +static void sha1_neon_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - - return 0; -} - -static int __sha1_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx-count += len; - - if (partial) { - done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx-buffer + partial, data, done); - sha1_transform_neon(sctx-state, sctx-buffer, 1); - } - - if (len - done = SHA1_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - - sha1_transform_neon(sctx-state, data + done, rounds); - done += rounds * SHA1_BLOCK_SIZE; - } - - memcpy(sctx-buffer, data + done, len - done); - - return 0; + if (head) + sha1_transform_neon(state, head, 1); + if (blocks) + sha1_transform_neon(state, src, blocks); } static int sha1_neon_update(struct shash_desc *desc, const u8 *data, -unsigned int len) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len 
SHA1_BLOCK_SIZE) { - sctx-count += len; - memcpy(sctx-buffer + partial, data, len); - - return 0; - } - - if (!may_use_simd()) { - res = sha1_update_arm(desc, data, len); - } else { - kernel_neon_begin(); - res = __sha1_neon_update(desc, data, len, partial); - kernel_neon_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha1_neon_final(struct shash_desc *desc, u8 *out) + unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx-count % SHA1_BLOCK_SIZE; - padlen = (index 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); - if (!may_use_simd()) { - sha1_update_arm(desc, padding, padlen); - sha1_update_arm(desc, (const u8 *)bits, sizeof(bits)); - } else { - kernel_neon_begin(); - /* We need to fill a whole block for __sha1_neon_update() */ - if (padlen = 56) { - sctx-count += padlen; - memcpy(sctx-buffer + index, padding, padlen); - } else { - __sha1_neon_update(desc, padding, padlen, index); - } - __sha1_neon_update(desc, (const u8 *)bits, sizeof(bits), 56); - kernel_neon_end(); - } - /* Store state in digest */ - for (i = 0; i 5; i++) - dst[i] = cpu_to_be32(sctx-state[i]); + if (!may_use_simd() || + (sctx-count % SHA1_BLOCK_SIZE) + len SHA1_BLOCK_SIZE) + return sha1_update_arm(desc, data, len); - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); + kernel_neon_begin(); + sha1_base_do_update(desc, data, len, sha1_neon_block_fn, NULL); + kernel_neon_end(); return 0; } -static int sha1_neon_export(struct shash_desc *desc, void *out) +static int sha1_neon_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); + if (!may_use_simd()) + return sha1_finup_arm(desc, data, len, out); - memcpy(out, sctx, 
sizeof(*sctx
[PATCH v3 09/16] crypto/arm: move SHA-1 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm/crypto/Kconfig| 1 - arch/arm/crypto/sha1-ce-glue.c | 108 +++-- 2 files changed, 28 insertions(+), 81 deletions(-) diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 458729d2ce22..5ed98bc6f95d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -31,7 +31,6 @@ config CRYPTO_SHA1_ARM_CE tristate SHA1 digest algorithm (ARM v8 Crypto Extensions) depends on KERNEL_MODE_NEON select CRYPTO_SHA1_ARM - select CRYPTO_SHA1 select CRYPTO_HASH help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c index e93b24c1af1f..9d0e86e5647b 100644 --- a/arch/arm/crypto/sha1-ce-glue.c +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -10,13 +10,13 @@ #include crypto/internal/hash.h #include crypto/sha.h +#include crypto/sha1_base.h #include linux/crypto.h #include linux/module.h #include asm/hwcap.h #include asm/neon.h #include asm/simd.h -#include asm/unaligned.h #include sha1.h @@ -24,104 +24,52 @@ MODULE_DESCRIPTION(SHA1 secure hash using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); -asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, - u8 *head); +asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p); -static int sha1_init(struct shash_desc *desc) +static int sha1_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - return 0; -} - -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial; - - if (!may_use_simd()) + if (!may_use_simd() || + (sctx-count % SHA1_BLOCK_SIZE) + len SHA1_BLOCK_SIZE) return 
sha1_update_arm(desc, data, len); - partial = sctx-count % SHA1_BLOCK_SIZE; - sctx-count += len; - - if ((partial + len) = SHA1_BLOCK_SIZE) { - int blocks; + kernel_neon_begin(); + sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL); + kernel_neon_end(); - if (partial) { - int p = SHA1_BLOCK_SIZE - partial; - - memcpy(sctx-buffer + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA1_BLOCK_SIZE; - len %= SHA1_BLOCK_SIZE; - - kernel_neon_begin(); - sha1_ce_transform(blocks, data, sctx-state, - partial ? sctx-buffer : NULL); - kernel_neon_end(); - - data += blocks * SHA1_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buffer + partial, data, len); return 0; } -static int sha1_final(struct shash_desc *desc, u8 *out) +static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, +unsigned int len, u8 *out) { - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - struct sha1_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - __be32 *dst = (__be32 *)out; - int i; - - u32 padlen = SHA1_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA1_BLOCK_SIZE); - - sha1_update(desc, padding, padlen); - sha1_update(desc, (const u8 *)bits, sizeof(bits)); - - for (i = 0; i SHA1_DIGEST_SIZE / sizeof(__be32); i++) - put_unaligned_be32(sctx-state[i], dst++); - - *sctx = (struct sha1_state){}; - return 0; -} + if (!may_use_simd()) + return sha1_finup_arm(desc, data, len, out); -static int sha1_export(struct shash_desc *desc, void *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - struct sha1_state *dst = out; + kernel_neon_begin(); + if (len) + sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL); + sha1_base_do_finalize(desc, sha1_ce_transform, NULL); + kernel_neon_end(); - *dst = *sctx; - return 0; + return sha1_base_finish(desc, out); } -static int sha1_import(struct shash_desc *desc, const void *in) +static int sha1_ce_final(struct shash_desc *desc, u8 *out) { - struct sha1_state *sctx = 
shash_desc_ctx(desc); - struct sha1_state const *src = in; - - *sctx = *src; - return 0; + return
[PATCH v3 13/16] crypto/arm64: move SHA-224/256 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/sha2-ce-core.S | 11 ++- arch/arm64/crypto/sha2-ce-glue.c | 209 ++- 2 files changed, 38 insertions(+), 182 deletions(-) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 7f29fc031ea8..65ad56636fba 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -135,15 +135,18 @@ CPU_LE( rev32 v19.16b, v19.16b) /* * Final block: add padding and total bit count. -* Skip if we have no total byte count in x4. In that case, the input -* size was not a round multiple of the block size, and the padding is -* handled by the C code. +* Skip if the input size was not a round multiple of the block size, +* the padding is handled by the C code in that case. */ cbz x4, 3f + ldr x5, [x2, #-8] // sha256_state::count + tst x5, #0x3f // round multiple of block size? + b.ne3f + str wzr, [x4] moviv17.2d, #0 mov x8, #0x8000 moviv18.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) + ror x7, x5, #29 // ror(lsl(x4, 3), 32) fmovd16, x8 mov x4, #0 mov v19.d[0], xzr diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index ae67e88c28b9..91ac3682a730 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -12,6 +12,7 @@ #include asm/unaligned.h #include crypto/internal/hash.h #include crypto/sha.h +#include crypto/sha256_base.h #include linux/cpufeature.h #include linux/crypto.h #include linux/module.h @@ -20,195 +21,47 @@ MODULE_DESCRIPTION(SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions); MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); -asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, -u8 *head, long bytes); +asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p); -static int sha224_init(struct shash_desc *desc) +static int sha256_ce_update(struct shash_desc *desc, const u8 *data, + 
unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha256_state){ - .state = { - SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, - SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, - } - }; - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha256_state){ - .state = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, - } - }; - return 0; -} - -static int sha2_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; - - sctx-count += len; - - if ((partial + len) = SHA256_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA256_BLOCK_SIZE - partial; - - memcpy(sctx-buf + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA256_BLOCK_SIZE; - len %= SHA256_BLOCK_SIZE; - - kernel_neon_begin_partial(28); - sha2_ce_transform(blocks, data, sctx-state, - partial ? sctx-buf : NULL, 0); - kernel_neon_end(); - - data += blocks * SHA256_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buf + partial, data, len); - return 0; -} - -static void sha2_final(struct shash_desc *desc) -{ - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - struct sha256_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - u32 padlen = SHA256_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA256_BLOCK_SIZE); - - sha2_update(desc, padding, padlen); - sha2_update(desc, (const u8 *)bits, sizeof(bits)); -} - -static int sha224_final(struct shash_desc *desc, u8 *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - int i; - - sha2_final(desc); - - for (i = 0; i SHA224_DIGEST_SIZE
[PATCH v3 15/16] crypto/x86: move SHA-224/256 SSSE3 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/x86/crypto/sha256_ssse3_glue.c | 184 +++- 1 file changed, 36 insertions(+), 148 deletions(-) diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 8fad72f4dfd2..bd4ae0da0a49 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -36,7 +36,7 @@ #include linux/cryptohash.h #include linux/types.h #include crypto/sha.h -#include asm/byteorder.h +#include crypto/sha256_base.h #include asm/i387.h #include asm/xcr.h #include asm/xsave.h @@ -55,174 +55,61 @@ asmlinkage void sha256_transform_rorx(const char *data, u32 *digest, static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64); - -static int sha256_ssse3_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - - sctx-state[0] = SHA256_H0; - sctx-state[1] = SHA256_H1; - sctx-state[2] = SHA256_H2; - sctx-state[3] = SHA256_H3; - sctx-state[4] = SHA256_H4; - sctx-state[5] = SHA256_H5; - sctx-state[6] = SHA256_H6; - sctx-state[7] = SHA256_H7; - sctx-count = 0; - - return 0; -} - -static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) +static void sha256_ssse3_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx-count += len; - - if (partial) { - done = SHA256_BLOCK_SIZE - partial; - memcpy(sctx-buf + partial, data, done); - sha256_transform_asm(sctx-buf, sctx-state, 1); - } - - if (len - done = SHA256_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; - - sha256_transform_asm(data + done, sctx-state, (u64) rounds); - - done += rounds * SHA256_BLOCK_SIZE; - } - - memcpy(sctx-buf, data + done, len - done); - - return 0; + if (head) + sha256_transform_asm(head, state, 1); + if (blocks) + sha256_transform_asm(src, state, blocks); } static int 
sha256_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA256_BLOCK_SIZE; - int res; - /* Handle the fast case right here */ - if (partial + len SHA256_BLOCK_SIZE) { - sctx-count += len; - memcpy(sctx-buf + partial, data, len); - - return 0; - } - - if (!irq_fpu_usable()) { - res = crypto_sha256_update(desc, data, len); - } else { - kernel_fpu_begin(); - res = __sha256_ssse3_update(desc, data, len, partial); - kernel_fpu_end(); - } + if (!irq_fpu_usable() || + (sctx-count % SHA256_BLOCK_SIZE) + len SHA256_BLOCK_SIZE) + return crypto_sha256_update(desc, data, len); - return res; -} - - -/* Add padding and return the message digest. */ -static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx-count % SHA256_BLOCK_SIZE; - padlen = (index 56) ? 
(56 - index) : ((SHA256_BLOCK_SIZE+56)-index); - - if (!irq_fpu_usable()) { - crypto_sha256_update(desc, padding, padlen); - crypto_sha256_update(desc, (const u8 *)bits, sizeof(bits)); - } else { - kernel_fpu_begin(); - /* We need to fill a whole block for __sha256_ssse3_update() */ - if (padlen = 56) { - sctx-count += padlen; - memcpy(sctx-buf + index, padding, padlen); - } else { - __sha256_ssse3_update(desc, padding, padlen, index); - } - __sha256_ssse3_update(desc, (const u8 *)bits, - sizeof(bits), 56); - kernel_fpu_end(); - } - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); + kernel_fpu_begin(); + sha256_base_do_update(desc, data, len, sha256_ssse3_block_fn, NULL); + kernel_fpu_end(); return 0; } -static int sha256_ssse3_export(struct shash_desc *desc, void
[PATCH v3 05/16] crypto: sha256-generic: move to generic glue implementation
This updates the generic SHA-256 implementation to use the new shared SHA-256 glue code. It also implements a .finup hook crypto_sha256_finup() and exports it to other modules. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/sha256_generic.c | 140 ++-- include/crypto/sha.h| 3 ++ 2 files changed, 31 insertions(+), 112 deletions(-) diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index b001ff5c2efc..794e31889ac9 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -23,6 +23,7 @@ #include linux/mm.h #include linux/types.h #include crypto/sha.h +#include crypto/sha256_base.h #include asm/byteorder.h #include asm/unaligned.h @@ -214,136 +215,50 @@ static void sha256_transform(u32 *state, const u8 *input) memzero_explicit(W, 64 * sizeof(u32)); } -static int sha224_init(struct shash_desc *desc) +static void sha256_generic_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA224_H0; - sctx-state[1] = SHA224_H1; - sctx-state[2] = SHA224_H2; - sctx-state[3] = SHA224_H3; - sctx-state[4] = SHA224_H4; - sctx-state[5] = SHA224_H5; - sctx-state[6] = SHA224_H6; - sctx-state[7] = SHA224_H7; - sctx-count = 0; + if (head) + sha256_transform(state, head); - return 0; -} - -static int sha256_init(struct shash_desc *desc) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA256_H0; - sctx-state[1] = SHA256_H1; - sctx-state[2] = SHA256_H2; - sctx-state[3] = SHA256_H3; - sctx-state[4] = SHA256_H4; - sctx-state[5] = SHA256_H5; - sctx-state[6] = SHA256_H6; - sctx-state[7] = SHA256_H7; - sctx-count = 0; - - return 0; + while (blocks--) { + sha256_transform(state, src); + src += SHA256_BLOCK_SIZE; + } } int crypto_sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha256_state *sctx = shash_desc_ctx(desc); - unsigned int partial, done; - const u8 *src; - - partial = sctx-count 0x3f; - 
sctx-count += len; - done = 0; - src = data; - - if ((partial + len) 63) { - if (partial) { - done = -partial; - memcpy(sctx-buf + partial, data, done + 64); - src = sctx-buf; - } - - do { - sha256_transform(sctx-state, src); - done += 64; - src = data + done; - } while (done + 63 len); - - partial = 0; - } - memcpy(sctx-buf + partial, src, len - done); - - return 0; + return sha256_base_do_update(desc, data, len, sha256_generic_block_fn, +NULL); } EXPORT_SYMBOL(crypto_sha256_update); -static int sha256_final(struct shash_desc *desc, u8 *out) -{ - struct sha256_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - __be64 bits; - unsigned int index, pad_len; - int i; - static const u8 padding[64] = { 0x80, }; - - /* Save number of bits */ - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64. */ - index = sctx-count 0x3f; - pad_len = (index 56) ? (56 - index) : ((64+56) - index); - crypto_sha256_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha256_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha224_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[SHA256_DIGEST_SIZE]; - - sha256_final(desc, D); - - memcpy(hash, D, SHA224_DIGEST_SIZE); - memzero_explicit(D, SHA256_DIGEST_SIZE); - - return 0; -} - -static int sha256_export(struct shash_desc *desc, void *out) +int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash) { - struct sha256_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - return 0; + if (len) + sha256_base_do_update(desc, data, len, sha256_generic_block_fn, + NULL); + sha256_base_do_finalize(desc, sha256_generic_block_fn, NULL); + return sha256_base_finish(desc, hash); } +EXPORT_SYMBOL(crypto_sha256_finup); -static int sha256_import(struct shash_desc *desc, const void *in) +static int sha256_final(struct shash_desc
[PATCH v3 06/16] crypto: sha512-generic: move to generic glue implementation
This updated the generic SHA-512 implementation to use the generic shared SHA-512 glue code. It also implements a .finup hook crypto_sha512_finup() and exports it to other modules. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- crypto/sha512_generic.c | 127 ++-- include/crypto/sha.h| 3 ++ 2 files changed, 29 insertions(+), 101 deletions(-) diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 1c3c3767e079..8cf0082d7084 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -18,6 +18,7 @@ #include linux/crypto.h #include linux/types.h #include crypto/sha.h +#include crypto/sha512_base.h #include linux/percpu.h #include asm/byteorder.h #include asm/unaligned.h @@ -130,125 +131,48 @@ sha512_transform(u64 *state, const u8 *input) a = b = c = d = e = f = g = h = t1 = t2 = 0; } -static int -sha512_init(struct shash_desc *desc) +static void sha512_generic_block_fn(int blocks, u8 const *src, u64 *state, + const u8 *head, void *p) { - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA512_H0; - sctx-state[1] = SHA512_H1; - sctx-state[2] = SHA512_H2; - sctx-state[3] = SHA512_H3; - sctx-state[4] = SHA512_H4; - sctx-state[5] = SHA512_H5; - sctx-state[6] = SHA512_H6; - sctx-state[7] = SHA512_H7; - sctx-count[0] = sctx-count[1] = 0; + if (head) + sha512_transform(state, head); - return 0; -} - -static int -sha384_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - sctx-state[0] = SHA384_H0; - sctx-state[1] = SHA384_H1; - sctx-state[2] = SHA384_H2; - sctx-state[3] = SHA384_H3; - sctx-state[4] = SHA384_H4; - sctx-state[5] = SHA384_H5; - sctx-state[6] = SHA384_H6; - sctx-state[7] = SHA384_H7; - sctx-count[0] = sctx-count[1] = 0; - - return 0; + while (blocks--) { + sha512_transform(state, src); + src += SHA512_BLOCK_SIZE; + } } int crypto_sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha512_state *sctx = shash_desc_ctx(desc); - - unsigned int i, 
index, part_len; - - /* Compute number of bytes mod 128 */ - index = sctx-count[0] 0x7f; - - /* Update number of bytes */ - if ((sctx-count[0] += len) len) - sctx-count[1]++; - -part_len = 128 - index; - - /* Transform as many times as possible. */ - if (len = part_len) { - memcpy(sctx-buf[index], data, part_len); - sha512_transform(sctx-state, sctx-buf); - - for (i = part_len; i + 127 len; i+=128) - sha512_transform(sctx-state, data[i]); - - index = 0; - } else { - i = 0; - } - - /* Buffer remaining input */ - memcpy(sctx-buf[index], data[i], len - i); - - return 0; + return sha512_base_do_update(desc, data, len, sha512_generic_block_fn, +NULL); } EXPORT_SYMBOL(crypto_sha512_update); -static int -sha512_final(struct shash_desc *desc, u8 *hash) +int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash) { - struct sha512_state *sctx = shash_desc_ctx(desc); -static u8 padding[128] = { 0x80, }; - __be64 *dst = (__be64 *)hash; - __be64 bits[2]; - unsigned int index, pad_len; - int i; - - /* Save number of bits */ - bits[1] = cpu_to_be64(sctx-count[0] 3); - bits[0] = cpu_to_be64(sctx-count[1] 3 | sctx-count[0] 61); - - /* Pad out to 112 mod 128. */ - index = sctx-count[0] 0x7f; - pad_len = (index 112) ? (112 - index) : ((128+112) - index); - crypto_sha512_update(desc, padding, pad_len); - - /* Append length (before padding) */ - crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be64(sctx-state[i]); - - /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(struct sha512_state)); - - return 0; + if (len) + sha512_base_do_update(desc, data, len, sha512_generic_block_fn, + NULL); + sha512_base_do_finalize(desc, sha512_generic_block_fn, NULL); + return sha512_base_finish(desc, hash); } +EXPORT_SYMBOL(crypto_sha512_finup); -static int sha384_final(struct shash_desc *desc, u8 *hash) +static int sha512_final(struct shash_desc *desc, u8 *hash) { - u8 D[64]; - - sha512_final(desc, D); - - memcpy(hash, D, 48); - memzero_explicit(D, 64); - - return 0; + return crypto_sha512_finup(desc, NULL, 0, hash
[PATCH v3 14/16] crypto/x86: move SHA-1 SSSE3 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/x86/crypto/sha1_ssse3_glue.c | 136 +- 1 file changed, 30 insertions(+), 106 deletions(-) diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 6c20fe04a738..8678dc75fbf3 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -28,7 +28,7 @@ #include linux/cryptohash.h #include linux/types.h #include crypto/sha.h -#include asm/byteorder.h +#include crypto/sha1_base.h #include asm/i387.h #include asm/xcr.h #include asm/xsave.h @@ -49,127 +49,50 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); - -static int sha1_ssse3_init(struct shash_desc *desc) +static void sha1_ssse3_block_fn(int blocks, u8 const *src, u32 *state, + const u8 *head, void *p) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - - return 0; -} - -static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx-count += len; - - if (partial) { - done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx-buffer + partial, data, done); - sha1_transform_asm(sctx-state, sctx-buffer, 1); - } - - if (len - done = SHA1_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - - sha1_transform_asm(sctx-state, data + done, rounds); - done += rounds * SHA1_BLOCK_SIZE; - } - - memcpy(sctx-buffer, data + done, len - done); - - return 0; + if (head) + sha1_transform_asm(state, head, 1); + if (blocks) + sha1_transform_asm(state, src, blocks); } static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; - int res; 
- /* Handle the fast case right here */ - if (partial + len SHA1_BLOCK_SIZE) { - sctx-count += len; - memcpy(sctx-buffer + partial, data, len); + if (!irq_fpu_usable() || + (sctx-count % SHA1_BLOCK_SIZE) + len SHA1_BLOCK_SIZE) + return crypto_sha1_update(desc, data, len); - return 0; - } - - if (!irq_fpu_usable()) { - res = crypto_sha1_update(desc, data, len); - } else { - kernel_fpu_begin(); - res = __sha1_ssse3_update(desc, data, len, partial); - kernel_fpu_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx-count 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx-count % SHA1_BLOCK_SIZE; - padlen = (index 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); - if (!irq_fpu_usable()) { - crypto_sha1_update(desc, padding, padlen); - crypto_sha1_update(desc, (const u8 *)bits, sizeof(bits)); - } else { - kernel_fpu_begin(); - /* We need to fill a whole block for __sha1_ssse3_update() */ - if (padlen = 56) { - sctx-count += padlen; - memcpy(sctx-buffer + index, padding, padlen); - } else { - __sha1_ssse3_update(desc, padding, padlen, index); - } - __sha1_ssse3_update(desc, (const u8 *)bits, sizeof(bits), 56); - kernel_fpu_end(); - } - - /* Store state in digest */ - for (i = 0; i 5; i++) - dst[i] = cpu_to_be32(sctx-state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); + kernel_fpu_begin(); + sha1_base_do_update(desc, data, len, sha1_ssse3_block_fn, NULL); + kernel_fpu_end(); return 0; } -static int sha1_ssse3_export(struct shash_desc *desc, void *out) +static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); + if (!irq_fpu_usable()) + return 
crypto_sha1_finup(desc, data, len, out
[PATCH v3 16/16] crypto/x86: move SHA-384/512 SSSE3 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/x86/crypto/sha512_ssse3_glue.c | 193 +++- 1 file changed, 36 insertions(+), 157 deletions(-) diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 0b6af26832bf..4daa27a5d347 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -34,7 +34,7 @@ #include linux/cryptohash.h #include linux/types.h #include crypto/sha.h -#include asm/byteorder.h +#include crypto/sha512_base.h #include asm/i387.h #include asm/xcr.h #include asm/xsave.h @@ -54,183 +54,61 @@ asmlinkage void sha512_transform_rorx(const char *data, u64 *digest, static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64); - -static int sha512_ssse3_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx-state[0] = SHA512_H0; - sctx-state[1] = SHA512_H1; - sctx-state[2] = SHA512_H2; - sctx-state[3] = SHA512_H3; - sctx-state[4] = SHA512_H4; - sctx-state[5] = SHA512_H5; - sctx-state[6] = SHA512_H6; - sctx-state[7] = SHA512_H7; - sctx-count[0] = sctx-count[1] = 0; - - return 0; -} - -static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) +static void sha512_ssse3_block_fn(int blocks, u8 const *src, u64 *state, + const u8 *head, void *p) { - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx-count[0] += len; - if (sctx-count[0] len) - sctx-count[1]++; - - if (partial) { - done = SHA512_BLOCK_SIZE - partial; - memcpy(sctx-buf + partial, data, done); - sha512_transform_asm(sctx-buf, sctx-state, 1); - } - - if (len - done = SHA512_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; - - sha512_transform_asm(data + done, sctx-state, (u64) rounds); - - done += rounds * SHA512_BLOCK_SIZE; - } - - memcpy(sctx-buf, data + done, len - done); - - return 0; + if (head) + sha512_transform_asm(head, state, 1); + if 
(blocks) + sha512_transform_asm(src, state, blocks); } static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count[0] % SHA512_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len SHA512_BLOCK_SIZE) { - sctx-count[0] += len; - if (sctx-count[0] len) - sctx-count[1]++; - memcpy(sctx-buf + partial, data, len); - - return 0; - } - - if (!irq_fpu_usable()) { - res = crypto_sha512_update(desc, data, len); - } else { - kernel_fpu_begin(); - res = __sha512_ssse3_update(desc, data, len, partial); - kernel_fpu_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be64 *dst = (__be64 *)out; - __be64 bits[2]; - static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; - - /* save number of bits */ - bits[1] = cpu_to_be64(sctx-count[0] 3); - bits[0] = cpu_to_be64(sctx-count[1] 3 | sctx-count[0] 61); - - /* Pad out to 112 mod 128 and append length */ - index = sctx-count[0] 0x7f; - padlen = (index 112) ? (112 - index) : ((128+112) - index); - - if (!irq_fpu_usable()) { - crypto_sha512_update(desc, padding, padlen); - crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); - } else { - kernel_fpu_begin(); - /* We need to fill a whole block for __sha512_ssse3_update() */ - if (padlen = 112) { - sctx-count[0] += padlen; - if (sctx-count[0] padlen) - sctx-count[1]++; - memcpy(sctx-buf + index, padding, padlen); - } else { - __sha512_ssse3_update(desc, padding, padlen, index); - } - __sha512_ssse3_update(desc, (const u8 *)bits, - sizeof(bits), 112); - kernel_fpu_end(); - } - - /* Store state in digest */ - for (i = 0; i 8; i++) - dst[i] = cpu_to_be64(sctx-state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx
[PATCH v3 12/16] crypto/arm64: move SHA-1 ARMv8 implementation to base layer
Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- arch/arm64/crypto/sha1-ce-core.S | 11 ++-- arch/arm64/crypto/sha1-ce-glue.c | 133 +++ 2 files changed, 31 insertions(+), 113 deletions(-) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 09d57d98609c..a2c3ad51286b 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -131,15 +131,18 @@ CPU_LE( rev32 v11.16b, v11.16b) /* * Final block: add padding and total bit count. -* Skip if we have no total byte count in x4. In that case, the input -* size was not a round multiple of the block size, and the padding is -* handled by the C code. +* Skip if the input size was not a round multiple of the block size, +* the padding is handled by the C code in that case. */ cbz x4, 3f + ldr x5, [x2, #-8] // sha1_state::count + tst x5, #0x3f // round multiple of block size? + b.ne3f + str wzr, [x4] moviv9.2d, #0 mov x8, #0x8000 moviv10.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) + ror x7, x5, #29 // ror(lsl(x4, 3), 32) fmovd8, x8 mov x4, #0 mov v11.d[0], xzr diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 6fe83f37a750..141d5f3d7389 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -12,6 +12,7 @@ #include asm/unaligned.h #include crypto/internal/hash.h #include crypto/sha.h +#include crypto/sha1_base.h #include linux/cpufeature.h #include linux/crypto.h #include linux/module.h @@ -21,132 +22,46 @@ MODULE_AUTHOR(Ard Biesheuvel ard.biesheu...@linaro.org); MODULE_LICENSE(GPL v2); asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, - u8 *head, long bytes); + const u8 *head, void *p); -static int sha1_init(struct shash_desc *desc) +static int sha1_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - 
}; - return 0; -} - -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; - - sctx-count += len; - - if ((partial + len) = SHA1_BLOCK_SIZE) { - int blocks; - - if (partial) { - int p = SHA1_BLOCK_SIZE - partial; - - memcpy(sctx-buffer + partial, data, p); - data += p; - len -= p; - } - - blocks = len / SHA1_BLOCK_SIZE; - len %= SHA1_BLOCK_SIZE; - - kernel_neon_begin_partial(16); - sha1_ce_transform(blocks, data, sctx-state, - partial ? sctx-buffer : NULL, 0); - kernel_neon_end(); - - data += blocks * SHA1_BLOCK_SIZE; - partial = 0; - } - if (len) - memcpy(sctx-buffer + partial, data, len); - return 0; -} - -static int sha1_final(struct shash_desc *desc, u8 *out) -{ - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - struct sha1_state *sctx = shash_desc_ctx(desc); - __be64 bits = cpu_to_be64(sctx-count 3); - __be32 *dst = (__be32 *)out; - int i; - - u32 padlen = SHA1_BLOCK_SIZE -- ((sctx-count + sizeof(bits)) % SHA1_BLOCK_SIZE); - - sha1_update(desc, padding, padlen); - sha1_update(desc, (const u8 *)bits, sizeof(bits)); - - for (i = 0; i SHA1_DIGEST_SIZE / sizeof(__be32); i++) - put_unaligned_be32(sctx-state[i], dst++); + kernel_neon_begin_partial(16); + sha1_base_do_update(desc, data, len, sha1_ce_transform, NULL); + kernel_neon_end(); - *sctx = (struct sha1_state){}; return 0; } -static int sha1_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, +unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - __be32 *dst = (__be32 *)out; - int blocks; - int i; - - if (sctx-count || !len || (len % SHA1_BLOCK_SIZE)) { - sha1_update(desc, data, len); - return sha1_final(desc, out
[PATCH v3 01/16] crypto: sha1: implement base layer for SHA-1
To reduce the number of copies of boilerplate code throughout the tree, this patch implements generic glue for the SHA-1 algorithm. This allows a specific arch or hardware implementation to only implement the special handling that it needs. Signed-off-by: Ard Biesheuvel ard.biesheu...@linaro.org --- include/crypto/sha1_base.h | 123 + 1 file changed, 123 insertions(+) create mode 100644 include/crypto/sha1_base.h diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h new file mode 100644 index ..919db0920203 --- /dev/null +++ b/include/crypto/sha1_base.h @@ -0,0 +1,123 @@ +/* + * sha1_base.h - core logic for SHA-1 implementations + * + * Copyright (C) 2015 Linaro Ltd ard.biesheu...@linaro.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include crypto/internal/hash.h +#include crypto/sha.h +#include linux/crypto.h +#include linux/module.h + +#include asm/unaligned.h + +typedef void (sha1_block_fn)(int blocks, u8 const *src, u32 *state, +const u8 *head, void *p); + +static inline int sha1_base_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + sctx-state[0] = SHA1_H0; + sctx-state[1] = SHA1_H1; + sctx-state[2] = SHA1_H2; + sctx-state[3] = SHA1_H3; + sctx-state[4] = SHA1_H4; + sctx-count = 0; + + return 0; +} + +static inline int sha1_base_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state *dst = out; + + *dst = *sctx; + + return 0; +} + +static inline int sha1_base_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state const *src = in; + + *sctx = *src; + + return 0; +} + +static inline int sha1_base_do_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha1_block_fn *block_fn, + void *p) +{ + struct sha1_state *sctx 
= shash_desc_ctx(desc); + unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; + + sctx-count += len; + + if (unlikely((partial + len) = SHA1_BLOCK_SIZE)) { + int blocks; + + if (partial) { + int p = SHA1_BLOCK_SIZE - partial; + + memcpy(sctx-buffer + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA1_BLOCK_SIZE; + len %= SHA1_BLOCK_SIZE; + + block_fn(blocks, data, sctx-state, +partial ? sctx-buffer : NULL, p); + data += blocks * SHA1_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx-buffer + partial, data, len); + + return 0; +} + +static inline int sha1_base_do_finalize(struct shash_desc *desc, + sha1_block_fn *block_fn, void *p) +{ + const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64); + struct sha1_state *sctx = shash_desc_ctx(desc); + __be64 *bits = (__be64 *)(sctx-buffer + bit_offset); + unsigned int partial = sctx-count % SHA1_BLOCK_SIZE; + + sctx-buffer[partial++] = 0x80; + if (partial bit_offset) { + memset(sctx-buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial); + partial = 0; + + block_fn(1, sctx-buffer, sctx-state, NULL, p); + } + + memset(sctx-buffer + partial, 0x0, bit_offset - partial); + *bits = cpu_to_be64(sctx-count 3); + block_fn(1, sctx-buffer, sctx-state, NULL, p); + + return 0; +} + +static inline int sha1_base_finish(struct shash_desc *desc, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + __be32 *digest = (__be32 *)out; + int i; + + for (i = 0; i SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx-state[i], digest++); + + *sctx = (struct sha1_state){}; + return 0; +} -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe linux-crypto in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html