[PATCH] crypto: arm/speck - fix building in Thumb2 mode
Building the kernel with CONFIG_THUMB2_KERNEL=y and CONFIG_CRYPTO_SPECK_NEON set fails with the following errors: arch/arm/crypto/speck-neon-core.S: Assembler messages: arch/arm/crypto/speck-neon-core.S:419: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:423: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:427: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:431: Error: r13 not allowed here -- `bic sp,#0xf' The problem is that the 'bic' instruction can't operate on the 'sp' register in Thumb2 mode. Fix it by using a temporary register. This isn't in the main loop, so the performance difference is negligible. This also matches what aes-neonbs-core.S does. Reported-by: Stefan Agner Fixes: ede9622162fa ("crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS") Signed-off-by: Eric Biggers --- arch/arm/crypto/speck-neon-core.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S index 3c1e203e53b9..57caa742016e 100644 --- a/arch/arm/crypto/speck-neon-core.S +++ b/arch/arm/crypto/speck-neon-core.S @@ -272,9 +272,11 @@ * Allocate stack space to store 128 bytes worth of tweaks. For * performance, this space is aligned to a 16-byte boundary so that we * can use the load/store instructions that declare 16-byte alignment. +* For Thumb2 compatibility, don't do the 'bic' directly on 'sp'. */ - sub sp, #128 - bic sp, #0xf + sub r12, sp, #128 + bic r12, #0xf + mov sp, r12 .if \n == 64 // Load first tweak -- 2.18.0.rc1.244.gcf134e6275-goog
Re: [PATCH v3 3/5] crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS
On 18 June 2018 at 23:56, Eric Biggers wrote: > On Sun, Jun 17, 2018 at 01:10:41PM +0200, Ard Biesheuvel wrote: >> > + >> > + // One-time XTS preparation >> > + >> > + /* >> > + * Allocate stack space to store 128 bytes worth of tweaks. For >> > + * performance, this space is aligned to a 16-byte boundary so >> > that we >> > + * can use the load/store instructions that declare 16-byte >> > alignment. >> > + */ >> > + sub sp, #128 >> > + bic sp, #0xf >> >> >> This fails here when building with CONFIG_THUMB2_KERNEL=y >> >> AS arch/arm/crypto/speck-neon-core.o >> >> arch/arm/crypto/speck-neon-core.S: Assembler messages: >> >> arch/arm/crypto/speck-neon-core.S:419: Error: r13 not allowed here -- >> `bic sp,#0xf' >> arch/arm/crypto/speck-neon-core.S:423: Error: r13 not allowed here -- >> `bic sp,#0xf' >> arch/arm/crypto/speck-neon-core.S:427: Error: r13 not allowed here -- >> `bic sp,#0xf' >> arch/arm/crypto/speck-neon-core.S:431: Error: r13 not allowed here -- >> `bic sp,#0xf' >> >> In a quick hack this change seems to address it: >> >> >> - sub sp, #128 >> - bic sp, #0xf >> + mov r6, sp >> + sub r6, #128 >> + bic r6, #0xf >> + mov sp, r6 >> >> But there is probably a better solution to address this. >> >> >>> >> >>> Given that there is no NEON on M class cores, I recommend we put >> >>> something like >> >>> >> >>> THUMB(bx pc) >> >>> THUMB(nop.w) >> >>> THUMB(.arm) >> >>> >> >>> at the beginning and be done with it. >> >> >> >> I mean nop.n or just nop, of course, and we may need a '.align 2' at >> >> the beginning as well. >> > >> > Wouldn't it be preferable to have it assemble it in Thumb2 too? It seems >> > that bic sp,#0xf is the only issue... >> > >> >> Well, in general, yes. In the case of NEON code, not really, since the >> resulting code will not be smaller anyway, because the Thumb2 NEON >> opcodes are all 4 bytes. Also, Thumb2-only cores don't have NEON >> units, so all cores that this code can run on will be able to run in >> ARM mode. 
>> >> So from a maintainability pov, having code that only assembles in one >> way is better than having code that must compile both to ARM and to >> Thumb2 opcodes. >> >> Just my 2 cents, anyway. > > I don't have too much of a preference, though Stefan's suggested 4 > instructions > can be reduced to 3, which also matches what aes-neonbs-core.S does: > > sub r12, sp, #128 > bic r12, #0xf > mov sp, r12 > > Ard, is the following what you're suggesting instead? > Yes, but after looking at the actual code, I prefer the change above. The access occurs only once, not in the loop so the additional instructions should not affect performance. Apologies for the noise. > diff --git a/arch/arm/crypto/speck-neon-core.S > b/arch/arm/crypto/speck-neon-core.S > index 3c1e203e53b9..c989ce3dc057 100644 > --- a/arch/arm/crypto/speck-neon-core.S > +++ b/arch/arm/crypto/speck-neon-core.S > @@ -8,6 +8,7 @@ > */ > > #include > +#include > > .text > .fpuneon > @@ -233,6 +234,12 @@ > * nonzero multiple of 128. > */ > .macro _speck_xts_cryptn, decrypting > + > + .align 2 > + THUMB(bx pc) > + THUMB(nop) > + THUMB(.arm) > + > push{r4-r7} > mov r7, sp > > @@ -413,6 +420,8 @@ > mov sp, r7 > pop {r4-r7} > bx lr > + > + THUMB(.thumb) > .endm > > ENTRY(speck128_xts_encrypt_neon)
Re: [PATCH v3 3/5] crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS
On Sun, Jun 17, 2018 at 01:10:41PM +0200, Ard Biesheuvel wrote: > > + > > + // One-time XTS preparation > > + > > + /* > > + * Allocate stack space to store 128 bytes worth of tweaks. For > > + * performance, this space is aligned to a 16-byte boundary so > > that we > > + * can use the load/store instructions that declare 16-byte > > alignment. > > + */ > > + sub sp, #128 > > + bic sp, #0xf > > > This fails here when building with CONFIG_THUMB2_KERNEL=y > > AS arch/arm/crypto/speck-neon-core.o > > arch/arm/crypto/speck-neon-core.S: Assembler messages: > > arch/arm/crypto/speck-neon-core.S:419: Error: r13 not allowed here -- > `bic sp,#0xf' > arch/arm/crypto/speck-neon-core.S:423: Error: r13 not allowed here -- > `bic sp,#0xf' > arch/arm/crypto/speck-neon-core.S:427: Error: r13 not allowed here -- > `bic sp,#0xf' > arch/arm/crypto/speck-neon-core.S:431: Error: r13 not allowed here -- > `bic sp,#0xf' > > In a quick hack this change seems to address it: > > > - sub sp, #128 > - bic sp, #0xf > + mov r6, sp > + sub r6, #128 > + bic r6, #0xf > + mov sp, r6 > > But there is probably a better solution to address this. > > >>> > >>> Given that there is no NEON on M class cores, I recommend we put > >>> something like > >>> > >>> THUMB(bx pc) > >>> THUMB(nop.w) > >>> THUMB(.arm) > >>> > >>> at the beginning and be done with it. > >> > >> I mean nop.n or just nop, of course, and we may need a '.align 2' at > >> the beginning as well. > > > > Wouldn't it be preferable to have it assemble it in Thumb2 too? It seems > > that bic sp,#0xf is the only issue... > > > > Well, in general, yes. In the case of NEON code, not really, since the > resulting code will not be smaller anyway, because the Thumb2 NEON > opcodes are all 4 bytes. Also, Thumb2-only cores don't have NEON > units, so all cores that this code can run on will be able to run in > ARM mode. 
> > So from a maintainability pov, having code that only assembles in one > way is better than having code that must compile both to ARM and to > Thumb2 opcodes. > > Just my 2 cents, anyway. I don't have too much of a preference, though Stefan's suggested 4 instructions can be reduced to 3, which also matches what aes-neonbs-core.S does: sub r12, sp, #128 bic r12, #0xf mov sp, r12 Ard, is the following what you're suggesting instead? diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S index 3c1e203e53b9..c989ce3dc057 100644 --- a/arch/arm/crypto/speck-neon-core.S +++ b/arch/arm/crypto/speck-neon-core.S @@ -8,6 +8,7 @@ */ #include +#include .text .fpuneon @@ -233,6 +234,12 @@ * nonzero multiple of 128. */ .macro _speck_xts_cryptn, decrypting + + .align 2 + THUMB(bx pc) + THUMB(nop) + THUMB(.arm) + push{r4-r7} mov r7, sp @@ -413,6 +420,8 @@ mov sp, r7 pop {r4-r7} bx lr + + THUMB(.thumb) .endm ENTRY(speck128_xts_encrypt_neon)
[PATCH 4/4] crypto: vmac - remove insecure version with hardcoded nonce
From: Eric Biggers Remove the original version of the VMAC template that had the nonce hardcoded to 0 and produced a digest with the wrong endianness. I'm unsure whether this had users or not (there are no explicit in-kernel references to it), but given that the hardcoded nonce made it wildly insecure unless a unique key was used for each message, let's try removing it and see if anyone complains. Leave the new "vmac64" template that requires the nonce to be explicitly specified as the first 16 bytes of data and uses the correct endianness for the digest. Signed-off-by: Eric Biggers --- crypto/tcrypt.c | 2 +- crypto/testmgr.c | 6 --- crypto/testmgr.h | 102 --- crypto/vmac.c| 84 -- 4 files changed, 8 insertions(+), 186 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index d5bcdd905007..078ec36007bf 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1939,7 +1939,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) break; case 109: - ret += tcrypt_test("vmac(aes)"); + ret += tcrypt_test("vmac64(aes)"); break; case 111: diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 60a557b0f8d3..63f263fd1dae 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3477,12 +3477,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(tgr192_tv_template) } - }, { - .alg = "vmac(aes)", - .test = alg_test_hash, - .suite = { - .hash = __VECS(aes_vmac128_tv_template) - } }, { .alg = "vmac64(aes)", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 7b022c47a623..b6362169771a 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -4603,108 +4603,6 @@ static const struct hash_testvec aes_xcbc128_tv_template[] = { } }; -static const char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01', - '\x02', '\x03', '\x02', '\x02', - '\x02', '\x04', '\x01', '\x07', - '\x04', '\x01', '\x04', '\x03',}; -static const char vmac_string2[128] = {'a', 'b', 'c',}; -static const char vmac_string3[128] = 
{'a', 'b', 'c', 'a', 'b', 'c', - 'a', 'b', 'c', 'a', 'b', 'c', - 'a', 'b', 'c', 'a', 'b', 'c', - 'a', 'b', 'c', 'a', 'b', 'c', - 'a', 'b', 'c', 'a', 'b', 'c', - 'a', 'b', 'c', 'a', 'b', 'c', - 'a', 'b', 'c', 'a', 'b', 'c', - 'a', 'b', 'c', 'a', 'b', 'c', - }; - -static const char vmac_string4[17] = {'b', 'c', 'e', 'f', - 'i', 'j', 'l', 'm', - 'o', 'p', 'r', 's', - 't', 'u', 'w', 'x', 'z'}; - -static const char vmac_string5[127] = {'r', 'm', 'b', 't', 'c', - 'o', 'l', 'k', ']', '%', - '9', '2', '7', '!', 'A'}; - -static const char vmac_string6[129] = {'p', 't', '*', '7', 'l', - 'i', '!', '#', 'w', '0', - 'z', '/', '4', 'A', 'n'}; - -static const struct hash_testvec aes_vmac128_tv_template[] = { - { - .key= "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .plaintext = NULL, - .digest = "\x07\x58\x80\x35\x77\xa4\x7b\x54", - .psize = 0, - .ksize = 16, - }, { - .key= "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .plaintext = vmac_string1, - .digest = "\xce\xf5\x3c\xd3\xae\x68\x8c\xa1", - .psize = 128, - .ksize = 16, - }, { - .key= "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .plaintext = vmac_string2, - .digest = "\xc9\x27\xb0\x73\x81\xbd\x14\x2d", - .psize = 128, - .ksize = 16, - }, { - .key= "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .plaintext = vmac_string3, - .digest = "\x8d\x1a\x95\x8c\x98\x47\x0b\x19", - .psize = 128, - .ksize = 16, - }, { - .key= "abcdefghijklmnop", -
[PATCH 0/4] crypto: vmac - various fixes
From: Eric Biggers Hi, this series fixes various bugs in the VMAC template (crypto/vmac.c). First, the per-request context was being stored in the transform context, which made VMAC not thread-safe, and the kernel could be crashed by using the same VMAC transform in multiple threads using AF_ALG (found by syzkaller). Also the keys were incorrectly being wiped after each message. Patch 2 fixes these bugs, Cc'ed to stable. But there are also bugs that require breaking changes: the nonce is hardcoded to 0, and the endianness of the final digest is wrong. So patch 3 introduces a fixed version of the VMAC template that takes the nonce as the first 16 bytes of data, and fixes the digest endianness. Patch 4 then removes the current version of the VMAC template. I'm not 100% sure whether we can really do that or not as it may have users (there are no explicit users in the kernel, though), but given that the old version was insecure unless a unique key was set for each message, I think we should try and see if anyone complains. Eric Biggers (4): crypto: vmac - require a block cipher with 128-bit block size crypto: vmac - separate tfm and request context crypto: vmac - add nonced version with big endian digest crypto: vmac - remove insecure version with hardcoded nonce crypto/tcrypt.c | 2 +- crypto/testmgr.c | 4 +- crypto/testmgr.h | 217 + crypto/vmac.c | 444 -- include/crypto/vmac.h | 63 -- 5 files changed, 351 insertions(+), 379 deletions(-) delete mode 100644 include/crypto/vmac.h -- 2.18.0.rc1.244.gcf134e6275-goog
[PATCH 1/4] crypto: vmac - require a block cipher with 128-bit block size
From: Eric Biggers The VMAC template assumes the block cipher has a 128-bit block size, but it failed to check for that. Thus it was possible to instantiate it using a 64-bit block size cipher, e.g. "vmac(cast5)", causing uninitialized memory to be used. Add the needed check when instantiating the template. Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support") Cc: <stable@vger.kernel.org> # v2.6.32+ Signed-off-by: Eric Biggers --- crypto/vmac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/vmac.c b/crypto/vmac.c index df76a816cfb2..3034454a3713 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -655,6 +655,10 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb) if (IS_ERR(alg)) return PTR_ERR(alg); + err = -EINVAL; + if (alg->cra_blocksize != 16) + goto out_put_alg; + inst = shash_alloc_instance("vmac", alg); err = PTR_ERR(inst); if (IS_ERR(inst)) -- 2.18.0.rc1.244.gcf134e6275-goog
[PATCH 3/4] crypto: vmac - add nonced version with big endian digest
From: Eric Biggers Currently the VMAC template uses a "nonce" hardcoded to 0, which makes it insecure unless a unique key is set for every message. Also, the endianness of the final digest is wrong: the implementation uses little endian, but the VMAC specification has it as big endian, as do other VMAC implementations such as the one in Crypto++. Add a new VMAC template where the nonce is passed as the first 16 bytes of data (similar to what is done for Poly1305's nonce), and the digest is big endian. Call it "vmac64", since the old name of simply "vmac" didn't clarify whether the implementation is of VMAC-64 or of VMAC-128 (which produce 64-bit and 128-bit digests respectively); so we fix the naming ambiguity too. Signed-off-by: Eric Biggers --- crypto/testmgr.c | 6 ++ crypto/testmgr.h | 155 +++ crypto/vmac.c| 130 +-- 3 files changed, 273 insertions(+), 18 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 11e45352fd0b..60a557b0f8d3 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3483,6 +3483,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(aes_vmac128_tv_template) } + }, { + .alg = "vmac64(aes)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(vmac64_aes_tv_template) + } }, { .alg = "wp256", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index b950aa234e43..7b022c47a623 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -4705,6 +4705,161 @@ static const struct hash_testvec aes_vmac128_tv_template[] = { }, }; +static const char vmac64_string1[144] = { + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\x01', '\x01', '\x01', '\x01', '\x02', '\x03', '\x02', '\x02', + '\x02', '\x04', '\x01', '\x07', '\x04', '\x01', '\x04', '\x03', +}; + +static const char vmac64_string2[144] = { + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', +'a', 'b', 'c', +}; + +static const char 
vmac64_string3[144] = { + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', +'a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', +'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a', +'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', +'a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', +'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a', +'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', +}; + +static const char vmac64_string4[33] = { + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + 'b', 'c', 'e', 'f', 'i', 'j', 'l', 'm', + 'o', 'p', 'r', 's', 't', 'u', 'w', 'x', + 'z', +}; + +static const char vmac64_string5[143] = { + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', +'r', 'm', 'b', 't', 'c', 'o', 'l', 'k', +']', '%', '9', '2', '7', '!', 'A', +}; + +static const char vmac64_string6[145] = { + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', +'p', 't', '*', '7', 'l', 'i', '!', '#', +'w', '0', 'z', '/', '4', 'A', 'n', +}; + +static const struct hash_testvec vmac64_aes_tv_template[] = { + { /* draft-krovetz-vmac-01 test vector 1 */ + .key= "abcdefghijklmnop", + .ksize = 16, + .plaintext = "\0\0\0\0\0\0\0\0bcdefghi", + .psize = 16, + .digest = "\x25\x76\xbe\x1c\x56\xd8\xb8\x1b", + }, { /* draft-krovetz-vmac-01 test vector 2 */ + .key= "abcdefghijklmnop", + .ksize = 16, + .plaintext = "\0\0\0\0\0\0\0\0bcdefghiabc", + .psize = 19, + .digest = "\x2d\x37\x6c\xf5\xb1\x81\x3c\xe5", + }, { /* draft-krovetz-vmac-01 test vector 3 */ + .key= "abcdefghijklmnop", + .ksize = 16, + .plaintext = "\0\0\0\0\0\0\0\0bcdefghi" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc", + .psize = 64, + .digest = "\xe8\x42\x1f\x61\xd5\x73\xd2\x98", + }, { /* draft-krovetz-vmac-01 test vector 4 */ + .key= "abcdefghijklmnop", + .ksize = 16, + .plaintext = "\0\0\0\0\0\0\0\0bcdefghi" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + 
"abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" +
[PATCH 2/4] crypto: vmac - separate tfm and request context
From: Eric Biggers syzbot reported a crash in vmac_final() when multiple threads concurrently use the same "vmac(aes)" transform through AF_ALG. The bug is pretty fundamental: the VMAC template doesn't separate per-request state from per-tfm (per-key) state like the other hash algorithms do, but rather stores it all in the tfm context. That's wrong. Also, vmac_final() incorrectly zeroes most of the state including the derived keys and cached pseudorandom pad. Therefore, only the first VMAC invocation with a given key calculates the correct digest. Fix these bugs by splitting the per-tfm state from the per-request state and using the proper init/update/final sequencing for requests. Reproducer for the crash: #include #include #include int main() { int fd; struct sockaddr_alg addr = { .salg_type = "hash", .salg_name = "vmac(aes)", }; char buf[256] = { 0 }; fd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(fd, (void *), sizeof(addr)); setsockopt(fd, SOL_ALG, ALG_SET_KEY, buf, 16); fork(); fd = accept(fd, NULL, NULL); for (;;) write(fd, buf, 256); } The immediate cause of the crash is that vmac_ctx_t.partial_size exceeds VMAC_NHBYTES, causing vmac_final() to memset() a negative length. Reported-by: syzbot+264bca3a6e8d64555...@syzkaller.appspotmail.com Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support") Cc: # v2.6.32+ Signed-off-by: Eric Biggers --- crypto/vmac.c | 408 +++--- include/crypto/vmac.h | 63 --- 2 files changed, 181 insertions(+), 290 deletions(-) delete mode 100644 include/crypto/vmac.h diff --git a/crypto/vmac.c b/crypto/vmac.c index 3034454a3713..bb2fc787d615 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -1,6 +1,10 @@ /* - * Modified to interface to the Linux kernel + * VMAC: Message Authentication Code using Universal Hashing + * + * Reference: https://tools.ietf.org/html/draft-krovetz-vmac-01 + * * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2018, Google Inc. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -16,14 +20,15 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. */ -/* -- - * VMAC and VHASH Implementation by Ted Krovetz (t...@acm.org) and Wei Dai. - * This implementation is herby placed in the public domain. - * The authors offers no warranty. Use at your own risk. - * Please send bug reports to the authors. - * Last modified: 17 APR 08, 1700 PDT - * --- */ +/* + * Derived from: + * VMAC and VHASH Implementation by Ted Krovetz (t...@acm.org) and Wei Dai. + * This implementation is herby placed in the public domain. + * The authors offers no warranty. Use at your own risk. + * Last modified: 17 APR 08, 1700 PDT + */ +#include #include #include #include @@ -31,9 +36,35 @@ #include #include #include -#include #include +/* + * User definable settings. + */ +#define VMAC_TAG_LEN 64 +#define VMAC_KEY_SIZE 128/* Must be 128, 192 or 256 */ +#define VMAC_KEY_LEN (VMAC_KEY_SIZE/8) +#define VMAC_NHBYTES 128/* Must 2^i for any 3 < i < 13 Standard = 128*/ + +/* per-transform (per-key) context */ +struct vmac_tfm_ctx { + struct crypto_cipher *cipher; + u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)]; + u64 polykey[2*VMAC_TAG_LEN/64]; + u64 l3key[2*VMAC_TAG_LEN/64]; +}; + +/* per-request context */ +struct vmac_desc_ctx { + union { + u8 partial[VMAC_NHBYTES]; /* partial block */ + __le64 partial_words[VMAC_NHBYTES / 8]; + }; + unsigned int partial_size; /* size of the partial block */ + bool first_block_processed; + u64 polytmp[2*VMAC_TAG_LEN/64]; /* running total of L2-hash */ +}; + /* * Constants and masks */ @@ -318,13 +349,6 @@ static void poly_step_func(u64 *ahi, u64 *alo, } while (0) #endif -static void vhash_abort(struct vmac_ctx *ctx) -{ - ctx->polytmp[0] = ctx->polykey[0] ; - ctx->polytmp[1] = ctx->polykey[1] ; - ctx->first_block_processed = 0; -} - static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) { u64 rh, 
rl, t, z = 0; @@ -364,280 +388,209 @@ static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len) return rl; } -static void vhash_update(const unsigned char *m, - unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */ - struct vmac_ctx *ctx) +/* L1 and L2-hash one or more VMAC_NHBYTES-byte blocks */ +static void vhash_blocks(const struct vmac_tfm_ctx *tctx, +