Don't you need endian.h to get bemtoh64 and htobem64?

On 13 Oct 2014, at 7:57, Christian Weisgerber <na...@mips.inka.de> wrote:

> Here's a cleaned-up diff.  Briefly tested on amd64 & sparc64.  I'll
> do some more testing tomorrow.  This already has mikeb@'s blessing.
> 
> Index: regress/sys/crypto/gmac/Makefile
> ===================================================================
> RCS file: /cvs/src/regress/sys/crypto/gmac/Makefile,v
> retrieving revision 1.2
> diff -u -p -r1.2 Makefile
> --- regress/sys/crypto/gmac/Makefile  18 Jan 2014 05:54:52 -0000      1.2
> +++ regress/sys/crypto/gmac/Makefile  12 Oct 2014 19:05:35 -0000
> @@ -3,7 +3,7 @@
> DIR=${.CURDIR}/../../../../sys
> 
> PROG= gmac_test
> -SRCS+=       rijndael.c gmac.c gmac_test.c
> +SRCS+=       rijndael.c gfmult.c gmac.c gmac_test.c
> CDIAGFLAGS=   -Wall
> CDIAGFLAGS+=  -Werror
> CDIAGFLAGS+=  -Wpointer-arith
> Index: sys/crypto/gfmult.c
> ===================================================================
> RCS file: sys/crypto/gfmult.c
> diff -N sys/crypto/gfmult.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/crypto/gfmult.c       12 Oct 2014 17:28:42 -0000
> @@ -0,0 +1,275 @@
> +/*-
> + * Copyright (c) 2014 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by John-Mark Gurney under
> + * the sponsorship of the FreeBSD Foundation and
> + * Rubicon Communications, LLC (Netgate).
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1.  Redistributions of source code must retain the above copyright
> + *     notice, this list of conditions and the following disclaimer.
> + * 2.  Redistributions in binary form must reproduce the above copyright
> + *     notice, this list of conditions and the following disclaimer in the
> + *     documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + *   $FreeBSD$
> + *
> + */
> +
> +#include <crypto/gfmult.h>
> +
> +#define REV_POLY_REDUCT      0xe1    /* 0x87 bit reversed */
> +
> +/* reverse the bits of a nibble */
> +static const uint8_t nib_rev[] = {
> +     0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
> +     0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
> +};
> +
> +/* calulate v * 2 */
> +static inline struct gf128
> +gf128_mulalpha(struct gf128 v)
> +{
> +     uint64_t mask;
> +
> +     mask = !!(v.v[1] & 1);
> +     mask = ~(mask - 1);
> +     v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63);
> +     v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56);
> +
> +     return v;
> +}
> +
> +/*
> + * Generate a table for 0-16 * h.  Store the results in the table w/ indexes
> + * bit reversed, and the words striped across the values.
> + */
> +void
> +gf128_genmultable(struct gf128 h, struct gf128table *t)
> +{
> +     struct gf128 tbl[16];
> +     int i;
> +
> +     tbl[0] = MAKE_GF128(0, 0);
> +     tbl[1] = h;
> +
> +     for (i = 2; i < 16; i += 2) {
> +             tbl[i] = gf128_mulalpha(tbl[i / 2]);
> +             tbl[i + 1] = gf128_add(tbl[i], h);
> +     }
> +
> +     for (i = 0; i < 16; i++) {
> +             t->a[nib_rev[i]] = tbl[i].v[0] >> 32;
> +             t->b[nib_rev[i]] = tbl[i].v[0];
> +             t->c[nib_rev[i]] = tbl[i].v[1] >> 32;
> +             t->d[nib_rev[i]] = tbl[i].v[1];
> +     }
> +}
> +
> +/*
> + * Generate tables containing h, h^2, h^3 and h^4, starting at 0.
> + */
> +void
> +gf128_genmultable4(struct gf128 h, struct gf128table4 *t)
> +{
> +     struct gf128 h2, h3, h4;
> +
> +     gf128_genmultable(h, &t->tbls[0]);
> +
> +     h2 = gf128_mul(h, &t->tbls[0]);
> +
> +     gf128_genmultable(h2, &t->tbls[1]);
> +
> +     h3 = gf128_mul(h, &t->tbls[1]);
> +     gf128_genmultable(h3, &t->tbls[2]);
> +
> +     h4 = gf128_mul(h2, &t->tbls[1]);
> +     gf128_genmultable(h4, &t->tbls[3]);
> +}
> +
> +/*
> + * Read a row from the table.
> + */
> +static inline struct gf128
> +readrow(struct gf128table *tbl, unsigned bits)
> +{
> +     struct gf128 r;
> +
> +     bits = bits % 16;
> +
> +     r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits];
> +     r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits];
> +
> +     return r;
> +}
> +
> +/*
> + * These are the reduction values.  Since we are dealing with bit reversed
> + * version, the values need to be bit reversed, AND the indexes are also
> + * bit reversed to make lookups quicker.
> + */
> +static uint16_t reduction[] = {
> +     0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
> +     0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
> +};
> +
> +/*
> + * Calculate:
> + * (x*2^4 + word[3,0]*h) *
> + * 2^4 + word[7,4]*h) *
> + * ...
> + * 2^4 + word[63,60]*h
> + */
> +static struct gf128
> +gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl)
> +{
> +     struct gf128 row;
> +     unsigned bits;
> +     unsigned redbits;
> +     int i;
> +
> +     for (i = 0; i < 64; i += 4) {
> +             bits = word % 16;
> +
> +             /* fetch row */
> +             row = readrow(tbl, bits);
> +
> +             /* x * 2^4 */
> +             redbits = x.v[1] % 16;
> +             x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
> +             x.v[0] >>= 4;
> +             x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
> +
> +             word >>= 4;
> +
> +             x = gf128_add(x, row);
> +     }
> +
> +     return x;
> +}
> +
> +/*
> + * Calculate
> + * (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) *
> + * ...
> + * 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h
> + *
> + * Passing/returning struct is .5% faster than passing in via pointer on
> + * amd64.
> + */
> +static struct gf128
> +gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd,
> +    struct gf128 x, struct gf128table4 *tbl)
> +{
> +     struct gf128 rowa, rowb, rowc, rowd;
> +     unsigned bitsa, bitsb, bitsc, bitsd;
> +     unsigned redbits;
> +     int i;
> +
> +     /*
> +      * XXX - nibble reverse words to save a shift? probably not as
> +      * nibble reverse would take 20 ops (5 * 4) verse 16
> +      */
> +
> +     for (i = 0; i < 64; i += 4) {
> +             bitsa = worda % 16;
> +             bitsb = wordb % 16;
> +             bitsc = wordc % 16;
> +             bitsd = wordd % 16;
> +
> +             /* fetch row */
> +             rowa = readrow(&tbl->tbls[3], bitsa);
> +             rowb = readrow(&tbl->tbls[2], bitsb);
> +             rowc = readrow(&tbl->tbls[1], bitsc);
> +             rowd = readrow(&tbl->tbls[0], bitsd);
> +
> +             /* x * 2^4 */
> +             redbits = x.v[1] % 16;
> +             x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
> +             x.v[0] >>= 4;
> +             x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
> +
> +             worda >>= 4;
> +             wordb >>= 4;
> +             wordc >>= 4;
> +             wordd >>= 4;
> +
> +             x = gf128_add(x, gf128_add(rowa, gf128_add(rowb,
> +                 gf128_add(rowc, rowd))));
> +     }
> +
> +     return x;
> +}
> +
> +struct gf128
> +gf128_mul(struct gf128 v, struct gf128table *tbl)
> +{
> +     struct gf128 ret;
> +
> +     ret = MAKE_GF128(0, 0);
> +
> +     ret = gfmultword(v.v[1], ret, tbl);
> +     ret = gfmultword(v.v[0], ret, tbl);
> +
> +     return ret;
> +}
> +
> +/*
> + * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
> + * (((a*h+b)*h+c)*h+d)*h
> + */
> +struct gf128
> +gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d,
> +    struct gf128table4 *tbl)
> +{
> +     struct gf128 tmp;
> +
> +     tmp = MAKE_GF128(0, 0);
> +
> +     tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
> +     tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
> +
> +     return tmp;
> +}
> +
> +/*
> + * a = data[0..15] + r
> + * b = data[16..31]
> + * c = data[32..47]
> + * d = data[48..63]
> + *
> + * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
> + * (((a*h+b)*h+c)*h+d)*h
> + */
> +struct gf128
> +gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl)
> +{
> +     struct gf128 a, b, c, d;
> +     struct gf128 tmp;
> +
> +     tmp = MAKE_GF128(0, 0);
> +
> +     a = gf128_add(r, gf128_read(&v[0*16]));
> +     b = gf128_read(&v[1*16]);
> +     c = gf128_read(&v[2*16]);
> +     d = gf128_read(&v[3*16]);
> +
> +     tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
> +     tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
> +
> +     return tmp;
> +}
> Index: sys/crypto/gfmult.h
> ===================================================================
> RCS file: sys/crypto/gfmult.h
> diff -N sys/crypto/gfmult.h
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/crypto/gfmult.h       12 Oct 2014 19:54:03 -0000
> @@ -0,0 +1,125 @@
> +/*-
> + * Copyright (c) 2014 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by John-Mark Gurney under
> + * the sponsorship of the FreeBSD Foundation and
> + * Rubicon Communications, LLC (Netgate).
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1.  Redistributions of source code must retain the above copyright
> + *     notice, this list of conditions and the following disclaimer.
> + * 2.  Redistributions in binary form must reproduce the above copyright
> + *     notice, this list of conditions and the following disclaimer in the
> + *     documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + *   $FreeBSD$
> + *
> + */
> +
> +#ifndef _GFMULT_H_
> +#define _GFMULT_H_
> +
> +#include <sys/types.h>
> +
> +#ifdef _KERNEL
> +#define be64dec(buf) bemtoh64(buf)
> +#define be64enc(buf, x)      htobem64(buf, x)
> +#else
> +#include <endian.h>
> +#define be64dec(buf) be64toh(*(uint64_t *)buf)
> +#define be64enc(buf, x)      (*(uint64_t *)buf = htobe64(x))
> +#endif
> +
> +/* XXX GCC 4.2 cannot align stack variables to 64 */
> +#define REQ_ALIGN    (16/* * 4*/)
> +/*
> + * The rows are striped across cache lines.  Note that the indexes
> + * are bit reversed to make accesses quicker.
> + */
> +struct gf128table {
> +     uint32_t a[16] __aligned(REQ_ALIGN);    /* bits   0 - 31 */
> +     uint32_t b[16] __aligned(REQ_ALIGN);    /* bits  63 - 32 */
> +     uint32_t c[16] __aligned(REQ_ALIGN);    /* bits  95 - 64 */
> +     uint32_t d[16] __aligned(REQ_ALIGN);    /* bits 127 - 96 */
> +} __aligned(REQ_ALIGN);
> +
> +/*
> + * A set of tables that contain h, h^2, h^3, h^4.  To be used w/ gf128_mul4.
> + */
> +struct gf128table4 {
> +     struct gf128table       tbls[4];
> +};
> +
> +/*
> + * GCM per spec is bit reversed in memory.  So byte 0 is really bit reversed
> + * and contains bits 0-7.  We can deal w/ this by using right shifts and
> + * related math instead of having to bit reverse everything.  This means that
> + * the low bits are in v[0] (bits 0-63) and reverse order, while the high
> + * bits are in v[1] (bits 64-127) and reverse order.  The high bit of v[0] is
> + * bit 0, and the low bit of v[1] is bit 127.
> + */
> +struct gf128 {
> +     uint64_t v[2];
> +};
> +
> +/* Note that we don't bit reverse in MAKE_GF128. */
> +#define MAKE_GF128(a, b)     ((struct gf128){.v = { (a), (b) } })
> +#define GF128_EQ(a, b)               ((((a).v[0] ^ (b).v[0]) | \
> +                                 ((a).v[1] ^ (b).v[1])) == 0)
> +
> +static inline struct gf128
> +gf128_read(const uint8_t *buf)
> +{
> +     struct gf128 r;
> +
> +     r.v[0] = be64dec(buf);
> +     buf += sizeof(uint64_t);
> +
> +     r.v[1] = be64dec(buf);
> +
> +     return r;
> +}
> +
> +static inline void
> +gf128_write(struct gf128 v, uint8_t *buf)
> +{
> +     uint64_t tmp;
> +
> +     be64enc(buf, v.v[0]);
> +     buf += sizeof tmp;
> +
> +     be64enc(buf, v.v[1]);
> +}
> +
> +static inline struct gf128 __pure /* XXX - __pure2 instead */
> +gf128_add(struct gf128 a, struct gf128 b)
> +{
> +     a.v[0] ^= b.v[0];
> +     a.v[1] ^= b.v[1];
> +
> +     return a;
> +}
> +
> +void gf128_genmultable(struct gf128 h, struct gf128table *t);
> +void gf128_genmultable4(struct gf128 h, struct gf128table4 *t);
> +struct gf128 gf128_mul(struct gf128 v, struct gf128table *tbl);
> +struct gf128 gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c,
> +    struct gf128 d, struct gf128table4 *tbl);
> +struct gf128 gf128_mul4b(struct gf128 r, const uint8_t *v,
> +    struct gf128table4 *tbl);
> +
> +#endif /* _GFMULT_H_ */
> Index: sys/crypto/gmac.c
> ===================================================================
> RCS file: /cvs/src/sys/crypto/gmac.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 gmac.c
> --- sys/crypto/gmac.c 11 Jan 2011 15:44:23 -0000      1.3
> +++ sys/crypto/gmac.c 12 Oct 2014 20:09:20 -0000
> @@ -16,6 +16,35 @@
>  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>  */
> 
> +/*-
> + * Copyright (c) 2014 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by John-Mark Gurney under
> + * the sponsorship of the FreeBSD Foundation and
> + * Rubicon Communications, LLC (Netgate).
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1.  Redistributions of source code must retain the above copyright
> + *     notice, this list of conditions and the following disclaimer.
> + * 2.  Redistributions in binary form must reproduce the above copyright
> + *     notice, this list of conditions and the following disclaimer in the
> + *     documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> /*
>  * This code implements the Message Authentication part of the
>  * Galois/Counter Mode (as being described in the RFC 4543) using
> @@ -28,91 +57,33 @@
> #include <crypto/rijndael.h>
> #include <crypto/gmac.h>
> 
> -void ghash_gfmul(uint32_t *, uint32_t *, uint32_t *);
> -void ghash_update(GHASH_CTX *, uint8_t *, size_t);
> -
> -/* Computes a block multiplication in the GF(2^128) */
> -void
> -ghash_gfmul(uint32_t *X, uint32_t *Y, uint32_t *product)
> -{
> -     uint32_t        v[4];
> -     uint32_t        z[4] = { 0, 0, 0, 0};
> -     uint8_t         *x = (uint8_t *)X;
> -     uint32_t        mul;
> -     int             i;
> -
> -     v[0] = betoh32(Y[0]);
> -     v[1] = betoh32(Y[1]);
> -     v[2] = betoh32(Y[2]);
> -     v[3] = betoh32(Y[3]);
> -
> -     for (i = 0; i < GMAC_BLOCK_LEN * 8; i++) {
> -             /* update Z */
> -             if (x[i >> 3] & (1 << (~i & 7))) {
> -                     z[0] ^= v[0];
> -                     z[1] ^= v[1];
> -                     z[2] ^= v[2];
> -                     z[3] ^= v[3];
> -             } /* else: we preserve old values */
> -
> -             /* update V */
> -             mul = v[3] & 1;
> -             v[3] = (v[2] << 31) | (v[3] >> 1);
> -             v[2] = (v[1] << 31) | (v[2] >> 1);
> -             v[1] = (v[0] << 31) | (v[1] >> 1);
> -             v[0] = (v[0] >> 1) ^ (0xe1000000 * mul);
> -     }
> -
> -     product[0] = htobe32(z[0]);
> -     product[1] = htobe32(z[1]);
> -     product[2] = htobe32(z[2]);
> -     product[3] = htobe32(z[3]);
> -}
> -
> -void
> -ghash_update(GHASH_CTX *ctx, uint8_t *X, size_t len)
> -{
> -     uint32_t        *x = (uint32_t *)X;
> -     uint32_t        *s = (uint32_t *)ctx->S;
> -     uint32_t        *y = (uint32_t *)ctx->Z;
> -     int             i;
> -
> -     for (i = 0; i < len / GMAC_BLOCK_LEN; i++) {
> -             s[0] = y[0] ^ x[0];
> -             s[1] = y[1] ^ x[1];
> -             s[2] = y[2] ^ x[2];
> -             s[3] = y[3] ^ x[3];
> -
> -             ghash_gfmul((uint32_t *)ctx->S, (uint32_t *)ctx->H,
> -                 (uint32_t *)ctx->S);
> -
> -             y = s;
> -             x += 4;
> -     }
> -
> -     bcopy(ctx->S, ctx->Z, GMAC_BLOCK_LEN);
> -}
> -
> #define AESCTR_NONCESIZE      4
> 
> void
> AES_GMAC_Init(AES_GMAC_CTX *ctx)
> {
> -     bzero(ctx->ghash.H, GMAC_BLOCK_LEN);
> -     bzero(ctx->ghash.S, GMAC_BLOCK_LEN);
> -     bzero(ctx->ghash.Z, GMAC_BLOCK_LEN);
> -     bzero(ctx->J, GMAC_BLOCK_LEN);
> +     bzero(ctx, sizeof(*ctx));
> }
> 
> void
> AES_GMAC_Setkey(AES_GMAC_CTX *ctx, const uint8_t *key, uint16_t klen)
> {
> +     const uint8_t   zeros[GMAC_BLOCK_LEN] = {};
> +     struct gf128    h;
> +     uint8_t         hbuf[GMAC_BLOCK_LEN];
> +
>       ctx->rounds = rijndaelKeySetupEnc(ctx->K, (u_char *)key,
>           (klen - AESCTR_NONCESIZE) * 8);
>       /* copy out salt to the counter block */
>       bcopy(key + klen - AESCTR_NONCESIZE, ctx->J, AESCTR_NONCESIZE);
>       /* prepare a hash subkey */
> -     rijndaelEncrypt(ctx->K, ctx->rounds, ctx->ghash.H, ctx->ghash.H);
> +     rijndaelEncrypt(ctx->K, ctx->rounds, zeros, hbuf);
> +
> +     h = gf128_read(hbuf);
> +     gf128_genmultable4(h, &ctx->ghashtbl);
> +
> +     explicit_bzero(&h, sizeof(h));
> +     explicit_bzero(hbuf, sizeof(hbuf));
> }
> 
> void
> @@ -125,20 +96,34 @@ AES_GMAC_Reinit(AES_GMAC_CTX *ctx, const
> int
> AES_GMAC_Update(AES_GMAC_CTX *ctx, const uint8_t *data, uint16_t len)
> {
> -     uint32_t        blk[4] = { 0, 0, 0, 0 };
> -     int             plen;
> +     struct gf128    v;
> +     uint8_t         buf[GMAC_BLOCK_LEN] = {};
> +     int             i;
> +
> +     v = ctx->hash;
> 
> -     if (len > 0) {
> -             plen = len % GMAC_BLOCK_LEN;
> -             if (len >= GMAC_BLOCK_LEN)
> -                     ghash_update(&ctx->ghash, (uint8_t *)data, len - plen);
> -             if (plen) {
> -                     bcopy((uint8_t *)data + (len - plen), (uint8_t *)blk,
> -                         plen);
> -                     ghash_update(&ctx->ghash, (uint8_t *)blk,
> -                         GMAC_BLOCK_LEN);
> +     while (len > 0) {
> +             if (len >= 4*GMAC_BLOCK_LEN) {
> +                     i = 4*GMAC_BLOCK_LEN;
> +                     v = gf128_mul4b(v, data, &ctx->ghashtbl);
> +             } else if (len >= GMAC_BLOCK_LEN) {
> +                     i = GMAC_BLOCK_LEN;
> +                     v = gf128_add(v, gf128_read(data));
> +                     v = gf128_mul(v, &ctx->ghashtbl.tbls[0]);
> +             } else {
> +                     i = len;
> +                     bcopy(data, buf, i);
> +                     v = gf128_add(v, gf128_read(&buf[0]));
> +                     v = gf128_mul(v, &ctx->ghashtbl.tbls[0]);
> +                     explicit_bzero(buf, sizeof buf);
>               }
> +             len -= i;
> +             data += i;
>       }
> +
> +     ctx->hash = v;
> +     explicit_bzero(&v, sizeof v);
> +
>       return (0);
> }
> 
> @@ -146,12 +131,12 @@ void
> AES_GMAC_Final(uint8_t digest[GMAC_DIGEST_LEN], AES_GMAC_CTX *ctx)
> {
>       uint8_t         keystream[GMAC_BLOCK_LEN];
> -     int             i;
> +     struct gf128    a;
> 
>       /* do one round of GCTR */
>       ctx->J[GMAC_BLOCK_LEN - 1] = 1;
>       rijndaelEncrypt(ctx->K, ctx->rounds, ctx->J, keystream);
> -     for (i = 0; i < GMAC_DIGEST_LEN; i++)
> -             digest[i] = ctx->ghash.S[i] ^ keystream[i];
> +     a = gf128_add(ctx->hash, gf128_read(keystream));
> +     gf128_write(a, digest);
>       explicit_bzero(keystream, sizeof(keystream));
> }
> Index: sys/crypto/gmac.h
> ===================================================================
> RCS file: /cvs/src/sys/crypto/gmac.h,v
> retrieving revision 1.2
> diff -u -p -r1.2 gmac.h
> --- sys/crypto/gmac.h 5 Dec 2012 23:20:15 -0000       1.2
> +++ sys/crypto/gmac.h 12 Oct 2014 17:40:58 -0000
> @@ -19,6 +19,7 @@
> #ifndef _GMAC_H_
> #define _GMAC_H_
> 
> +#include <crypto/gfmult.h>
> #include <crypto/rijndael.h>
> 
> #define GMAC_BLOCK_LEN                16
> @@ -31,7 +32,8 @@ typedef struct _GHASH_CTX {
> } GHASH_CTX;
> 
> typedef struct _AES_GMAC_CTX {
> -     GHASH_CTX       ghash;
> +     struct gf128table4 ghashtbl;
> +     struct gf128    hash;
>       uint32_t        K[4*(AES_MAXROUNDS + 1)];
>       uint8_t         J[GMAC_BLOCK_LEN];              /* counter block */
>       int             rounds;
> -- 
> Christian "naddy" Weisgerber                          na...@mips.inka.de
> 


Reply via email to