The branch master has been updated
       via  9a708bf982da1d2c9739339d16d7b021da955e00 (commit)
       via  c1b2569d234c1247d2a7a3338ca4568bc0a489a5 (commit)
       via  c5307d9cc079f379ee0e2bc960abda73046cd4ec (commit)
      from  1e653d0ff7fc2e06a1cb1e05c01feecde84e67d3 (commit)


- Log -----------------------------------------------------------------
commit 9a708bf982da1d2c9739339d16d7b021da955e00
Author: Andy Polyakov <[email protected]>
Date:   Sun May 20 12:13:16 2018 +0200

    {arm64|x86_64}cpuid.pl: add special 16-byte case to OPENSSL_memcmp.
    
    OPENSSL_memcmp is a must in GCM decrypt and general-purpose loop takes
    quite a portion of execution time for short inputs, more than GHASH for
    few-byte inputs according to profiler. Special 16-byte case takes it off
    top five list in profiler output.
    
    Reviewed-by: Rich Salz <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/6312)

commit c1b2569d234c1247d2a7a3338ca4568bc0a489a5
Author: Andy Polyakov <[email protected]>
Date:   Sun May 20 12:02:39 2018 +0200

    modes/gcm128.c: coalesce calls to GHASH.
    
    On contemporary platforms assembly GHASH processes multiple blocks
    faster than one by one. For TLS payloads shorter than 16 bytes, e.g.
    alerts, it's possible to reduce hashing operation to single call.
    And for block lengths not divisible by 16 - fold two final calls to
    one. Improvement is most noticeable with "reptoline", because call to
    assembly GHASH is indirect.
    
    Reviewed-by: Rich Salz <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/6312)

commit c5307d9cc079f379ee0e2bc960abda73046cd4ec
Author: Andy Polyakov <[email protected]>
Date:   Sun May 20 11:54:24 2018 +0200

    evp/e_aes.c: replace calls to one-liners with references in GCM.
    
    Even though calls can be viewed as styling improvement, they do come
    with cost. It's not big cost and shows only on short inputs, but it is
    measurable, 2-3% on some platforms.
    
    Reviewed-by: Rich Salz <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/6312)

-----------------------------------------------------------------------

Summary of changes:
 crypto/arm64cpuid.pl     |  13 +++
 crypto/evp/e_aes.c       |  73 +++++-------
 crypto/modes/gcm128.c    | 280 +++++++++++++++++++++++++++++++++++++++++------
 crypto/modes/modes_lcl.h |   3 +
 crypto/x86_64cpuid.pl    |  12 ++
 5 files changed, 304 insertions(+), 77 deletions(-)

diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
index daa2b17..06c8add 100755
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -115,6 +115,19 @@ OPENSSL_cleanse:
 CRYPTO_memcmp:
        eor     w3,w3,w3
        cbz     x2,.Lno_data    // len==0?
+       cmp     x2,#16
+       b.ne    .Loop_cmp
+       ldp     x8,x9,[x0]
+       ldp     x10,x11,[x1]
+       eor     x8,x8,x10
+       eor     x9,x9,x11
+       orr     x8,x8,x9
+       mov     x0,#1
+       cmp     x8,#0
+       csel    x0,xzr,x0,eq
+       ret
+
+.align 4
 .Loop_cmp:
        ldrb    w4,[x0],#1
        ldrb    w5,[x1],#1
diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
index 951fc8f..eb37b4b 100644
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -2814,8 +2814,8 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int 
arg, void *ptr)
     case EVP_CTRL_INIT:
         gctx->key_set = 0;
         gctx->iv_set = 0;
-        gctx->ivlen = EVP_CIPHER_CTX_iv_length(c);
-        gctx->iv = EVP_CIPHER_CTX_iv_noconst(c);
+        gctx->ivlen = c->cipher->iv_len;
+        gctx->iv = c->iv;
         gctx->taglen = -1;
         gctx->iv_gen = 0;
         gctx->tls_aad_len = -1;
@@ -2826,7 +2826,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int 
arg, void *ptr)
             return 0;
         /* Allocate memory for IV if needed */
         if ((arg > EVP_MAX_IV_LENGTH) && (arg > gctx->ivlen)) {
-            if (gctx->iv != EVP_CIPHER_CTX_iv_noconst(c))
+            if (gctx->iv != c->iv)
                 OPENSSL_free(gctx->iv);
             if ((gctx->iv = OPENSSL_malloc(arg)) == NULL) {
                 EVPerr(EVP_F_AES_GCM_CTRL, ERR_R_MALLOC_FAILURE);
@@ -2837,17 +2837,17 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, 
int arg, void *ptr)
         return 1;
 
     case EVP_CTRL_AEAD_SET_TAG:
-        if (arg <= 0 || arg > 16 || EVP_CIPHER_CTX_encrypting(c))
+        if (arg <= 0 || arg > 16 || c->encrypt)
             return 0;
-        memcpy(EVP_CIPHER_CTX_buf_noconst(c), ptr, arg);
+        memcpy(c->buf, ptr, arg);
         gctx->taglen = arg;
         return 1;
 
     case EVP_CTRL_AEAD_GET_TAG:
-        if (arg <= 0 || arg > 16 || !EVP_CIPHER_CTX_encrypting(c)
+        if (arg <= 0 || arg > 16 || !c->encrypt
             || gctx->taglen < 0)
             return 0;
-        memcpy(ptr, EVP_CIPHER_CTX_buf_noconst(c), arg);
+        memcpy(ptr, c->buf, arg);
         return 1;
 
     case EVP_CTRL_GCM_SET_IV_FIXED:
@@ -2865,8 +2865,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int 
arg, void *ptr)
             return 0;
         if (arg)
             memcpy(gctx->iv, ptr, arg);
-        if (EVP_CIPHER_CTX_encrypting(c)
-            && RAND_bytes(gctx->iv + arg, gctx->ivlen - arg) <= 0)
+        if (c->encrypt && RAND_bytes(gctx->iv + arg, gctx->ivlen - arg) <= 0)
             return 0;
         gctx->iv_gen = 1;
         return 1;
@@ -2887,8 +2886,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int 
arg, void *ptr)
         return 1;
 
     case EVP_CTRL_GCM_SET_IV_INV:
-        if (gctx->iv_gen == 0 || gctx->key_set == 0
-            || EVP_CIPHER_CTX_encrypting(c))
+        if (gctx->iv_gen == 0 || gctx->key_set == 0 || c->encrypt)
             return 0;
         memcpy(gctx->iv + gctx->ivlen - arg, ptr, arg);
         CRYPTO_gcm128_setiv(&gctx->gcm, gctx->iv, gctx->ivlen);
@@ -2899,24 +2897,22 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, 
int arg, void *ptr)
         /* Save the AAD for later use */
         if (arg != EVP_AEAD_TLS1_AAD_LEN)
             return 0;
-        memcpy(EVP_CIPHER_CTX_buf_noconst(c), ptr, arg);
+        memcpy(c->buf, ptr, arg);
         gctx->tls_aad_len = arg;
         {
-            unsigned int len =
-                EVP_CIPHER_CTX_buf_noconst(c)[arg - 2] << 8
-                | EVP_CIPHER_CTX_buf_noconst(c)[arg - 1];
+            unsigned int len = c->buf[arg - 2] << 8 | c->buf[arg - 1];
             /* Correct length for explicit IV */
             if (len < EVP_GCM_TLS_EXPLICIT_IV_LEN)
                 return 0;
             len -= EVP_GCM_TLS_EXPLICIT_IV_LEN;
             /* If decrypting correct for tag too */
-            if (!EVP_CIPHER_CTX_encrypting(c)) {
+            if (!c->encrypt) {
                 if (len < EVP_GCM_TLS_TAG_LEN)
                     return 0;
                 len -= EVP_GCM_TLS_TAG_LEN;
             }
-            EVP_CIPHER_CTX_buf_noconst(c)[arg - 2] = len >> 8;
-            EVP_CIPHER_CTX_buf_noconst(c)[arg - 1] = len & 0xff;
+            c->buf[arg - 2] = len >> 8;
+            c->buf[arg - 1] = len & 0xff;
         }
         /* Extra padding: tag appended to record */
         return EVP_GCM_TLS_TAG_LEN;
@@ -2930,8 +2926,8 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int 
arg, void *ptr)
                     return 0;
                 gctx_out->gcm.key = &gctx_out->ks;
             }
-            if (gctx->iv == EVP_CIPHER_CTX_iv_noconst(c))
-                gctx_out->iv = EVP_CIPHER_CTX_iv_noconst(out);
+            if (gctx->iv == c->iv)
+                gctx_out->iv = out->iv;
             else {
                 if ((gctx_out->iv = OPENSSL_malloc(gctx->ivlen)) == NULL) {
                     EVPerr(EVP_F_AES_GCM_CTRL, ERR_R_MALLOC_FAILURE);
@@ -2958,8 +2954,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const 
unsigned char *key,
         do {
 #ifdef HWAES_CAPABLE
             if (HWAES_CAPABLE) {
-                HWAES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
-                                      &gctx->ks.ks);
+                HWAES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
                 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                    (block128_f) HWAES_encrypt);
 # ifdef HWAES_ctr32_encrypt_blocks
@@ -2972,8 +2967,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const 
unsigned char *key,
 #endif
 #ifdef BSAES_CAPABLE
             if (BSAES_CAPABLE) {
-                AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
-                                    &gctx->ks.ks);
+                AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
                 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                    (block128_f) AES_encrypt);
                 gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks;
@@ -2982,8 +2976,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const 
unsigned char *key,
 #endif
 #ifdef VPAES_CAPABLE
             if (VPAES_CAPABLE) {
-                vpaes_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
-                                      &gctx->ks.ks);
+                vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
                 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                    (block128_f) vpaes_encrypt);
                 gctx->ctr = NULL;
@@ -2992,8 +2985,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const 
unsigned char *key,
 #endif
                 (void)0;        /* terminate potentially open 'else' */
 
-            AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
-                                &gctx->ks.ks);
+            AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
             CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                (block128_f) AES_encrypt);
 #ifdef AES_CTR_ASM
@@ -3045,19 +3037,18 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, 
unsigned char *out,
      * Set IV from start of buffer or generate IV and write to start of
      * buffer.
      */
-    if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CIPHER_CTX_encrypting(ctx) ?
-                            EVP_CTRL_GCM_IV_GEN : EVP_CTRL_GCM_SET_IV_INV,
+    if (EVP_CIPHER_CTX_ctrl(ctx, ctx->encrypt ? EVP_CTRL_GCM_IV_GEN
+                                              : EVP_CTRL_GCM_SET_IV_INV,
                             EVP_GCM_TLS_EXPLICIT_IV_LEN, out) <= 0)
         goto err;
     /* Use saved AAD */
-    if (CRYPTO_gcm128_aad(&gctx->gcm, EVP_CIPHER_CTX_buf_noconst(ctx),
-                          gctx->tls_aad_len))
+    if (CRYPTO_gcm128_aad(&gctx->gcm, ctx->buf, gctx->tls_aad_len))
         goto err;
     /* Fix buffer and length to point to payload */
     in += EVP_GCM_TLS_EXPLICIT_IV_LEN;
     out += EVP_GCM_TLS_EXPLICIT_IV_LEN;
     len -= EVP_GCM_TLS_EXPLICIT_IV_LEN + EVP_GCM_TLS_TAG_LEN;
-    if (EVP_CIPHER_CTX_encrypting(ctx)) {
+    if (ctx->encrypt) {
         /* Encrypt payload */
         if (gctx->ctr) {
             size_t bulk = 0;
@@ -3136,11 +3127,9 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, 
unsigned char *out,
                 goto err;
         }
         /* Retrieve tag */
-        CRYPTO_gcm128_tag(&gctx->gcm, EVP_CIPHER_CTX_buf_noconst(ctx),
-                          EVP_GCM_TLS_TAG_LEN);
+        CRYPTO_gcm128_tag(&gctx->gcm, ctx->buf, EVP_GCM_TLS_TAG_LEN);
         /* If tag mismatch wipe buffer */
-        if (CRYPTO_memcmp(EVP_CIPHER_CTX_buf_noconst(ctx), in + len,
-                          EVP_GCM_TLS_TAG_LEN)) {
+        if (CRYPTO_memcmp(ctx->buf, in + len, EVP_GCM_TLS_TAG_LEN)) {
             OPENSSL_cleanse(out, len);
             goto err;
         }
@@ -3170,7 +3159,7 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned 
char *out,
         if (out == NULL) {
             if (CRYPTO_gcm128_aad(&gctx->gcm, in, len))
                 return -1;
-        } else if (EVP_CIPHER_CTX_encrypting(ctx)) {
+        } else if (ctx->encrypt) {
             if (gctx->ctr) {
                 size_t bulk = 0;
 #if defined(AES_GCM_ASM)
@@ -3261,17 +3250,15 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned 
char *out,
         }
         return len;
     } else {
-        if (!EVP_CIPHER_CTX_encrypting(ctx)) {
+        if (!ctx->encrypt) {
             if (gctx->taglen < 0)
                 return -1;
-            if (CRYPTO_gcm128_finish(&gctx->gcm,
-                                     EVP_CIPHER_CTX_buf_noconst(ctx),
-                                     gctx->taglen) != 0)
+            if (CRYPTO_gcm128_finish(&gctx->gcm, ctx->buf, gctx->taglen) != 0)
                 return -1;
             gctx->iv_set = 0;
             return 0;
         }
-        CRYPTO_gcm128_tag(&gctx->gcm, EVP_CIPHER_CTX_buf_noconst(ctx), 16);
+        CRYPTO_gcm128_tag(&gctx->gcm, ctx->buf, 16);
         gctx->taglen = 16;
         /* Don't reuse the IV */
         gctx->iv_set = 0;
diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
index 4c8ae91..7a10eb2 100644
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -986,7 +986,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     block128_f block = ctx->block;
@@ -1004,9 +1004,23 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to encrypt finalizes GHASH(AAD) */
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
         GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1019,28 +1033,48 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
 #if !defined(OPENSSL_SMALL_FOOTPRINT)
     if (16 % sizeof(size_t) == 0) { /* always true actually */
         do {
             if (n) {
+# if defined(GHASH)
+                while (n && len) {
+                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+                    --len;
+                    n = (n + 1) % 16;
+                }
+                if (n == 0) {
+                    GHASH(ctx, ctx->Xn, mres);
+                    mres = 0;
+                } else {
+                    ctx->mres = mres;
+                    return 0;
+                }
+# else
                 while (n && len) {
                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                     --len;
                     n = (n + 1) % 16;
                 }
-                if (n == 0)
+                if (n == 0) {
                     GCM_MUL(ctx);
-                else {
+                    mres = 0;
+                } else {
                     ctx->mres = n;
                     return 0;
                 }
+# endif
             }
 # if defined(STRICT_ALIGNMENT)
             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                 break;
 # endif
 # if defined(GHASH)
+            if (len >= 16 && mres) {
+                GHASH(ctx, ctx->Xn, mres);
+                mres = 0;
+            }
 #  if defined(GHASH_CHUNK)
             while (len >= GHASH_CHUNK) {
                 size_t j = GHASH_CHUNK;
@@ -1128,13 +1162,21 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
 # endif
                 else
                     ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+                while (len--) {
+                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+                    ++n;
+                }
+# else
                 while (len--) {
                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                     ++n;
                 }
+                mres = n;
+# endif
             }
 
-            ctx->mres = n;
+            ctx->mres = mres;
             return 0;
         } while (0);
     }
@@ -1152,13 +1194,22 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
             else
                 ctx->Yi.d[3] = ctr;
         }
-        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
         n = (n + 1) % 16;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+            mres = 0;
+        }
+#else
+        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+        mres = n = (n + 1) % 16;
         if (n == 0)
             GCM_MUL(ctx);
+#endif
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 }
 
@@ -1170,7 +1221,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     block128_f block = ctx->block;
@@ -1188,9 +1239,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to decrypt finalizes GHASH(AAD) */
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
         GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1203,11 +1268,25 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
 #if !defined(OPENSSL_SMALL_FOOTPRINT)
     if (16 % sizeof(size_t) == 0) { /* always true actually */
         do {
             if (n) {
+# if defined(GHASH)
+                while (n && len) {
+                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+                    --len;
+                    n = (n + 1) % 16;
+                }
+                if (n == 0) {
+                    GHASH(ctx, ctx->Xn, mres);
+                    mres = 0;
+                } else {
+                    ctx->mres = mres;
+                    return 0;
+                }
+# else
                 while (n && len) {
                     u8 c = *(in++);
                     *(out++) = c ^ ctx->EKi.c[n];
@@ -1215,18 +1294,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                     --len;
                     n = (n + 1) % 16;
                 }
-                if (n == 0)
+                if (n == 0) {
                     GCM_MUL(ctx);
-                else {
+                    mres = 0;
+                } else {
                     ctx->mres = n;
                     return 0;
                 }
+# endif
             }
 # if defined(STRICT_ALIGNMENT)
             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                 break;
 # endif
 # if defined(GHASH)
+            if (len >= 16 && mres) {
+                GHASH(ctx, ctx->Xn, mres);
+                mres = 0;
+            }
 #  if defined(GHASH_CHUNK)
             while (len >= GHASH_CHUNK) {
                 size_t j = GHASH_CHUNK;
@@ -1315,15 +1400,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
 # endif
                 else
                     ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+                while (len--) {
+                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+                    ++n;
+                }
+# else
                 while (len--) {
                     u8 c = in[n];
                     ctx->Xi.c[n] ^= c;
                     out[n] = c ^ ctx->EKi.c[n];
                     ++n;
                 }
+                mres = n;
+# endif
             }
 
-            ctx->mres = n;
+            ctx->mres = mres;
             return 0;
         } while (0);
     }
@@ -1342,15 +1435,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
             else
                 ctx->Yi.d[3] = ctr;
         }
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
+        n = (n + 1) % 16;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+            mres = 0;
+        }
+#else
         c = in[i];
         out[i] = c ^ ctx->EKi.c[n];
         ctx->Xi.c[n] ^= c;
-        n = (n + 1) % 16;
+        mres = n = (n + 1) % 16;
         if (n == 0)
             GCM_MUL(ctx);
+#endif
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 }
 
@@ -1365,7 +1467,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     void *key = ctx->key;
@@ -1382,9 +1484,23 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to encrypt finalizes GHASH(AAD) */
+#if defined(GHASH)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
         GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1397,30 +1513,51 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
     if (n) {
+# if defined(GHASH)
+        while (n && len) {
+            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+            --len;
+            n = (n + 1) % 16;
+        }
+        if (n == 0) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        } else {
+            ctx->mres = mres;
+            return 0;
+        }
+# else
         while (n && len) {
             ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
             --len;
             n = (n + 1) % 16;
         }
-        if (n == 0)
+        if (n == 0) {
             GCM_MUL(ctx);
-        else {
+            mres = 0;
+        } else {
             ctx->mres = n;
             return 0;
         }
+# endif
     }
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+        if (len >= 16 && mres) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        }
+#  if defined(GHASH_CHUNK)
     while (len >= GHASH_CHUNK) {
         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
         ctr += GHASH_CHUNK / 16;
         if (is_endian.little)
-#  ifdef BSWAP4
+#   ifdef BSWAP4
             ctx->Yi.d[3] = BSWAP4(ctr);
-#  else
+#   else
             PUTU32(ctx->Yi.c + 12, ctr);
-#  endif
+#   endif
         else
             ctx->Yi.d[3] = ctr;
         GHASH(ctx, out, GHASH_CHUNK);
@@ -1428,6 +1565,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         in += GHASH_CHUNK;
         len -= GHASH_CHUNK;
     }
+#  endif
 # endif
     if ((i = (len & (size_t)-16))) {
         size_t j = i / 16;
@@ -1468,12 +1606,16 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         else
             ctx->Yi.d[3] = ctr;
         while (len--) {
-            ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# if defined(GHASH)
+            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+# else
+            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# endif
             ++n;
         }
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 #endif
 }
@@ -1489,7 +1631,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     void *key = ctx->key;
@@ -1506,9 +1648,23 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to decrypt finalizes GHASH(AAD) */
+# if defined(GHASH)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+# else
         GCM_MUL(ctx);
+# endif
         ctx->ares = 0;
     }
 
@@ -1521,8 +1677,22 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
     if (n) {
+# if defined(GHASH)
+        while (n && len) {
+            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+            --len;
+            n = (n + 1) % 16;
+        }
+        if (n == 0) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        } else {
+            ctx->mres = mres;
+            return 0;
+        }
+# else
         while (n && len) {
             u8 c = *(in++);
             *(out++) = c ^ ctx->EKi.c[n];
@@ -1530,30 +1700,38 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
             --len;
             n = (n + 1) % 16;
         }
-        if (n == 0)
+        if (n == 0) {
             GCM_MUL(ctx);
-        else {
+            mres = 0;
+        } else {
             ctx->mres = n;
             return 0;
         }
+# endif
     }
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+    if (len >= 16 && mres) {
+        GHASH(ctx, ctx->Xn, mres);
+        mres = 0;
+    }
+#  if defined(GHASH_CHUNK)
     while (len >= GHASH_CHUNK) {
         GHASH(ctx, in, GHASH_CHUNK);
         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
         ctr += GHASH_CHUNK / 16;
         if (is_endian.little)
-#  ifdef BSWAP4
+#   ifdef BSWAP4
             ctx->Yi.d[3] = BSWAP4(ctr);
-#  else
+#   else
             PUTU32(ctx->Yi.c + 12, ctr);
-#  endif
+#   endif
         else
             ctx->Yi.d[3] = ctr;
         out += GHASH_CHUNK;
         in += GHASH_CHUNK;
         len -= GHASH_CHUNK;
     }
+#  endif
 # endif
     if ((i = (len & (size_t)-16))) {
         size_t j = i / 16;
@@ -1597,14 +1775,18 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         else
             ctx->Yi.d[3] = ctr;
         while (len--) {
+# if defined(GHASH)
+            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+# else
             u8 c = in[n];
-            ctx->Xi.c[n] ^= c;
+            ctx->Xi.c[mres++] ^= c;
             out[n] = c ^ ctx->EKi.c[n];
+# endif
             ++n;
         }
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 #endif
 }
@@ -1620,10 +1802,32 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const 
unsigned char *tag,
     u64 clen = ctx->len.u[1] << 3;
 #ifdef GCM_FUNCREF_4BIT
     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
+# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
+                         const u8 *inp, size_t len) = ctx->ghash;
+# endif
 #endif
 
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    u128 bitlen;
+    unsigned int mres = ctx->mres;
+
+    if (mres) {
+        unsigned blocks = (mres + 15) & -16;
+
+        memset(ctx->Xn + mres, 0, blocks - mres);
+        mres = blocks;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        }
+    } else if (ctx->ares) {
+        GCM_MUL(ctx);
+    }
+#else
     if (ctx->mres || ctx->ares)
         GCM_MUL(ctx);
+#endif
 
     if (is_endian.little) {
 #ifdef BSWAP8
@@ -1640,9 +1844,17 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const 
unsigned char *tag,
 #endif
     }
 
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    bitlen.hi = alen;
+    bitlen.lo = clen;
+    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
+    mres += sizeof(bitlen);
+    GHASH(ctx, ctx->Xn, mres);
+#else
     ctx->Xi.u[0] ^= alen;
     ctx->Xi.u[1] ^= clen;
     GCM_MUL(ctx);
+#endif
 
     ctx->Xi.u[0] ^= ctx->EK0.u[0];
     ctx->Xi.u[1] ^= ctx->EK0.u[1];
diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h
index 16dcf4e..fa85460 100644
--- a/crypto/modes/modes_lcl.h
+++ b/crypto/modes/modes_lcl.h
@@ -128,6 +128,9 @@ struct gcm128_context {
     unsigned int mres, ares;
     block128_f block;
     void *key;
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+    unsigned char Xn[48];
+#endif
 };
 
 struct xts128_context {
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 513d005..6423e80 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -271,6 +271,18 @@ CRYPTO_memcmp:
        xor     %r10,%r10
        cmp     \$0,$arg3
        je      .Lno_data
+       cmp     \$16,$arg3
+       jne     .Loop_cmp
+       mov     ($arg1),%r10
+       mov     8($arg1),%r11
+       mov     \$1,$arg3
+       xor     ($arg2),%r10
+       xor     8($arg2),%r11
+       or      %r11,%r10
+       cmovnz  $arg3,%rax
+       ret
+
+.align 16
 .Loop_cmp:
        mov     ($arg1),%r10b
        lea     1($arg1),$arg1
_____
openssl-commits mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-commits

Reply via email to