The branch OpenSSL_1_0_2-stable has been updated
       via  817ddb9fb07e78b451e838a77b2b272b0dd23e5f (commit)
      from  f236ef27bd2ca99b3367554aa3e2fc9ca345deb5 (commit)


- Log -----------------------------------------------------------------
commit 817ddb9fb07e78b451e838a77b2b272b0dd23e5f
Author: Andy Polyakov <ap...@openssl.org>
Date:   Wed Nov 4 23:57:06 2015 +0100

    aesni-sha256-x86_64.pl: fix crash on AMD Jaguar.
    
    It was also found that stitch performs suboptimally on AMD Jaguar, hence
    execution is limited to XOP-capable and Intel processors.
    
    Reviewed-by: Kurt Roeckx <k...@openssl.org>
    (cherry picked from commit a5fd24d19bbb586b1c6d235c2021e9bead22c9f5)

-----------------------------------------------------------------------

Summary of changes:
 crypto/aes/asm/aesni-sha256-x86_64.pl |  7 ++-----
 crypto/evp/e_aes_cbc_hmac_sha256.c    | 11 +++++++++++
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl
index 19b0433..1772cbe 100644
--- a/crypto/aes/asm/aesni-sha256-x86_64.pl
+++ b/crypto/aes/asm/aesni-sha256-x86_64.pl
@@ -139,11 +139,8 @@ $code.=<<___ if ($avx>1);
        je      ${func}_avx2
 ___
 $code.=<<___;
-       and     \$`1<<30`,%eax                  # mask "Intel CPU" bit
-       and     \$`1<<28|1<<9`,%r10d            # mask AVX+SSSE3 bits
-       or      %eax,%r10d
-       cmp     \$`1<<28|1<<9|1<<30`,%r10d
-       je      ${func}_avx
+       and     \$`1<<28`,%r10d                 # check for AVX
+       jnz     ${func}_avx
        ud2
 ___
                                                }
diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c
index 028658b..3780021 100644
--- a/crypto/evp/e_aes_cbc_hmac_sha256.c
+++ b/crypto/evp/e_aes_cbc_hmac_sha256.c
@@ -498,7 +498,18 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
             iv = AES_BLOCK_SIZE;
 
 #  if defined(STITCHED_CALL)
+        /*
+         * Assembly stitch handles AVX-capable processors, but its
+         * performance is not optimal on AMD Jaguar, ~40% worse, for
+         * unknown reasons. Incidentally processor in question supports
+         * AVX, but not AMD-specific XOP extension, which can be used
+         * to identify it and avoid stitch invocation. So that after we
+         * establish that current CPU supports AVX, we even see if it's
+         * either even XOP-capable Bulldozer-based or GenuineIntel one.
+         */
         if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */
+            ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */
+             | (OPENSSL_ia32cap_P[0] & (1<<30))) &&    /* "Intel CPU"? */
             plen > (sha_off + iv) &&
             (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
             SHA256_Update(&key->md, in + iv, sha_off);
_____
openssl-commits mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-commits

Reply via email to