The branch OpenSSL_1_1_0-stable has been updated
       via  6fb428f76c2fb039734694de9f7f8ab8f4d9f59c (commit)
       via  5c6317e86d5f44291bb2b028607b8bad858b4f74 (commit)
      from  a701db76e27ffce4c88c2f621269ca5589c984ea (commit)


- Log -----------------------------------------------------------------
commit 6fb428f76c2fb039734694de9f7f8ab8f4d9f59c
Author: Andy Polyakov <[email protected]>
Date:   Mon Jul 10 15:21:00 2017 +0200

    aes/asm/aesni-sha*-x86_64.pl: add SHAEXT performance results.
    
    Reviewed-by: Kurt Roeckx <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/3898)
    
    (cherry picked from commit 1843787173da9b07029d0863e236107b1dd4fdd7)

commit 5c6317e86d5f44291bb2b028607b8bad858b4f74
Author: Andy Polyakov <[email protected]>
Date:   Mon Jul 10 15:19:45 2017 +0200

    evp/e_aes_cbc_hmac_sha256.c: give SHAEXT right priority.
    
    Reviewed-by: Kurt Roeckx <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/3898)
    
    (cherry picked from commit d0f6eb1d8c84165c383a677266cfae9c0b162781)

-----------------------------------------------------------------------

Summary of changes:
 crypto/aes/asm/aesni-sha1-x86_64.pl   |  7 ++++++-
 crypto/aes/asm/aesni-sha256-x86_64.pl | 15 +++++++++------
 crypto/evp/e_aes_cbc_hmac_sha256.c    |  8 +++++---
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/crypto/aes/asm/aesni-sha1-x86_64.pl b/crypto/aes/asm/aesni-sha1-x86_64.pl
index adff3a3..33a7f0c 100644
--- a/crypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/crypto/aes/asm/aesni-sha1-x86_64.pl
@@ -34,6 +34,8 @@
 # Haswell      4.43[+3.6(4.2)] 8.00(8.58)      4.55(5.21)  +75%(+65%)
 # Skylake      2.63[+3.5(4.1)] 6.17(6.69)      4.23(4.44)  +46%(+51%)
 # Bulldozer    5.77[+6.0]      11.72           6.37        +84%
+# Ryzen(**)    2.71[+1.93]     4.64            2.74        +69%
+# Goldmont(**) 3.82[+1.70]     5.52            4.20        +31%
 #
 #              AES-192-CBC
 # Westmere     4.51            9.81            6.80        +44%
@@ -47,13 +49,16 @@
 # Sandy Bridge 7.05            12.06(13.15)    7.12(7.72)  +69%(+70%)
 # Ivy Bridge   7.05            11.65           7.12        +64%
 # Haswell      6.19            9.76(10.34)     6.21(6.25)  +57%(+65%)
-# Skylake      3.62            7.16(7.68)      4.56(4.76)  +57%(+61$)
+# Skylake      3.62            7.16(7.68)      4.56(4.76)  +57%(+61%)
 # Bulldozer    8.00            13.95           8.25        +69%
+# Ryzen(**)    3.71            5.64            3.72        +52%
+# Goldmont(**) 5.35            7.05            5.76        +22%
 #
 # (*)  There are two code paths: SSSE3 and AVX. See sha1-568.pl for
 #      background information. Above numbers in parentheses are SSSE3
 #      results collected on AVX-capable CPU, i.e. apply on OSes that
 #      don't support AVX.
+# (**) SHAEXT results.
 #
 # Needless to mention that it makes no sense to implement "stitched"
 # *decrypt* subroutine. Because *both* AESNI-CBC decrypt and SHA1
diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl
index 3b03328..0e49f26 100644
--- a/crypto/aes/asm/aesni-sha256-x86_64.pl
+++ b/crypto/aes/asm/aesni-sha256-x86_64.pl
@@ -28,18 +28,21 @@
 # for standalone AESNI-CBC encrypt, standalone SHA256, and stitched
 # subroutine:
 #
-#               AES-128/-192/-256+SHA256       this(**)gain
-# Sandy Bridge     5.05/6.05/7.05+11.6         13.0    +28%/36%/43%
-# Ivy Bridge       5.05/6.05/7.05+10.3         11.6    +32%/41%/50%
-# Haswell          4.43/5.29/6.19+7.80         8.79    +39%/49%/59%
-# Skylake          2.62/3.14/3.62+7.70         8.10    +27%/34%/40%
-# Bulldozer        5.77/6.89/8.00+13.7         13.7    +42%/50%/58%
+#               AES-128/-192/-256+SHA256   this(**)    gain
+# Sandy Bridge     5.05/6.05/7.05+11.6     13.0        +28%/36%/43%
+# Ivy Bridge       5.05/6.05/7.05+10.3     11.6        +32%/41%/50%
+# Haswell          4.43/5.29/6.19+7.80     8.79        +39%/49%/59%
+# Skylake          2.62/3.14/3.62+7.70     8.10        +27%/34%/40%
+# Bulldozer        5.77/6.89/8.00+13.7     13.7        +42%/50%/58%
+# Ryzen(***)       2.71/-/3.71+2.05        2.74/-/3.73 +74%/-/54%
+# Goldmont(***)    3.82/-/5.35+4.16        4.73/-/5.94 +69%/-/60%
 #
 # (*)  there are XOP, AVX1 and AVX2 code paths, meaning that
 #      Westmere is omitted from loop, this is because gain was not
 #      estimated high enough to justify the effort;
 # (**) these are EVP-free results, results obtained with 'speed
 #      -evp aes-256-cbc-hmac-sha256' will vary by percent or two;
+# (***)        these are SHAEXT results;
 
 $flavour = shift;
 $output  = shift;
diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c
index 652ace4..13973f1 100644
--- a/crypto/evp/e_aes_cbc_hmac_sha256.c
+++ b/crypto/evp/e_aes_cbc_hmac_sha256.c
@@ -453,10 +453,12 @@ static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
          * to identify it and avoid stitch invocation. So that after we
          * establish that current CPU supports AVX, we even see if it's
          * either even XOP-capable Bulldozer-based or GenuineIntel one.
+         * But SHAEXT-capable go ahead...
          */
-        if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */
-            ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */
-             | (OPENSSL_ia32cap_P[0] & (1<<30))) &&    /* "Intel CPU"? */
+        if (((OPENSSL_ia32cap_P[2] & (1 << 29)) ||         /* SHAEXT? */
+             ((OPENSSL_ia32cap_P[1] & (1 << (60 - 32))) && /* AVX? */
+              ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32)))   /* XOP? */
+               | (OPENSSL_ia32cap_P[0] & (1 << 30))))) &&  /* "Intel CPU"? */
             plen > (sha_off + iv) &&
             (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
             SHA256_Update(&key->md, in + iv, sha_off);
_____
openssl-commits mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-commits

Reply via email to