The branch OpenSSL_1_1_0-stable has been updated
       via  738a9dd53cacce593cd7d67e18e1273549640a79 (commit)
      from  6fb428f76c2fb039734694de9f7f8ab8f4d9f59c (commit)


- Log -----------------------------------------------------------------
commit 738a9dd53cacce593cd7d67e18e1273549640a79
Author: Andy Polyakov <[email protected]>
Date:   Mon Jul 24 21:50:52 2017 +0200

    x86_64 assembly pack: "optimize" for Knights Landing.
    
    "Optimize" is in quotes because it's rather a "salvage operation"
    for now. The idea is to identify processor capability flags that
    drive Knights Landing to suboptimal code paths and mask them.
    Two flags were identified, XSAVE and ADCX/ADOX. Former affects
    choice of AES-NI code path specific for Silvermont (Knights Landing
    is of Silvermont "ancestry"). And 64-bit ADCX/ADOX instructions are
    effectively mishandled at decode time. In both cases we are looking
    at ~2x improvement.
    
    Hardware used for benchmarking courtesy of Atos, experiments run by
    Romain Dolbeau <[email protected]>. Kudos!
    
    This is a minimalistic backport of 64d92d74985ebb3d0be58a9718f9e080a14a8e7f
    
    Thanks to David Benjamin for spotting the typo in Knights Landing detection!
    
    Reviewed-by: Rich Salz <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/4006)

-----------------------------------------------------------------------

Summary of changes:
 crypto/x86_64cpuid.pl | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index afc50af..7995b5c 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -143,8 +143,19 @@ OPENSSL_ia32_cpuid:
        or      \$0x40000000,%edx       # set reserved bit#30 on Intel CPUs
        and     \$15,%ah
        cmp     \$15,%ah                # examine Family ID
-       jne     .Lnotintel
+       jne     .LnotP4
        or      \$0x00100000,%edx       # set reserved bit#20 to engage RC4_CHAR
+.LnotP4:
+       cmp     \$6,%ah
+       jne     .Lnotintel
+       and     \$0x0fff0ff0,%eax
+       cmp     \$0x00050670,%eax       # Knights Landing
+       je      .Lknights
+       cmp     \$0x00080650,%eax       # Knights Mill (according to sde)
+       jne     .Lnotintel
+.Lknights:
+       and     \$0xfbffffff,%ecx       # clear XSAVE flag to mimic Silvermont
+
 .Lnotintel:
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Lgeneric
@@ -169,6 +180,10 @@ OPENSSL_ia32_cpuid:
        mov     \$7,%eax
        xor     %ecx,%ecx
        cpuid
+       bt      \$26,%r9d               # check XSAVE bit, cleared on Knights
+       jc      .Lnotknights
+       and     \$0xfff7ffff,%ebx       # clear ADCX/ADOX flag
+.Lnotknights:
        mov     %ebx,8(%rdi)            # save extended feature flags
 .Lno_extended_info:
 
_____
openssl-commits mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-commits

Reply via email to