On a SPARC-T4, with AES opcodes disabled (OPENSSL_sparcv9cap=0):
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
aes-128 cbc 75200.21k 83425.11k 86767.67k 87853.06k 88279.72k
aes-192 cbc 64906.68k 71059.56k 73902.42k 74532.52k 74855.77k
aes-256 cbc 56814.90k 61781.72k 63903.74k 64367.27k 64607.57k
And with them enabled:
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
aes-128 cbc 501882.74k 836726.87k 993102.76k 1020379.48k 1054083.75k
aes-192 cbc 435068.22k 707080.77k 837915.90k 864243.03k 889279.83k
aes-256 cbc 393746.28k 620463.13k 727483.31k 749580.97k 769029.46k
This system is a T4-2 so it's fun to show off some parallel benchmarks,
for example openssl speed -multi 16 -evp aes-128-ecb gives:
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
evp 7429568.93k 17815630.93k 28436597.93k 32033047.55k 35120630.44k
35GB/sec AES encryption, not too bad.
Currently CBC, ECB, CTR, OFB, and CFB modes are explicitly optimized.
Other modes will be optimized in the future.
Signed-off-by: David S. Miller da...@davemloft.net
---
Configure |2 +-
crypto/aes/aes_sparccore.c| 55
crypto/aes/asm/aes-sparcv9.pl | 666 +
crypto/evp/e_aes.c| 400 +
crypto/sparc_arch.h | 19 ++
5 files changed, 1141 insertions(+), 1 deletion(-)
diff --git a/Configure b/Configure
index 66b4ff8..217a552 100755
--- a/Configure
+++ b/Configure
@@ -130,7 +130,7 @@ my $x86_elf_asm=$x86_asm:elf;
my $x86_64_asm=x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o
x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o
aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o
sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o
cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o;
my $ia64_asm=ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o
aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o
rc4_skey.o:ghash-ia64.o::void;
-my $sparcv9_asm=sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o
sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o aes_cbc.o
aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o
sha512-sparcv9.o:::ghash-sparcv9.o::void;
+my $sparcv9_asm=sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o
sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_sparccore.o
aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o
sha512-sparcv9.o:::ghash-sparcv9.o::void;
my $sparcv8_asm=:sparcv8.o:des_enc-sparc.o fcrypt_b.o:void;
my $alpha_asm=alphacpuid.o:bn_asm.o
alpha-mont.o:sha1-alpha.o:::ghash-alpha.o::void;
my $mips64_asm=:bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o
sha256-mips.o sha512-mips.o;
diff --git a/crypto/aes/aes_sparccore.c b/crypto/aes/aes_sparccore.c
index 2842cbc..658cc66 100644
--- a/crypto/aes/aes_sparccore.c
+++ b/crypto/aes/aes_sparccore.c
@@ -36,6 +36,7 @@
#include stdlib.h
#include openssl/crypto.h
#include openssl/aes.h
+#include openssl/modes.h
#include aes_locl.h
#include sparc_arch.h
@@ -270,3 +271,57 @@ int AES_set_decrypt_key(const unsigned char *userKey,
const int bits,
}
return 0;
}
+
+void aes_sparc_hw_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key,
+ unsigned char *ivec, int enc);
+
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+size_t len, const AES_KEY *key,
+unsigned char *ivec, const int enc)
+{
+ const void *aligned_in;
+ void *aligned_out;
+ int aligned_len;
+ size_t bl = 16;
+
+ if (!(OPENSSL_sparcv9cap_P SPARCV9_AES))
+ goto slow;
+
+ aligned_len = len ~(bl - 1);
+ if (!aligned_len)
+ goto trailing;
+
+ aligned_out = out;
+ if ((unsigned long) out 0x7) {
+ aligned_out = OPENSSL_malloc(aligned_len);
+ if (!aligned_out)
+ goto slow;
+ }
+ aligned_in = in;
+ if ((unsigned long)in 0x7) {
+ memcpy(aligned_out, in, aligned_len);
+ aligned_in = (const void *) aligned_out;
+ }
+
+ aes_sparc_hw_cbc_encrypt(aligned_in, aligned_out, aligned_len,
+key, ivec, enc);
+
+ if ((unsigned long)out 0x7) {
+ memcpy(out, aligned_out, aligned_len);
+ OPENSSL_free(aligned_out);
+ }
+trailing:
+ len -= aligned_len;
+ if (len) {
+ out += aligned_len;
+ in += aligned_len;
+slow:
+ if (enc)
+ CRYPTO_cbc128_encrypt(in, out,