Hi again,
Attached is a patch that adds SHA256 support to the VIA PadLock engine.
Please apply it on top of the previous SHA1 patch.
Again, some numbers (first row: stock sha256; second row: with the padlock engine):
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes
sha256 2230.10k 5584.86k 10494.04k 13480.62k 14701.91k
padlock 3104.30k 11820.84k 41395.54k 110511.10k 215739.05k
Michal Ludvig
--
* Personal homepage: http://www.logix.cz/michal
Index: openssl-0.9.8-O2/crypto/engine/eng_padlock.c
===================================================================
--- openssl-0.9.8-O2.orig/crypto/engine/eng_padlock.c
+++ openssl-0.9.8-O2/crypto/engine/eng_padlock.c
@@ -1111,6 +1111,23 @@ padlock_aes_cipher(EVP_CIPHER_CTX *ctx,
// #define PADLOCK_SHA_STAT 1
+union sha_all_ctx {
+ SHA_CTX sha_ctx;
+ SHA256_CTX sha256_ctx;
+};
+
+typedef int (*f_sha_init)(void *c);
+typedef int (*f_sha_update)(void *c, const void *_data, size_t len);
+typedef int (*f_sha_final)(unsigned char *md, void *c);
+typedef void (*f_sha_padlock)(char *in, char *out, int count);
+
+struct sha_digest_functions {
+ f_sha_init init;
+ f_sha_update update;
+ f_sha_final final;
+ f_sha_padlock padlock;
+};
+
/* Don't forget to initialize all relevant
* fields in padlock_sha_init() or face the
* consequences!!!
@@ -1118,10 +1135,12 @@ padlock_aes_cipher(EVP_CIPHER_CTX *ctx,
* because zeroing fallback_ctx is
* a waste of time. */
struct padlock_digest_data {
- SHA_CTX fallback_ctx;
void *buf_start, *buf_alloc;
ssize_t used;
unsigned long order:8, bypass:1;
+ /* Fallback support */
+ struct sha_digest_functions fallback_fcs;
+ union sha_all_ctx fallback_ctx;
#ifdef PADLOCK_SHA_STAT
size_t stat_count, stat_total;
#endif
@@ -1140,9 +1159,9 @@ padlock_sha_bypass(struct padlock_digest
if (ddata->bypass)
return;
- SHA1_Init(&ddata->fallback_ctx);
+ ddata->fallback_fcs.init(&ddata->fallback_ctx);
if (ddata->buf_start && ddata->used > 0) {
- SHA1_Update(&ddata->fallback_ctx, ddata->buf_start, ddata->used);
+ ddata->fallback_fcs.update(&ddata->fallback_ctx, ddata->buf_start, ddata->used);
if (ddata->buf_alloc) {
free(ddata->buf_alloc);
ddata->buf_alloc = 0;
@@ -1155,7 +1174,7 @@ padlock_sha_bypass(struct padlock_digest
return;
}
-static inline void
+static void
padlock_do_sha1(char *in, char *out, int count)
{
/* We can't store directly to *out as it
@@ -1179,6 +1198,33 @@ padlock_do_sha1(char *in, char *out, int
padlock_htonl_block((uint32_t*)out, 5);
}
+static void
+padlock_do_sha256(char *in, char *out, int count)
+{
+ /* We can't store directly to *out as it
+ * doesn't have to be aligned. But who cares,
+ * it's only a few bytes... */
+ char buf[128+16];
+ char *output = NEAREST_ALIGNED(buf);
+
+ ((uint32_t*)output)[0] = 0x6A09E667;
+ ((uint32_t*)output)[1] = 0xBB67AE85;
+ ((uint32_t*)output)[2] = 0x3C6EF372;
+ ((uint32_t*)output)[3] = 0xA54FF53A;
+ ((uint32_t*)output)[4] = 0x510E527F;
+ ((uint32_t*)output)[5] = 0x9B05688C;
+ ((uint32_t*)output)[6] = 0x1F83D9AB;
+ ((uint32_t*)output)[7] = 0x5BE0CD19;
+
+ asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
+ : "+S"(in), "+D"(output)
+ : "c"(count), "a"(0));
+
+ memcpy(out, output, 8 * sizeof(uint32_t));
+
+ padlock_htonl_block((uint32_t*)out, 8);
+}
+
static int
padlock_sha_init(EVP_MD_CTX *ctx)
{
@@ -1195,6 +1241,32 @@ padlock_sha_init(EVP_MD_CTX *ctx)
}
static int
+padlock_sha1_init(EVP_MD_CTX *ctx)
+{
+ struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
+
+ ddata->fallback_fcs.init = (f_sha_init)SHA1_Init;
+ ddata->fallback_fcs.update = (f_sha_update)SHA1_Update;
+ ddata->fallback_fcs.final = (f_sha_final)SHA1_Final;
+ ddata->fallback_fcs.padlock = (f_sha_padlock)padlock_do_sha1;
+
+ return padlock_sha_init(ctx);
+}
+
+static int
+padlock_sha256_init(EVP_MD_CTX *ctx)
+{
+ struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
+
+ ddata->fallback_fcs.init = (f_sha_init)SHA256_Init;
+ ddata->fallback_fcs.update = (f_sha_update)SHA256_Update;
+ ddata->fallback_fcs.final = (f_sha_final)SHA256_Final;
+ ddata->fallback_fcs.padlock = (f_sha_padlock)padlock_do_sha256;
+
+ return padlock_sha_init(ctx);
+}
+
+static int
padlock_sha_update(EVP_MD_CTX *ctx, const void *data, size_t length)
{
struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
@@ -1206,14 +1278,14 @@ padlock_sha_update(EVP_MD_CTX *ctx, cons
all_total += length;
#endif
if (unlikely(ddata->bypass)) {
- SHA1_Update(&ddata->fallback_ctx, data, length);
+ ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
return 1;
}
if (unlikely(DDATA_FREE(ddata) < length)) {
if (likely(ddata->used + length > (1 << PADLOCK_SHA_MAX_ORD))) {
/* Too much data to be stored -> bypass to SW SHA */
padlock_sha_bypass(ddata);
- SHA1_Update(&ddata->fallback_ctx, data, length);
+ ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
return 1;
} else {
/* Resize the alocated buffer */
@@ -1225,7 +1297,7 @@ padlock_sha_update(EVP_MD_CTX *ctx, cons
if(!(new_buf = realloc(ddata->buf_alloc, new_size + 16))) {
/* fallback plan again */
padlock_sha_bypass(ddata);
- SHA1_Update(&ddata->fallback_ctx, data, length);
+ ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
return 1;
}
ddata->buf_alloc = new_buf;
@@ -1253,12 +1325,12 @@ padlock_sha_final(EVP_MD_CTX *ctx, unsig
#endif
if (ddata->bypass) {
- SHA1_Final(md, &ddata->fallback_ctx);
+ ddata->fallback_fcs.final(md, &ddata->fallback_ctx);
return 1;
}
/* Pass the input buffer to PadLock microcode... */
- padlock_do_sha1(ddata->buf_start, md, ddata->used);
+ ddata->fallback_fcs.padlock(ddata->buf_start, md, ddata->used);
free(ddata->buf_alloc);
ddata->buf_start = 0;
ddata->buf_alloc = 0;
@@ -1304,7 +1376,22 @@ static const EVP_MD padlock_sha1_md = {
NID_sha1WithRSAEncryption,
SHA_DIGEST_LENGTH,
0,
- padlock_sha_init,
+ padlock_sha1_init,
+ padlock_sha_update,
+ padlock_sha_final,
+ padlock_sha_copy,
+ padlock_sha_cleanup,
+ EVP_PKEY_RSA_method,
+ SHA_CBLOCK,
+ sizeof(struct padlock_digest_data),
+};
+
+static const EVP_MD padlock_sha256_md = {
+ NID_sha256,
+ NID_sha256WithRSAEncryption,
+ SHA256_DIGEST_LENGTH,
+ 0,
+ padlock_sha256_init,
padlock_sha_update,
padlock_sha_final,
padlock_sha_copy,
@@ -1315,8 +1402,12 @@ static const EVP_MD padlock_sha1_md = {
};
static int padlock_digest_nids[] = {
+#if !defined(OPENSSL_NO_SHA)
NID_sha1,
-// NID_sha256
+#endif
+#if !defined(OPENSSL_NO_SHA256)
+ NID_sha256
+#endif
};
static int padlock_digest_nids_num =
sizeof(padlock_digest_nids)/sizeof(padlock_digest_nids[0]);
@@ -1332,9 +1423,17 @@ padlock_digests (ENGINE *e, const EVP_MD
/* ... or the requested "digest" otherwise */
switch (nid) {
+#if !defined(OPENSSL_NO_SHA)
case NID_sha1:
*digest = &padlock_sha1_md;
break;
+#endif
+
+#if !defined(OPENSSL_NO_SHA256)
+ case NID_sha256:
+ *digest = &padlock_sha256_md;
+ break;
+#endif
default:
/* Sorry, we don't support this NID */