Hi again,

attached is a patch that adds SHA256 support to the VIA PadLock engine.
Please apply it on top of the previous SHA1 patch.

Again some numbers:

type     16 bytes   64 bytes  256 bytes  1024 bytes  8192 bytes
sha256   2230.10k   5584.86k  10494.04k   13480.62k   14701.91k
padlock  3104.30k  11820.84k  41395.54k  110511.10k  215739.05k

Michal Ludvig
-- 
* Personal homepage: http://www.logix.cz/michal
Index: openssl-0.9.8-O2/crypto/engine/eng_padlock.c
===================================================================
--- openssl-0.9.8-O2.orig/crypto/engine/eng_padlock.c
+++ openssl-0.9.8-O2/crypto/engine/eng_padlock.c
@@ -1111,6 +1111,23 @@ padlock_aes_cipher(EVP_CIPHER_CTX *ctx, 
 
 // #define PADLOCK_SHA_STAT 1
 
+union sha_all_ctx {
+       SHA_CTX         sha_ctx;
+       SHA256_CTX      sha256_ctx;
+};
+
+typedef int (*f_sha_init)(void *c);
+typedef int (*f_sha_update)(void *c, const void *_data, size_t len);
+typedef int (*f_sha_final)(unsigned char *md, void *c);
+typedef void (*f_sha_padlock)(char *in, char *out, int count);
+
+struct sha_digest_functions {
+       f_sha_init      init;
+       f_sha_update    update;
+       f_sha_final     final;
+       f_sha_padlock   padlock;
+};
+       
 /* Don't forget to initialize all relevant 
  * fields in padlock_sha_init() or face the
  * consequences!!! 
@@ -1118,10 +1135,12 @@ padlock_aes_cipher(EVP_CIPHER_CTX *ctx, 
  *     because zeroing fallback_ctx is
  *     a waste of time. */
 struct padlock_digest_data {
-       SHA_CTX         fallback_ctx;
        void            *buf_start, *buf_alloc;
        ssize_t         used;
        unsigned long   order:8, bypass:1;
+       /* Fallback support */
+       struct sha_digest_functions     fallback_fcs;
+       union sha_all_ctx               fallback_ctx;
 #ifdef PADLOCK_SHA_STAT
        size_t          stat_count, stat_total;
 #endif
@@ -1140,9 +1159,9 @@ padlock_sha_bypass(struct padlock_digest
        if (ddata->bypass)
                return;
 
-       SHA1_Init(&ddata->fallback_ctx);
+       ddata->fallback_fcs.init(&ddata->fallback_ctx);
        if (ddata->buf_start && ddata->used > 0) {
-               SHA1_Update(&ddata->fallback_ctx, ddata->buf_start, ddata->used);
+               ddata->fallback_fcs.update(&ddata->fallback_ctx, ddata->buf_start, ddata->used);
                if (ddata->buf_alloc) {
                        free(ddata->buf_alloc);
                        ddata->buf_alloc = 0;
@@ -1155,7 +1174,7 @@ padlock_sha_bypass(struct padlock_digest
        return;
 }
 
-static inline void
+static void
 padlock_do_sha1(char *in, char *out, int count)
 {
        /* We can't store directly to *out as it 
@@ -1179,6 +1198,33 @@ padlock_do_sha1(char *in, char *out, int
        padlock_htonl_block((uint32_t*)out, 5);
 }
 
+static void
+padlock_do_sha256(char *in, char *out, int count)
+{
+       /* We can't store directly to *out as it 
+        * doesn't have to be aligned. But who cares, 
+        * it's only a few bytes... */
+       char buf[128+16];
+       char *output = NEAREST_ALIGNED(buf);
+
+       ((uint32_t*)output)[0] = 0x6A09E667;
+       ((uint32_t*)output)[1] = 0xBB67AE85;
+       ((uint32_t*)output)[2] = 0x3C6EF372;
+       ((uint32_t*)output)[3] = 0xA54FF53A;
+       ((uint32_t*)output)[4] = 0x510E527F;
+       ((uint32_t*)output)[5] = 0x9B05688C;
+       ((uint32_t*)output)[6] = 0x1F83D9AB;
+       ((uint32_t*)output)[7] = 0x5BE0CD19;
+
+       asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"       /* rep xsha256 */
+                     : "+S"(in), "+D"(output)
+                     : "c"(count), "a"(0));
+
+       memcpy(out, output, 8 * sizeof(uint32_t));
+
+       padlock_htonl_block((uint32_t*)out, 8);
+}
+
 static int
 padlock_sha_init(EVP_MD_CTX *ctx)
 {
@@ -1195,6 +1241,32 @@ padlock_sha_init(EVP_MD_CTX *ctx)
 }
 
 static int
+padlock_sha1_init(EVP_MD_CTX *ctx)
+{
+       struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
+
+       ddata->fallback_fcs.init = (f_sha_init)SHA1_Init;
+       ddata->fallback_fcs.update = (f_sha_update)SHA1_Update;
+       ddata->fallback_fcs.final = (f_sha_final)SHA1_Final;
+       ddata->fallback_fcs.padlock = (f_sha_padlock)padlock_do_sha1;
+
+       return padlock_sha_init(ctx);
+}
+
+static int
+padlock_sha256_init(EVP_MD_CTX *ctx)
+{
+       struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
+
+       ddata->fallback_fcs.init = (f_sha_init)SHA256_Init;
+       ddata->fallback_fcs.update = (f_sha_update)SHA256_Update;
+       ddata->fallback_fcs.final = (f_sha_final)SHA256_Final;
+       ddata->fallback_fcs.padlock = (f_sha_padlock)padlock_do_sha256;
+
+       return padlock_sha_init(ctx);
+}
+
+static int
 padlock_sha_update(EVP_MD_CTX *ctx, const void *data, size_t length)
 {
        struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
@@ -1206,14 +1278,14 @@ padlock_sha_update(EVP_MD_CTX *ctx, cons
        all_total += length;
 #endif
        if (unlikely(ddata->bypass)) {
-               SHA1_Update(&ddata->fallback_ctx, data, length);
+               ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
                return 1;
        }
        if (unlikely(DDATA_FREE(ddata) < length)) {
                if (likely(ddata->used + length > (1 << PADLOCK_SHA_MAX_ORD))) {
                        /* Too much data to be stored -> bypass to SW SHA */
                        padlock_sha_bypass(ddata);
-                       SHA1_Update(&ddata->fallback_ctx, data, length);
+                       ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
                        return 1;
                } else {
                        /* Resize the alocated buffer */
@@ -1225,7 +1297,7 @@ padlock_sha_update(EVP_MD_CTX *ctx, cons
                        if(!(new_buf = realloc(ddata->buf_alloc, new_size + 16))) {
                                /* fallback plan again */
                                padlock_sha_bypass(ddata);
-                               SHA1_Update(&ddata->fallback_ctx, data, length);
+                               ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
                                return 1;
                        }
                        ddata->buf_alloc = new_buf;
@@ -1253,12 +1325,12 @@ padlock_sha_final(EVP_MD_CTX *ctx, unsig
 #endif
 
        if (ddata->bypass) {
-               SHA1_Final(md, &ddata->fallback_ctx);
+               ddata->fallback_fcs.final(md, &ddata->fallback_ctx);
                return 1;
        }
        
        /* Pass the input buffer to PadLock microcode... */
-       padlock_do_sha1(ddata->buf_start, md, ddata->used);
+       ddata->fallback_fcs.padlock(ddata->buf_start, md, ddata->used);
        free(ddata->buf_alloc);
        ddata->buf_start = 0;
        ddata->buf_alloc = 0;
@@ -1304,7 +1376,22 @@ static const EVP_MD padlock_sha1_md = {
        NID_sha1WithRSAEncryption,
        SHA_DIGEST_LENGTH,
        0,
-       padlock_sha_init,
+       padlock_sha1_init,
+       padlock_sha_update,
+       padlock_sha_final,
+       padlock_sha_copy,
+       padlock_sha_cleanup,
+       EVP_PKEY_RSA_method,
+       SHA_CBLOCK,
+       sizeof(struct padlock_digest_data),
+};
+
+static const EVP_MD padlock_sha256_md = {
+       NID_sha256,
+       NID_sha256WithRSAEncryption,
+       SHA256_DIGEST_LENGTH,
+       0,
+       padlock_sha256_init,
        padlock_sha_update,
        padlock_sha_final,
        padlock_sha_copy,
@@ -1315,8 +1402,12 @@ static const EVP_MD padlock_sha1_md = {
 };
 
 static int padlock_digest_nids[] = {
+#if !defined(OPENSSL_NO_SHA)
        NID_sha1,
-//     NID_sha256
+#endif
+#if !defined(OPENSSL_NO_SHA256)
+       NID_sha256
+#endif
 };
 
 static int padlock_digest_nids_num = sizeof(padlock_digest_nids)/sizeof(padlock_digest_nids[0]);
@@ -1332,9 +1423,17 @@ padlock_digests (ENGINE *e, const EVP_MD
 
        /* ... or the requested "digest" otherwise */
        switch (nid) {
+#if !defined(OPENSSL_NO_SHA)
          case NID_sha1:
            *digest = &padlock_sha1_md;
            break;
+#endif
+
+#if !defined(OPENSSL_NO_SHA256)
+         case NID_sha256:
+           *digest = &padlock_sha256_md;
+           break;
+#endif
 
          default:
            /* Sorry, we don't support this NID */

Reply via email to