Dear OpenSSL-dev

This is a patch to a patch. 

I have recently installed openssl-1.0.1c with the following patches (available 
from
http://git.alpinelinux.org/cgit/aports/plain/main/openssl/)

0001-crypto-hmac-support-EVP_MD_CTX_FLAG_ONESHOT-and-set-.patch
0002-engines-e_padlock-backport-cvs-head-changes.patch
0003-engines-e_padlock-implement-sha1-sha224-sha256-accel.patch
0004-crypto-engine-autoload-padlock-dynamic-engine.patch

These patches provide PadLock support for the x64 VIA Nano processors.

I have one small change to suggest, and that is to set a flag in the definition 
of padlock_sha1_md.

Namely, to change 
0003-engines-e_padlock-implement-sha1-sha224-sha256-accel.patch from

+static EVP_MD padlock_sha1_md = {
+       NID_sha1,
+       NID_sha1WithRSAEncryption,
+       SHA_DIGEST_LENGTH,
+       0,
+       padlock_sha1_init,


to

+static EVP_MD padlock_sha1_md = {
+       NID_sha1,
+       NID_sha1WithRSAEncryption,
+       SHA_DIGEST_LENGTH,
+       EVP_MD_FLAG_PKEY_METHOD_SIGNATURE,
+       padlock_sha1_init,

This is because OpenSSL, it seems, does not distinguish between DSA/SHA1 and 
RSA/SHA1. When it checks the digest's required_pkey_type in 
p_verify.c:101, OpenSSL always compares the key type against the RSA/SHA1 
pkey types, even when the key is a DSA key.

OpenSSL (without padlock support) uses the flag 
EVP_MD_FLAG_PKEY_METHOD_SIGNATURE to get around this, which is why I have 
copied that flag from the definition of sha1_md to padlock_sha1_md.

This change is sufficient for "make test" to pass all tests on Ubuntu 12.10 
x86_64 and FreeBSD 9.0 amd64.

Regards,

Daniel

-- 

Daniel Mansfield
School of Mathematics and Statistics
University of New South Wales
Sydney NSW 2052
Australia

ph (+61|0)2 9385 6904
From 77b32d0906eaac4d9adf3e6b7c3b52d927e10b41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.te...@iki.fi>
Date: Wed, 28 Jul 2010 08:37:58 +0300
Subject: [PATCH 3/4] engines/e_padlock: implement sha1/sha224/sha256
 acceleration

Limited support for VIA C7 that works only when EVP_MD_CTX_FLAG_ONESHOT
is used appropriately (as done by EVP_Digest, and my previous HMAC patch).

Full support for VIA Nano including partial transformation and 64-bit mode.

Benchmarks from a VIA Nano 1.6GHz, run with the previous HMAC and apps/speed
patches applied. Numbers are from a single run; error margin is about 100-200k.

No padlock

type         16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
sha1         20057.60k    51514.05k    99721.39k   130167.81k   142811.14k
sha256        7757.72k    16907.18k    28937.05k    35181.23k    37568.51k
hmac(sha1)    8582.53k    27644.69k    70402.30k   114602.67k   140167.85k

With the patch

sha1         37713.77k   114562.71k   259637.33k   379907.41k   438818.13k
sha256       34262.86k   103233.75k   232476.07k   338386.60k   389860.01k
hmac(sha1)    8424.70k    31475.11k   104036.10k   245559.30k   406667.26k
---
 engines/e_padlock.c | 660 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 613 insertions(+), 47 deletions(-)

diff --git a/engines/e_padlock.c b/engines/e_padlock.c
index 6ab42d2..e107d3c 100644
--- a/engines/e_padlock.c
+++ b/engines/e_padlock.c
@@ -3,6 +3,9 @@
  * Written by Michal Ludvig <mic...@logix.cz>
  *            http://www.logix.cz/michal
  *
+ * SHA support by Timo Teras <timo.te...@iki.fi>. Portions based on
+ * code originally written by Michal Ludvig.
+ *
  * Big thanks to Andy Polyakov for a help with optimization, 
  * assembler fixes, port to MS Windows and a lot of other 
  * valuable work on this engine!
@@ -64,7 +67,9 @@
 
 
 #include <stdio.h>
+#include <stdint.h>
 #include <string.h>
+#include <netinet/in.h>
 
 #include <openssl/opensslconf.h>
 #include <openssl/crypto.h>
@@ -74,12 +79,33 @@
 #ifndef OPENSSL_NO_AES
 #include <openssl/aes.h>
 #endif
+#ifndef OPENSSL_NO_SHA
+#include <openssl/sha.h>
+#endif
 #include <openssl/rand.h>
 #include <openssl/err.h>
 
 #ifndef OPENSSL_NO_HW
 #ifndef OPENSSL_NO_HW_PADLOCK
 
+/* PadLock RNG is disabled by default */
+#define	PADLOCK_NO_RNG	1
+
+/* No ASM routines for SHA in MSC yet */
+#ifdef _MSC_VER
+#define OPENSSL_NO_SHA
+#endif
+
+/* 64-bit mode does not need software SHA1 as fallback, we can
+ * do all operations with padlock */
+#if defined(__x86_64__) || defined(__x86_64)
+#define PADLOCK_NEED_FALLBACK_SHA	0
+#else
+#define PADLOCK_NEED_FALLBACK_SHA	1
+#endif
+
+#define PADLOCK_MAX_FINALIZING_LENGTH	0x1FFFFFFE
+
 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
 #  ifndef OPENSSL_NO_DYNAMIC_ENGINE
@@ -149,58 +175,40 @@ static int padlock_available(void);
 static int padlock_init(ENGINE *e);
 
 /* RNG Stuff */
+#ifndef PADLOCK_NO_RNG
 static RAND_METHOD padlock_rand;
-
-/* Cipher Stuff */
-#ifndef OPENSSL_NO_AES
-static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
 #endif
 
 /* Engine names */
 static const char *padlock_id = "padlock";
 static char padlock_name[100];
 
-/* Available features */
-static int padlock_use_ace = 0;	/* Advanced Cryptography Engine */
-static int padlock_use_rng = 0;	/* Random Number Generator */
-#ifndef OPENSSL_NO_AES
-static int padlock_aes_align_required = 1;
-#endif
+static int padlock_bind_helper(ENGINE *e);
 
-/* ===== Engine "management" functions ===== */
-
-/* Prepare the ENGINE structure for registration */
-static int
-padlock_bind_helper(ENGINE *e)
-{
-	/* Check available features */
-	padlock_available();
-
-#if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
-	padlock_use_rng=0;
-#endif
-
-	/* Generate a nice engine name with available features */
-	BIO_snprintf(padlock_name, sizeof(padlock_name),
-		"VIA PadLock (%s, %s)", 
-		 padlock_use_rng ? "RNG" : "no-RNG",
-		 padlock_use_ace ? "ACE" : "no-ACE");
+ /* Available features */
+/* Bit flags describing which PadLock features were detected; the bits are
+ * OR-ed together into the padlock_flags variable by padlock_available(). */
+enum padlock_flags {
+	PADLOCK_RNG  = 0x01,	/* Random Number Generator */
+	PADLOCK_ACE  = 0x02,	/* Advanced Cryptography Engine */
+	PADLOCK_ACE2 = 0x04,
+	PADLOCK_PHE  = 0x08,	/* gates registration of the SHA digests */
+	PADLOCK_PMM  = 0x10,
+	PADLOCK_NANO = 0x20,	/* VIA Nano CPU */
+};
+/* NOTE(review): unlike the other file-scope objects this one is not
+ * declared static -- confirm whether external linkage is intended. */
+enum padlock_flags padlock_flags;
 
-	/* Register everything or return with an error */ 
-	if (!ENGINE_set_id(e, padlock_id) ||
-	    !ENGINE_set_name(e, padlock_name) ||
+/* Feature tests: each expands to a non-zero value when the corresponding
+ * bit(s) are set in padlock_flags.  PADLOCK_HAVE_ACE accepts either ACE
+ * generation. */
+#define PADLOCK_HAVE_RNG  (padlock_flags & PADLOCK_RNG)
+#define PADLOCK_HAVE_ACE  (padlock_flags & (PADLOCK_ACE|PADLOCK_ACE2))
+#define PADLOCK_HAVE_ACE1 (padlock_flags & PADLOCK_ACE)
+#define PADLOCK_HAVE_ACE2 (padlock_flags & PADLOCK_ACE2)
+#define PADLOCK_HAVE_PHE  (padlock_flags & PADLOCK_PHE)
+#define PADLOCK_HAVE_PMM  (padlock_flags & PADLOCK_PMM)
+#define PADLOCK_HAVE_NANO (padlock_flags & PADLOCK_NANO)
 
-	    !ENGINE_set_init_function(e, padlock_init) ||
 #ifndef OPENSSL_NO_AES
-	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
+static int padlock_aes_align_required = 1;
 #endif
-	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
-		return 0;
-	}
 
-	/* Everything looks good */
-	return 1;
-}
+/* ===== Engine "management" functions ===== */
 
 #ifdef OPENSSL_NO_DYNAMIC_ENGINE
 
@@ -228,7 +236,7 @@ ENGINE_padlock(void)
 static int
 padlock_init(ENGINE *e)
 {
-	return (padlock_use_rng || padlock_use_ace);
+	return padlock_flags;
 }
 
 /* This stuff is needed if this ENGINE is being compiled into a self-contained
@@ -381,10 +389,20 @@ padlock_available(void)
 		: "+a"(eax), "=d"(edx) : : "ecx");
 
 	/* Fill up some flags */
-	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
-	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
+	padlock_flags |= ((edx & (0x3<<3)) ? PADLOCK_RNG : 0);
+	padlock_flags |= ((edx & (0x3<<7)) ? PADLOCK_ACE : 0);
+	padlock_flags |= ((edx & (0x3<<9)) ? PADLOCK_ACE2 : 0);
+	padlock_flags |= ((edx & (0x3<<11)) ? PADLOCK_PHE : 0);
+	padlock_flags |= ((edx & (0x3<<13)) ? PADLOCK_PMM : 0);
+
+	/* Check for VIA Nano CPU */
+	eax = 0x00000001;
+	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
+		: "+a"(eax) : : "ecx", "edx");
+	if ((eax | 0x000F) == 0x06FF)
+		padlock_flags |= PADLOCK_NANO;
 
-	return padlock_use_ace + padlock_use_rng;
+	return padlock_flags;
 }
 
 /* Force key reload from memory to the CPU microcode.
@@ -481,10 +499,14 @@ padlock_available(void)
 		: "+a"(eax), "=d"(edx) : : "rbx", "rcx");
 
 	/* Fill up some flags */
-	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
-	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
-
-	return padlock_use_ace + padlock_use_rng;
+	padlock_flags |= ((edx & (0x3<<3)) ? PADLOCK_RNG : 0);
+	padlock_flags |= ((edx & (0x3<<7)) ? PADLOCK_ACE : 0);
+	padlock_flags |= ((edx & (0x3<<9)) ? PADLOCK_ACE2 : 0);
+	padlock_flags |= ((edx & (0x3<<11)) ? PADLOCK_PHE : 0);
+	padlock_flags |= ((edx & (0x3<<13)) ? PADLOCK_PMM : 0);
+	padlock_flags |= PADLOCK_NANO;
+
+	return padlock_flags;
 }
 
 /* Force key reload from memory to the CPU microcode.
@@ -1273,6 +1295,496 @@ padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
 
 #endif /* OPENSSL_NO_AES */
 
+#ifndef OPENSSL_NO_SHA
+
+/*
+ * Copy `count` 32-bit words from src to dst, storing every word in
+ * big-endian (network) byte order.
+ *
+ * dst may be an arbitrary byte buffer (e.g. the caller's digest output),
+ * so each word is written byte by byte instead of through a uint32_t
+ * pointer; this avoids misaligned and type-punned stores.  The loop index
+ * is a size_t so that it matches the type of `count`.
+ */
+static inline void
+padlock_copy_bswap(void *dst, const void *src, size_t count)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t i;
+
+	for (i = 0; i < count; i++, in += 4, out += 4) {
+		uint32_t w;
+
+		/* Read the word in host order, emit it most significant byte first */
+		memcpy(&w, in, sizeof(w));
+		out[0] = (unsigned char)(w >> 24);
+		out[1] = (unsigned char)(w >> 16);
+		out[2] = (unsigned char)(w >> 8);
+		out[3] = (unsigned char)w;
+	}
+}
+
+/*
+ * Build the final padded block(s) of a SHA-1/SHA-256 message by hand.
+ *
+ *   ctx      - unused, kept so existing callers need not change
+ *   padding  - output buffer, at least 2 * SHA_CBLOCK bytes
+ *   data     - trailing message bytes that did not fill a whole block
+ *   data_len - number of bytes at data (less than SHA_CBLOCK)
+ *   total    - value stored in the 64-bit big-endian length field; the
+ *              callers pass the message length in bits
+ *
+ * The buffer receives data, the 0x80 marker, zero fill and the length
+ * field.  One block is enough when data_len < 56, otherwise two are used.
+ * Returns the number of blocks written (1 or 2).
+ *
+ * The length field is stored byte by byte: padding is a plain byte buffer
+ * with no alignment guarantee, so it must not be written through a
+ * uint32_t pointer.
+ */
+static unsigned long padlock_sha_prepare_padding(
+		EVP_MD_CTX *ctx,
+		unsigned char *padding,
+		unsigned char *data, size_t data_len,
+		uint64_t total)
+{
+	unsigned int padding_len;
+	unsigned char *tail;
+	int i;
+
+	(void)ctx;
+
+	padding_len = data_len < 56 ? SHA_CBLOCK : 2 * SHA_CBLOCK;
+	if (data_len)
+		memcpy(padding, data, data_len);
+
+	memset(padding + data_len, 0, padding_len - data_len);
+	padding[data_len] = 0x80;
+
+	/* 64-bit length, most significant byte first, in the last 8 bytes */
+	tail = padding + padding_len - 8;
+	for (i = 0; i < 8; i++)
+		tail[i] = (unsigned char)(total >> (56 - 8 * i));
+
+	return data_len < 56 ? 1 : 2;
+}
+
+#define PADLOCK_SHA_ALIGN(dd)	(uint32_t*)(((uintptr_t)(dd) + 15) & ~15)
+#define PADLOCK_SHA_HWCTX	(128+16)
+
+/*
+ * Issue the "xsha1" instruction in the form the callers use to finish a
+ * digest.  The asm constraints bind buf to (r/e)si, hwctx to (r/e)di,
+ * (total - now) to (r/e)ax and total to (r/e)cx; all four may be modified
+ * by the instruction.
+ * NOTE(review): presumably total is the whole message length in bytes and
+ * now the number of bytes at buf that are still unhashed -- confirm
+ * against the VIA PadLock programming guide.
+ */
+static void
+padlock_sha1(void *hwctx, const void *buf, unsigned long total, unsigned long now)
+{
+	unsigned long pos = total - now;
+
+	asm volatile ("xsha1"
+			: "+S"(buf), "+D"(hwctx), "+a"(pos), "+c"(total)
+			: : "memory");
+}
+
+/*
+ * Issue "xsha1" with (r/e)ax = -1, the form the callers use for
+ * non-finalizing updates: `blocks` (in (r/e)cx) whole blocks starting at
+ * buf are processed against the state at hwctx.
+ */
+static void
+padlock_sha1_partial(void *hwctx, const void *buf, unsigned long blocks)
+{
+	asm volatile ("xsha1"
+			: "+S"(buf), "+D"(hwctx), "+c"(blocks)
+			: "a"(-1L) : "memory");
+}
+
+/* Digest init callback: initialize the SHA_CTX stored in ctx->md_data with
+ * the software SHA1_Init() and return its result. */
+static int padlock_sha1_init(EVP_MD_CTX *ctx)
+{
+	return SHA1_Init(ctx->md_data);
+}
+
+#if PADLOCK_NEED_FALLBACK_SHA
+
+/*
+ * SHA-1 update callback installed by padlock_bind_helper() when the CPU is
+ * not a VIA Nano.
+ *
+ * The hardware is used only when the context carries
+ * EVP_MD_CTX_FLAG_ONESHOT and the total message length does not exceed
+ * PADLOCK_MAX_FINALIZING_LENGTH; in that case the digest is finalized here
+ * and c->num is set to -1 so that padlock_sha1_final() only has to copy
+ * the result out.  Every other call is handed to the software
+ * SHA1_Update().
+ *
+ * Returns 1 on success, 0 if SHA1_Update() fails.
+ */
+static int padlock_sha1_update_eden(EVP_MD_CTX *ctx, const void *data,
+				    size_t len)
+{
+	unsigned char hwctx[PADLOCK_SHA_HWCTX];
+	uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx);
+	SHA_CTX *c = ctx->md_data;
+	uint_fast64_t total;
+	const unsigned char *p = data;
+	unsigned long l = 0;
+
+	/* Calculate total length (Nl,Nh) is length in bits */
+	total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3);
+	total += len;
+
+	if ((ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) &&
+	    (total <= PADLOCK_MAX_FINALIZING_LENGTH)) {
+		if (c->num != 0) {
+			/* Earlier calls left buffered bytes: let the software
+			 * code top the buffer up with at most the bytes
+			 * missing from the current block. */
+			l = (len < SHA_CBLOCK - c->num) ? len : (SHA_CBLOCK - c->num);
+			if (!SHA1_Update(c, data, l))
+				return 0;
+			p += l;
+			if (c->num != 0) {
+				/* Buffer still not full: the buffered bytes are
+				 * all that is left to hash. */
+				p = (unsigned char *) c->data;
+				len = c->num;
+				l = 0;
+			}
+		}
+		/* Run the finalizing instruction on a 16-byte aligned copy of
+		 * the chaining state, then store the result back. */
+		memcpy(aligned, &c->h0, 5 * sizeof(SHA_LONG));
+		padlock_sha1(aligned, p, total, len - l);
+		memcpy(&c->h0, aligned, 5 * sizeof(SHA_LONG));
+		c->num = -1;
+		return 1;
+	}
+
+	return SHA1_Update(c, data, len);
+}
+#endif
+
+/*
+ * SHA-1 update callback that performs all hashing with PadLock, using the
+ * non-finalizing form of xsha1 for intermediate data.
+ *
+ * The byte counters Nl/Nh in the SHA_CTX are advanced by len.  Bytes left
+ * over from an earlier call are completed to a full block first.  If the
+ * context carries EVP_MD_CTX_FLAG_ONESHOT and the total length does not
+ * exceed PADLOCK_MAX_FINALIZING_LENGTH, the digest is finalized right away
+ * and c->num is set to -1 as a marker for padlock_sha1_final().  Otherwise
+ * all whole blocks are hashed and the remainder is buffered in c->data.
+ *
+ * Always returns 1.
+ */
+static int padlock_sha1_update(EVP_MD_CTX *ctx, const void *data,
+			       size_t len)
+{
+	unsigned char hwctx[PADLOCK_SHA_HWCTX];
+	uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx);
+	SHA_CTX *c = ctx->md_data;
+	/* Walk the input through a byte pointer: ISO C does not define
+	 * arithmetic on "const void *" (that is a GNU extension). */
+	const unsigned char *in = data;
+	uint_fast64_t total;
+	unsigned char *p;
+	unsigned long n;
+
+	/* Calculate total length (Nl,Nh) is length in bits */
+	total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3);
+	total += len;
+	c->Nh = total >> 29;
+	c->Nl = (total << 3) & 0xffffffffUL;
+
+	memcpy(aligned, &c->h0, 5 * sizeof(SHA_LONG));
+
+	/* Check partial data */
+	n = c->num;
+	if (n) {
+		p = (unsigned char *) c->data;
+		if (len >= SHA_CBLOCK || len+n >= SHA_CBLOCK) {
+			/* Complete the buffered block and hash it */
+			memcpy(p+n, in, SHA_CBLOCK-n);
+			padlock_sha1_partial(aligned, p, 1);
+			n      = SHA_CBLOCK - n;
+			in    += n;
+			len   -= n;
+			c->num = 0;
+			memset(p, 0, SHA_CBLOCK);
+		} else {
+			/* Still less than one block: keep buffering */
+			memcpy(p+n, in, len);
+			c->num += (unsigned int)len;
+			return 1;
+		}
+	}
+
+	/* Can we finalize straight away? */
+	if ((ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) &&
+	    (total <= PADLOCK_MAX_FINALIZING_LENGTH)) {
+		padlock_sha1(aligned, in, total, len);
+		memcpy(&c->h0, aligned, 5 * sizeof(SHA_LONG));
+		c->num = -1;
+		return 1;
+	}
+
+	/* Use nonfinalizing update */
+	n = len / SHA_CBLOCK;
+	if (n != 0) {
+		padlock_sha1_partial(aligned, in, n);
+		in  += n * SHA_CBLOCK;
+		len -= n * SHA_CBLOCK;
+	}
+	memcpy(&c->h0, aligned, 5 * sizeof(SHA_LONG));
+
+	/* Buffer remaining bytes */
+	if (len) {
+		memcpy(c->data, in, len);
+		c->num = len;
+	}
+
+	return 1;
+}
+
+/*
+ * SHA-1 final callback: write the digest to md as five big-endian 32-bit
+ * words and reset c->num to 0.
+ *
+ * - c->num == -1 means an update callback already finalized the digest;
+ *   the state words are just byte-swapped into md.
+ * - When PADLOCK_NEED_FALLBACK_SHA is set and the CPU is not a Nano,
+ *   messages longer than PADLOCK_MAX_FINALIZING_LENGTH are finished by the
+ *   software SHA1_Final().
+ * - Otherwise such long messages are padded by hand and run through the
+ *   non-finalizing xsha1, while shorter ones use the finalizing xsha1 on
+ *   the bytes still buffered in c->data.
+ *
+ * Returns 1, or the result of SHA1_Final() on the fallback path.
+ */
+static int padlock_sha1_final(EVP_MD_CTX *ctx, unsigned char *md)
+{
+	unsigned char hwctx[PADLOCK_SHA_HWCTX];
+	uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx);
+	uint64_t total;
+	SHA_CTX *c = ctx->md_data;
+
+	if (c->num == -1) {
+		padlock_copy_bswap(md, &c->h0, 5);
+		c->num = 0;
+		return 1;
+	}
+
+	/* Total message length in bytes, rebuilt from the bit counters */
+	total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3);
+#if PADLOCK_NEED_FALLBACK_SHA
+	if ((!PADLOCK_HAVE_NANO) && (total > PADLOCK_MAX_FINALIZING_LENGTH))
+		return SHA1_Final(md, c);
+#endif
+
+	memcpy(aligned, &c->h0, 5 * sizeof(SHA_LONG));
+	if (total > PADLOCK_MAX_FINALIZING_LENGTH) {
+		unsigned char padding[2 * SHA_CBLOCK];
+		unsigned long n;
+
+		/* The length field of the padding is expressed in bits */
+		n = padlock_sha_prepare_padding(ctx, padding,
+			(unsigned char *) c->data, c->num, total << 3);
+		padlock_sha1_partial(aligned, padding, n);
+	} else {
+		padlock_sha1(aligned, c->data, total, c->num);
+	}
+	padlock_copy_bswap(md, aligned, 5);
+	c->num = 0;
+
+	return 1;
+}
+
+/*
+ * EVP_MD descriptor for PadLock SHA-1 (NID_sha1, signature NID
+ * NID_sha1WithRSAEncryption).  Not const: padlock_bind_helper() may replace
+ * the update callback with padlock_sha1_update_eden.
+ */
+static EVP_MD padlock_sha1_md = {
+	NID_sha1,
+	NID_sha1WithRSAEncryption,
+	SHA_DIGEST_LENGTH,
+	EVP_MD_FLAG_PKEY_METHOD_SIGNATURE,
+	padlock_sha1_init,
+	padlock_sha1_update,
+	padlock_sha1_final,
+	NULL,
+	NULL,
+	EVP_PKEY_RSA_method,
+	SHA_CBLOCK,
+	sizeof(SHA_CTX),
+};
+
+/*
+ * EVP_MD descriptor for SHA-1 as used with DSA (NID_dsa, signature NID
+ * NID_dsaWithSHA1).  It shares the SHA-1 callbacks with padlock_sha1_md
+ * and is likewise patched by padlock_bind_helper() on non-Nano CPUs.
+ * NOTE(review): the flags field is 0 here -- confirm against the flags of
+ * OpenSSL's own dss1 descriptor.
+ */
+static EVP_MD padlock_dss1_md = {
+	NID_dsa,
+	NID_dsaWithSHA1,
+	SHA_DIGEST_LENGTH,
+	0,
+	padlock_sha1_init,
+	padlock_sha1_update,
+	padlock_sha1_final,
+	NULL,
+	NULL,
+	EVP_PKEY_DSA_method,
+	SHA_CBLOCK,
+	sizeof(SHA_CTX),
+};
+
+
+#if !defined(OPENSSL_NO_SHA256)
+
+/*
+ * Issue the "xsha256" instruction in the form the callers use to finish a
+ * digest.  The asm constraints bind buf to (r/e)si, hwctx to (r/e)di,
+ * (total - now) to (r/e)ax and total to (r/e)cx; all four may be modified
+ * by the instruction.
+ * NOTE(review): presumably total is the whole message length in bytes and
+ * now the number of bytes at buf that are still unhashed -- confirm
+ * against the VIA PadLock programming guide.
+ */
+static void
+padlock_sha256(void *hwctx, const void *buf, unsigned long total, unsigned long now)
+{
+	unsigned long pos = total - now;
+
+	asm volatile ("xsha256"
+			: "+S"(buf), "+D"(hwctx), "+a"(pos), "+c"(total)
+			: : "memory");
+}
+
+/*
+ * Issue "xsha256" with (r/e)ax = -1, the form the callers use for
+ * non-finalizing updates: `blocks` (in (r/e)cx) whole blocks starting at
+ * buf are processed against the state at hwctx.
+ */
+static void
+padlock_sha256_partial(void *hwctx, const void *buf, unsigned long blocks)
+{
+	asm volatile ("xsha256"
+			: "+S"(buf), "+D"(hwctx), "+c"(blocks)
+			: "a"(-1L) : "memory");
+}
+
+#if PADLOCK_NEED_FALLBACK_SHA
+
+/*
+ * SHA-224/SHA-256 update callback installed by padlock_bind_helper() when
+ * the CPU is not a VIA Nano.
+ *
+ * The hardware is used only when the context carries
+ * EVP_MD_CTX_FLAG_ONESHOT and the total message length does not exceed
+ * PADLOCK_MAX_FINALIZING_LENGTH; in that case the digest is finalized here
+ * and c->num is set to -1 so that padlock_sha256_final() only has to copy
+ * the result out.  Every other call is handed to the software
+ * SHA256_Update().
+ *
+ * Returns 1 on success, 0 if SHA256_Update() fails.
+ */
+static int padlock_sha256_update_eden(EVP_MD_CTX *ctx, const void *data,
+				      size_t len)
+{
+	unsigned char hwctx[PADLOCK_SHA_HWCTX];
+	uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx);
+	SHA256_CTX *c = ctx->md_data;
+	uint_fast64_t total;
+	const unsigned char *p = data;
+	unsigned int l = 0;
+
+	/* Calculate total length (Nl,Nh) is length in bits */
+	total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3);
+	total += len;
+
+	if ((ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) &&
+	    (total <= PADLOCK_MAX_FINALIZING_LENGTH)) {
+		if (c->num != 0) {
+			/* Earlier calls left buffered bytes: let the software
+			 * code top the buffer up with at most the bytes
+			 * missing from the current block. */
+			l = (len < SHA256_CBLOCK - c->num) ? len : (SHA256_CBLOCK - c->num);
+			if (!SHA256_Update(c, data, l))
+				return 0;
+			p += l;
+			if (c->num != 0) {
+				/* Buffer still not full: the buffered bytes are
+				 * all that is left to hash. */
+				p = (unsigned char *) c->data;
+				len = c->num;
+				l = 0;
+			}
+		}
+		/* Run the finalizing instruction on a 16-byte aligned copy of
+		 * the chaining state, then store the result back. */
+		memcpy(aligned, c->h, sizeof(c->h));
+		padlock_sha256(aligned, p, total, len - l);
+		memcpy(c->h, aligned, sizeof(c->h));
+		c->num = -1;
+		return 1;
+	}
+
+	return SHA256_Update(c, data, len);
+}
+
+#endif
+
+/*
+ * SHA-224/SHA-256 update callback that performs all hashing with PadLock,
+ * using the non-finalizing form of xsha256 for intermediate data.
+ *
+ * The byte counters Nl/Nh in the SHA256_CTX are advanced by len.  Bytes
+ * left over from an earlier call are completed to a full block first.  If
+ * the context carries EVP_MD_CTX_FLAG_ONESHOT and the total length does
+ * not exceed PADLOCK_MAX_FINALIZING_LENGTH, the digest is finalized right
+ * away and c->num is set to -1 as a marker for padlock_sha256_final().
+ * Otherwise all whole blocks are hashed and the remainder is buffered in
+ * c->data.
+ *
+ * Always returns 1.
+ */
+static int padlock_sha256_update(EVP_MD_CTX *ctx, const void *data,
+				 size_t len)
+{
+	unsigned char hwctx[PADLOCK_SHA_HWCTX];
+	uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx);
+	SHA256_CTX *c = ctx->md_data;
+	/* Walk the input through a byte pointer: ISO C does not define
+	 * arithmetic on "const void *" (that is a GNU extension). */
+	const unsigned char *in = data;
+	uint_fast64_t total;
+	unsigned char *p;
+	unsigned long n;
+
+	/* Calculate total length (Nl,Nh) is length in bits */
+	total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3);
+	total += len;
+	c->Nh = total >> 29;
+	c->Nl = (total << 3) & 0xffffffffUL;
+
+	memcpy(aligned, c->h, sizeof(c->h));
+
+	/* Check partial data */
+	n = c->num;
+	if (n) {
+		p = (unsigned char *) c->data;
+		if (len >= SHA256_CBLOCK || len+n >= SHA256_CBLOCK) {
+			/* Complete the buffered block and hash it */
+			memcpy(p+n, in, SHA256_CBLOCK-n);
+			padlock_sha256_partial(aligned, p, 1);
+			n      = SHA256_CBLOCK - n;
+			in    += n;
+			len   -= n;
+			c->num = 0;
+			memset(p, 0, SHA256_CBLOCK);
+		} else {
+			/* Still less than one block: keep buffering */
+			memcpy(p+n, in, len);
+			c->num += (unsigned int)len;
+			return 1;
+		}
+	}
+
+	/* Can we finalize straight away? */
+	if ((ctx->flags & EVP_MD_CTX_FLAG_ONESHOT) &&
+	    (total <= PADLOCK_MAX_FINALIZING_LENGTH)) {
+		padlock_sha256(aligned, in, total, len);
+		memcpy(c->h, aligned, sizeof(c->h));
+		c->num = -1;
+		return 1;
+	}
+
+	/* Use nonfinalizing update */
+	n = len / SHA256_CBLOCK;
+	if (n != 0) {
+		padlock_sha256_partial(aligned, in, n);
+		in  += n * SHA256_CBLOCK;
+		len -= n * SHA256_CBLOCK;
+	}
+	memcpy(c->h, aligned, sizeof(c->h));
+
+	/* Buffer remaining bytes */
+	if (len) {
+		memcpy(c->data, in, len);
+		c->num = len;
+	}
+
+	return 1;
+}
+
+/*
+ * Final callback shared by SHA-224 and SHA-256: write the digest to md as
+ * big-endian 32-bit words and reset c->num to 0.
+ *
+ * Only c->md_len bytes are written.  This function also serves
+ * padlock_sha224_md, whose digest is 28 bytes; copying all eight state
+ * words unconditionally would write 32 bytes and overrun a caller buffer
+ * sized for SHA-224.
+ *
+ * - c->num == -1 means an update callback already finalized the digest;
+ *   the state words are just byte-swapped into md.
+ * - When PADLOCK_NEED_FALLBACK_SHA is set and the CPU is not a Nano,
+ *   messages longer than PADLOCK_MAX_FINALIZING_LENGTH are finished by the
+ *   software SHA256_Final().
+ * - Otherwise such long messages are padded by hand and run through the
+ *   non-finalizing xsha256, while shorter ones use the finalizing xsha256
+ *   on the bytes still buffered in c->data.
+ *
+ * Returns 1, or the result of SHA256_Final() on the fallback path.
+ */
+static int padlock_sha256_final(EVP_MD_CTX *ctx, unsigned char *md)
+{
+	unsigned char hwctx[PADLOCK_SHA_HWCTX];
+	uint32_t *aligned = PADLOCK_SHA_ALIGN(hwctx);
+	uint64_t total;
+	SHA256_CTX *c = ctx->md_data;
+	/* Number of state words that make up the digest (7 or 8) */
+	size_t words = c->md_len / sizeof(c->h[0]);
+
+	if (c->num == -1) {
+		padlock_copy_bswap(md, c->h, words);
+		c->num = 0;
+		return 1;
+	}
+
+	/* Total message length in bytes, rebuilt from the bit counters */
+	total = (((uint_fast64_t) c->Nh) << 29) + (c->Nl >> 3);
+#if PADLOCK_NEED_FALLBACK_SHA
+	if ((!PADLOCK_HAVE_NANO) && (total > PADLOCK_MAX_FINALIZING_LENGTH))
+		return SHA256_Final(md, c);
+#endif
+
+	memcpy(aligned, c->h, sizeof(c->h));
+	if (total > PADLOCK_MAX_FINALIZING_LENGTH) {
+		unsigned char padding[2 * SHA_CBLOCK];
+		unsigned long n;
+
+		/* The length field of the padding is expressed in bits */
+		n = padlock_sha_prepare_padding(ctx, padding,
+			(unsigned char *) c->data, c->num, total << 3);
+		padlock_sha256_partial(aligned, padding, n);
+	} else {
+		padlock_sha256(aligned, c->data, total, c->num);
+	}
+	padlock_copy_bswap(md, aligned, words);
+	c->num = 0;
+	return 1;
+}
+
+#if !defined(OPENSSL_NO_SHA224)
+
+/* Digest init callback: initialize the SHA256_CTX stored in ctx->md_data
+ * with the software SHA224_Init() and return its result. */
+static int padlock_sha224_init(EVP_MD_CTX *ctx)
+{
+	return SHA224_Init(ctx->md_data);
+}
+
+/*
+ * EVP_MD descriptor for PadLock SHA-224.  It reuses the SHA-256 update and
+ * final callbacks with the SHA-224 initial state.  Not const:
+ * padlock_bind_helper() may replace the update callback.
+ *
+ * EVP_MD_FLAG_PKEY_METHOD_SIGNATURE is set for the same reason as on
+ * padlock_sha1_md: without it signing and verification insist on the RSA
+ * key types listed by EVP_PKEY_RSA_method, so the digest cannot be used
+ * with other key types.
+ */
+static EVP_MD padlock_sha224_md = {
+	NID_sha224,
+	NID_sha224WithRSAEncryption,
+	SHA224_DIGEST_LENGTH,
+	EVP_MD_FLAG_PKEY_METHOD_SIGNATURE,
+	padlock_sha224_init,
+	padlock_sha256_update,
+	padlock_sha256_final,
+	NULL,
+	NULL,
+	EVP_PKEY_RSA_method,
+	SHA256_CBLOCK,
+	sizeof(SHA256_CTX),
+};
+#endif /* !OPENSSL_NO_SHA224 */
+
+/* Digest init callback: initialize the SHA256_CTX stored in ctx->md_data
+ * with the software SHA256_Init() and return its result. */
+static int padlock_sha256_init(EVP_MD_CTX *ctx)
+{
+	return SHA256_Init(ctx->md_data);
+}
+
+/*
+ * EVP_MD descriptor for PadLock SHA-256.  Not const: padlock_bind_helper()
+ * may replace the update callback.
+ *
+ * EVP_MD_FLAG_PKEY_METHOD_SIGNATURE is set for the same reason as on
+ * padlock_sha1_md: without it signing and verification insist on the RSA
+ * key types listed by EVP_PKEY_RSA_method, so the digest cannot be used
+ * with other key types.
+ */
+static EVP_MD padlock_sha256_md = {
+	NID_sha256,
+	NID_sha256WithRSAEncryption,
+	SHA256_DIGEST_LENGTH,
+	EVP_MD_FLAG_PKEY_METHOD_SIGNATURE,
+	padlock_sha256_init,
+	padlock_sha256_update,
+	padlock_sha256_final,
+	NULL,
+	NULL,
+	EVP_PKEY_RSA_method,
+	SHA256_CBLOCK,
+	sizeof(SHA256_CTX),
+};
+#endif /* !OPENSSL_NO_SHA256 */
+
+/* NIDs of the digests this engine offers; returned to the caller by
+ * padlock_digests() when no specific digest is requested.  The entries
+ * mirror the case labels handled there. */
+static int padlock_digest_nids[] = {
+#if !defined(OPENSSL_NO_SHA)
+	NID_sha1,
+	NID_dsa,
+#endif
+#if !defined(OPENSSL_NO_SHA256)
+#if !defined(OPENSSL_NO_SHA224)
+	NID_sha224,
+#endif
+	NID_sha256,
+#endif
+};
+
+/* Number of entries in padlock_digest_nids */
+static int padlock_digest_nids_num = sizeof(padlock_digest_nids)/sizeof(padlock_digest_nids[0]);
+
+/*
+ * Digest selector passed to ENGINE_set_digests() by padlock_bind_helper().
+ *
+ * With digest == NULL, *nids is pointed at the list of supported NIDs and
+ * the number of entries is returned.  Otherwise *digest is set to the
+ * descriptor for `nid` and 1 is returned; for an unsupported nid *digest
+ * is set to NULL and 0 is returned.  The engine argument e is not used.
+ */
+static int
+padlock_digests (ENGINE *e, const EVP_MD **digest, const int **nids, int nid)
+{
+	/* No specific digest => return a list of supported nids ... */
+	if (!digest) {
+		*nids = padlock_digest_nids;
+		return padlock_digest_nids_num;
+	}
+
+	/* ... or the requested "digest" otherwise */
+	switch (nid) {
+#if !defined(OPENSSL_NO_SHA)
+	  case NID_sha1:
+	    *digest = &padlock_sha1_md;
+	    break;
+	  case NID_dsa:
+	    *digest = &padlock_dss1_md;
+	    break;
+#endif
+#if !defined(OPENSSL_NO_SHA256)
+#if !defined(OPENSSL_NO_SHA224)
+	  case NID_sha224:
+	    *digest = &padlock_sha224_md;
+	    break;
+#endif	/* OPENSSL_NO_SHA224 */
+	  case NID_sha256:
+	    *digest = &padlock_sha256_md;
+	    break;
+#endif	/* OPENSSL_NO_SHA256 */
+	  default:
+	    /* Sorry, we don't support this NID */
+	    *digest = NULL;
+	    return 0;
+	}
+
+	return 1;
+}
+
+#endif /* OPENSSL_NO_SHA */
+
+#ifndef PADLOCK_NO_RNG
+
 /* ===== Random Number Generator ===== */
 /*
  * This code is not engaged. The reason is that it does not comply
@@ -1329,6 +1841,60 @@ static RAND_METHOD padlock_rand = {
 	padlock_rand_status,	/* rand status */
 };
 
+#endif /* PADLOCK_NO_RNG */
+
+/*
+ * Prepare the ENGINE structure for registration.
+ *
+ * Detects the available PadLock features, builds the human-readable engine
+ * name from them, selects the SHA update callbacks that fit the CPU, and
+ * registers id, name, init function and -- depending on the detected
+ * features and build options -- the cipher, digest and RAND
+ * implementations.
+ *
+ * Returns 1 on success, 0 if any ENGINE_set_* call fails.
+ */
+static int
+padlock_bind_helper(ENGINE *e)
+{
+	/* Check available features */
+	padlock_available();
+
+	/* Generate a nice engine name with available features */
+	BIO_snprintf(padlock_name, sizeof(padlock_name),
+		"VIA PadLock: %s%s%s%s%s%s",
+		padlock_flags ? "" : "not supported",
+		PADLOCK_HAVE_RNG ? "RNG " : "",
+		PADLOCK_HAVE_ACE ? (PADLOCK_HAVE_ACE2 ? "ACE2 " : "ACE ") : "",
+		PADLOCK_HAVE_PHE ? "PHE " : "",
+		PADLOCK_HAVE_PMM ? "PMM " : "",
+		PADLOCK_HAVE_NANO ? "NANO " : ""
+		);
+
+	/* Where a software fallback may be needed and the CPU is not a Nano,
+	 * switch every digest to the "eden" update callbacks, which use the
+	 * hardware only for one-shot hashing. */
+#if PADLOCK_NEED_FALLBACK_SHA && !defined(OPENSSL_NO_SHA)
+	if (!PADLOCK_HAVE_NANO) {
+		padlock_sha1_md.update = padlock_sha1_update_eden;
+		padlock_dss1_md.update = padlock_sha1_update_eden;
+#if !defined(OPENSSL_NO_SHA256)
+#if !defined(OPENSSL_NO_SHA224)
+		padlock_sha224_md.update = padlock_sha256_update_eden;
+#endif
+		padlock_sha256_md.update = padlock_sha256_update_eden;
+#endif
+	}
+#endif  
+
+	/* Register everything or return with an error */
+	if (!ENGINE_set_id(e, padlock_id) ||
+	    !ENGINE_set_name(e, padlock_name) ||
+	    !ENGINE_set_init_function(e, padlock_init)
+#ifndef OPENSSL_NO_AES
+	    || (PADLOCK_HAVE_ACE && !ENGINE_set_ciphers (e, padlock_ciphers))
+#endif
+#ifndef OPENSSL_NO_SHA
+	    || (PADLOCK_HAVE_PHE && !ENGINE_set_digests (e, padlock_digests))
+#endif
+#ifndef PADLOCK_NO_RNG
+	    || (PADLOCK_HAVE_RNG && !ENGINE_set_RAND (e, &padlock_rand))
+#endif
+	    ) {
+		return 0;
+	}
+
+	/* Everything looks good */
+	return 1;
+}
+
 #else  /* !COMPILE_HW_PADLOCK */
 #ifndef OPENSSL_NO_DYNAMIC_ENGINE
 OPENSSL_EXPORT
-- 
1.7.11.3

Reply via email to